diff --git a/.github/runs-on.yml b/.github/runs-on.yml index bdcdabf45204..1d97b8c5de21 100644 --- a/.github/runs-on.yml +++ b/.github/runs-on.yml @@ -25,9 +25,11 @@ runners: linux-amd64-gpu: family: ["g4dn.xlarge"] image: linux-amd64 + spot: "false" linux-amd64-mgpu: family: ["g4dn.12xlarge"] image: linux-amd64 + spot: "false" linux-arm64-cpu: cpu: 16 family: ["c6g", "c7g"] @@ -35,6 +37,7 @@ runners: windows-gpu: family: ["g4dn.2xlarge"] image: windows-amd64 + spot: "false" windows-cpu: cpu: 32 family: ["c7i-flex", "c7i", "c7a", "c5", "c5a"] diff --git a/.github/workflows/cuda13.yml b/.github/workflows/cuda13.yml new file mode 100644 index 000000000000..5ea448f25cce --- /dev/null +++ b/.github/workflows/cuda13.yml @@ -0,0 +1,87 @@ +name: XGBoost CI (CUDA 13) + +on: [push, pull_request] + +permissions: + contents: read # to fetch code (actions/checkout) + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + BRANCH_NAME: >- + ${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || github.ref_name }} + +jobs: + build-cuda13: + name: Build CUDA 13 + runs-on: + - runs-on=${{ github.run_id }} + - runner=linux-amd64-cpu + - tag=cuda13-build-cuda13 + steps: + # Restart Docker daemon so that it recognizes the ephemeral disks + - run: sudo systemctl restart docker + - uses: actions/checkout@v4 + with: + submodules: "true" + - name: Log into Docker registry (AWS ECR) + run: bash ops/pipeline/login-docker-registry.sh + - run: | + bash ops/pipeline/build-cuda13.sh + - name: Stash files + run: | + python3 ops/pipeline/manage-artifacts.py upload \ + --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \ + --prefix cache/${{ github.run_id }}/build-cuda13 \ + build/testxgboost python-package/dist/*.whl + test-cpp-cuda13: + name: Google Test (C++) with CUDA 13 + needs: [build-cuda13] + runs-on: + - runs-on=${{ github.run_id }} + - runner=linux-amd64-gpu + - tag=cuda13-test-cpp-cuda13 + steps: + # Restart Docker daemon so that it recognizes the ephemeral disks + - run: sudo systemctl restart docker + - uses: actions/checkout@v4 + with: + submodules: "true" + - name: Log into Docker registry (AWS ECR) + run: bash ops/pipeline/login-docker-registry.sh + - name: Unstash gtest + run: | + python3 ops/pipeline/manage-artifacts.py download \ + --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \ + --prefix cache/${{ github.run_id }}/build-cuda13 \ + --dest-dir build \ + testxgboost + chmod +x build/testxgboost + - run: | + bash ops/pipeline/test-cpp-cuda13.sh + test-python-cuda13: + name: Run Python tests with CUDA 13 + needs: [build-cuda13] + runs-on: + - runs-on=${{ github.run_id }} + - runner=linux-amd64-gpu + - tag=cuda13-test-python-cuda13 + steps: + # Restart Docker daemon so that it recognizes the ephemeral disks + - run: sudo systemctl restart docker + - uses: actions/checkout@v4 + with: + submodules: "true" + - name: Log into Docker registry (AWS ECR) + run: bash ops/pipeline/login-docker-registry.sh + - name: Unstash Python wheel + run: | + python3 ops/pipeline/manage-artifacts.py download \ + --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \ + --prefix cache/${{ github.run_id }}/build-cuda13 \ + --dest-dir wheelhouse \ + *.whl + - name: Run Python tests + run: bash ops/pipeline/test-python-wheel-cuda13.sh diff --git a/.github/workflows/jvm_tests.yml b/.github/workflows/jvm_tests.yml index a1b170c9d105..64a6642c6212 100644 --- a/.github/workflows/jvm_tests.yml +++ b/.github/workflows/jvm_tests.yml @@ 
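The `BRANCH_NAME` expression in the new workflow above leans on GitHub Actions' short-circuiting `&&`/`||` operators: on pull requests it evaluates to `PR-<number>`, on branch builds to the ref name. A minimal sketch of the same fallback logic in Python, with `pr_number` and `ref_name` standing in for `github.event.pull_request.number` and `github.ref_name`:

```python
from typing import Optional


def branch_name(pr_number: Optional[int], ref_name: str) -> str:
    # `${{ github.event.pull_request.number && 'PR-' }}` yields 'PR-' only
    # when a PR number is present; `${{ github.event.pull_request.number ||
    # github.ref_name }}` falls back to the branch/tag name on non-PR events.
    prefix = "PR-" if pr_number else ""
    return f"{prefix}{pr_number or ref_name}"


assert branch_name(1234, "ignored") == "PR-1234"
assert branch_name(None, "master") == "master"
```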
-77,7 +77,7 @@ jobs: - description: "MacOS (Intel)" script: ops/pipeline/build-jvm-macos-intel.sh libname: libxgboost4j_intel.dylib - runner: macos-13 + runner: macos-15-intel steps: - uses: actions/checkout@v4 with: @@ -132,12 +132,12 @@ jobs: strategy: fail-fast: false matrix: - os: [windows-latest, macos-13] + os: [windows-latest, macos-15-intel] steps: - uses: actions/checkout@v4 with: submodules: 'true' - - uses: actions/setup-java@v4 + - uses: actions/setup-java@v5 with: distribution: 'temurin' java-version: '8' @@ -152,7 +152,7 @@ jobs: key: ${{ runner.os }}-m2-${{ hashFiles('./jvm-packages/pom.xml') }} restore-keys: ${{ runner.os }}-m2-${{ hashFiles('./jvm-packages/pom.xml') }} - name: Test XGBoost4J (Core) on macos - if: matrix.os == 'macos-13' + if: matrix.os == 'macos-15-intel' run: | cd jvm-packages mvn test -B -pl :xgboost4j_2.12 -Duse.openmp=OFF @@ -205,7 +205,7 @@ jobs: SCALA_VERSION: ${{ matrix.scala_version }} deploy-jvm-packages: - name: Deploy JVM packages to S3 (${{ matrix.variant.name }}) + name: Deploy JVM packages to S3 (${{ matrix.variant.name }}, Scala ${{ matrix.scala_version }}) needs: [build-jvm-gpu, build-test-jvm-packages, test-jvm-packages-gpu] runs-on: - runs-on diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 73636e7ce66d..e7874875ec90 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -72,30 +72,3 @@ jobs: run: | python3 ops/script/lint_cpp.py bash ops/script/lint_cmake.sh - - lintr: - runs-on: ubuntu-latest - name: Run R linters on Ubuntu - env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - steps: - - uses: actions/checkout@v4 - with: - submodules: 'true' - - uses: r-lib/actions/setup-r@v2 - with: - r-version: "release" - - name: Cache R packages - uses: actions/cache@v4 - with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-r-release-7-${{ hashFiles('R-package/DESCRIPTION') }} - restore-keys: ${{ runner.os }}-r-release-7-${{ hashFiles('R-package/DESCRIPTION') }} - - name: Install dependencies - shell: Rscript {0} - run: | - source("./R-package/tests/helper_scripts/install_deps.R") - - name: Run lintr - run: | - MAKEFLAGS="-j$(nproc)" R CMD INSTALL R-package/ - Rscript ops/script/lint_r.R $(pwd) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 332606cece7b..11fb4ff0a7df 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -29,34 +29,6 @@ jobs: - name: Log into Docker registry (AWS ECR) run: bash ops/pipeline/login-docker-registry.sh - run: bash ops/pipeline/build-cpu.sh - - name: Stash CLI executable - run: | - python3 ops/pipeline/manage-artifacts.py upload \ - --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \ - --prefix cache/${{ github.run_id }}/build-cpu \ - ./xgboost - - build-cpu-arm64: - name: Build CPU ARM64 + manylinux_2_28_aarch64 wheel - runs-on: - - runs-on=${{ github.run_id }} - - runner=linux-arm64-cpu - - tag=build-cpu-arm64 - steps: - # Restart Docker daemon so that it recognizes the ephemeral disks - - run: sudo systemctl restart docker - - uses: actions/checkout@v4 - with: - submodules: "true" - - name: Log into Docker registry (AWS ECR) - run: bash ops/pipeline/login-docker-registry.sh - - run: bash ops/pipeline/build-cpu-arm64.sh - - name: Stash files - run: | - python3 ops/pipeline/manage-artifacts.py upload \ - --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \ - --prefix cache/${{ github.run_id }}/build-cpu-arm64 \ - ./xgboost python-package/dist/*.whl build-cuda: name: Build CUDA + manylinux_2_28_x86_64 wheel @@ -79,7 +51,7 @@ 
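Several jobs in this PR hand artifacts between runners through `ops/pipeline/manage-artifacts.py`, uploading build outputs to the runs-on S3 cache bucket under `cache/<run-id>/<job-name>` and downloading by basename glob (`*.whl`, `testxgboost`). A minimal sketch of that stash/unstash flow, assuming `boto3` and the bucket layout visible in the workflow calls; the real script's flags and error handling are not shown in this diff:

```python
"""Sketch of the stash/unstash pattern used by these workflows.

Assumptions: boto3 is installed and the key layout matches the workflow
calls (cache/<run_id>/<job>/<basename>). The real manage-artifacts.py
may differ in interface and behavior.
"""
import fnmatch
import pathlib

import boto3


def upload(bucket: str, prefix: str, files: list[str]) -> None:
    """Stash build outputs under s3://<bucket>/<prefix>/<basename>."""
    s3 = boto3.client("s3")
    for f in files:
        path = pathlib.Path(f)
        s3.upload_file(str(path), bucket, f"{prefix}/{path.name}")


def download(bucket: str, prefix: str, dest_dir: str, patterns: list[str]) -> None:
    """Unstash artifacts whose basenames match any glob (e.g. '*.whl')."""
    s3 = boto3.client("s3")
    dest = pathlib.Path(dest_dir)
    dest.mkdir(parents=True, exist_ok=True)
    # First page only, for brevity; a full implementation would paginate.
    resp = s3.list_objects_v2(Bucket=bucket, Prefix=prefix + "/")
    for obj in resp.get("Contents", []):
        name = obj["Key"].rsplit("/", 1)[-1]
        if any(fnmatch.fnmatch(name, p) for p in patterns):
            s3.download_file(bucket, obj["Key"], str(dest / name))
```

This is why each GPU test job only pulls `testxgboost` or the wheel back down instead of rebuilding anything on the expensive runner.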
jobs: python3 ops/pipeline/manage-artifacts.py upload \ --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \ --prefix cache/${{ github.run_id }}/build-cuda \ - build/testxgboost ./xgboost python-package/dist/*.whl + build/testxgboost python-package/dist/*.whl build-cuda-with-rmm: name: Build CUDA with RMM @@ -122,20 +94,50 @@ jobs: bash ops/pipeline/build-cuda.sh \ xgb-ci.gpu_build_rockylinux8_dev_ver enable-rmm - build-manylinux2014: - name: Build manylinux2014_${{ matrix.arch }} wheel + build-python-wheels-arm64: + name: Build manylinux_2_28_aarch64 wheel + runs-on: + - runs-on=${{ github.run_id }} + - runner=linux-arm64-cpu + - tag=build-python-wheels-arm64 + steps: + # Restart Docker daemon so that it recognizes the ephemeral disks + - run: sudo systemctl restart docker + - uses: actions/checkout@v4 + with: + submodules: "true" + - name: Log into Docker registry (AWS ECR) + run: bash ops/pipeline/login-docker-registry.sh + - run: bash ops/pipeline/build-python-wheels-arm64.sh + - name: Stash files + run: | + python3 ops/pipeline/manage-artifacts.py upload \ + --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \ + --prefix cache/${{ github.run_id }}/build-python-wheels-arm64 \ + python-package/dist/*.whl + + build-python-wheels-cpu: + name: Build CPU wheel for ${{ matrix.manylinux_target }}_${{ matrix.arch }} runs-on: - runs-on - runner=${{ matrix.runner }} - run-id=${{ github.run_id }} - - tag=main-build-manylinux2014-${{ matrix.arch }} + - tag=main-build-python-wheels-cpu-${{ matrix.manylinux_target }}-${{ matrix.arch }} strategy: fail-fast: false matrix: include: - - arch: aarch64 + - manylinux_target: manylinux2014 + arch: aarch64 runner: linux-arm64-cpu - - arch: x86_64 + - manylinux_target: manylinux2014 + arch: x86_64 + runner: linux-amd64-cpu + - manylinux_target: manylinux_2_28 + arch: aarch64 + runner: linux-arm64-cpu + - manylinux_target: manylinux_2_28 + arch: x86_64 runner: linux-amd64-cpu steps: # Restart Docker daemon so that it recognizes the ephemeral disks @@ -145,7 +147,9 @@ jobs: submodules: "true" - name: Log into Docker registry (AWS ECR) run: bash ops/pipeline/login-docker-registry.sh - - run: bash ops/pipeline/build-manylinux2014.sh ${{ matrix.arch }} + - run: | + bash ops/pipeline/build-python-wheels-cpu.sh \ + ${{ matrix.manylinux_target }} ${{ matrix.arch }} build-gpu-rpkg: name: Build GPU-enabled R package @@ -163,7 +167,6 @@ jobs: run: bash ops/pipeline/login-docker-registry.sh - run: bash ops/pipeline/build-gpu-rpkg.sh - test-cpp-gpu: name: >- Run Google Tests with GPUs @@ -208,7 +211,7 @@ jobs: test-python-wheel: name: Run Python tests (${{ matrix.description }}) - needs: [build-cuda, build-cpu-arm64] + needs: [build-cuda, build-python-wheels-arm64] runs-on: - runs-on - runner=${{ matrix.runner }} @@ -235,10 +238,10 @@ jobs: runner: linux-amd64-cpu artifact_from: build-cuda - description: cpu-arm64 - image_repo: xgb-ci.aarch64 + image_repo: xgb-ci.manylinux_2_28_aarch64 suite: cpu-arm64 runner: linux-arm64-cpu - artifact_from: build-cpu-arm64 + artifact_from: build-python-wheels-arm64 steps: # Restart Docker daemon so that it recognizes the ephemeral disks - run: sudo systemctl restart docker @@ -253,8 +256,6 @@ jobs: --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \ --prefix cache/${{ github.run_id }}/${{ matrix.artifact_from }} \ --dest-dir wheelhouse \ - *.whl xgboost - mv -v wheelhouse/xgboost . 
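The `build-python-wheels-cpu` job above replaces the single-target `build-manylinux2014` job with a 2x2 matrix over manylinux target and architecture. A small sketch of how that matrix expands; the `xgb-ci.<target>_<arch>` image naming is an assumption inferred from the `xgb-ci.manylinux_2_28_aarch64` repo referenced in the test matrix:

```python
from itertools import product

# The (manylinux target, arch) combinations spelled out in the
# build-python-wheels-cpu matrix, plus the runner each arch maps to.
TARGETS = ["manylinux2014", "manylinux_2_28"]
RUNNERS = {"aarch64": "linux-arm64-cpu", "x86_64": "linux-amd64-cpu"}

for target, arch in product(TARGETS, RUNNERS):
    # Image naming is an assumption inferred from the
    # xgb-ci.manylinux_2_28_aarch64 repo used by the test jobs.
    image = f"xgb-ci.{target}_{arch}"
    print(f"{target}/{arch}: runner={RUNNERS[arch]}, image={image}")
```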
- chmod +x ./xgboost + *.whl - name: Run Python tests, ${{ matrix.description }} run: bash ops/pipeline/test-python-wheel.sh ${{ matrix.suite }} ${{ matrix.image_repo }} diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml index 180b1a855733..343bb899ed16 100644 --- a/.github/workflows/python_tests.yml +++ b/.github/workflows/python_tests.yml @@ -20,7 +20,7 @@ jobs: strategy: fail-fast: false matrix: - os: [macos-13, windows-latest, ubuntu-latest] + os: [macos-15-intel, windows-latest, ubuntu-latest] steps: - uses: actions/checkout@v4 with: @@ -32,13 +32,13 @@ jobs: - name: Install extra package for MacOS run: | mamba install -c conda-forge llvm-openmp - if: matrix.os == 'macos-13' + if: matrix.os == 'macos-15-intel' - name: Build and install XGBoost run: bash ops/pipeline/test-python-sdist.sh python-tests-on-macos: - name: Test XGBoost Python package on macos-13 - runs-on: macos-13 + name: Test XGBoost Python package on macos-15-intel + runs-on: macos-15-intel timeout-minutes: 60 steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/python_wheels_macos.yml b/.github/workflows/python_wheels_macos.yml index cbece0512274..14a37840c719 100644 --- a/.github/workflows/python_wheels_macos.yml +++ b/.github/workflows/python_wheels_macos.yml @@ -25,7 +25,7 @@ jobs: fail-fast: false matrix: include: - - os: macos-13 + - os: macos-15-intel platform_id: macosx_x86_64 - os: macos-14 platform_id: macosx_arm64 diff --git a/.github/workflows/python_wheels_variants.yml b/.github/workflows/python_wheels_variants.yml new file mode 100644 index 000000000000..4a0e1248d030 --- /dev/null +++ b/.github/workflows/python_wheels_variants.yml @@ -0,0 +1,36 @@ +name: Build Python wheels using Wheel Variant prototype (WheelNext) + +on: [push, pull_request] + +permissions: + contents: read # to fetch code (actions/checkout) + +defaults: + run: + shell: bash -l {0} + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + BRANCH_NAME: >- + ${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || github.ref_name }} + +jobs: + python-wheels-variants: + name: Build Python wheels using Wheel Variant prototype (WheelNext) + runs-on: + - runs-on=${{ github.run_id }} + - runner=linux-amd64-cpu + - tag=python-wheels-variants + steps: + # Restart Docker daemon so that it recognizes the ephemeral disks + - run: sudo systemctl restart docker + - uses: actions/checkout@v4 + with: + submodules: "true" + - name: Log into Docker registry (AWS ECR) + run: bash ops/pipeline/login-docker-registry.sh + - run: | + bash ops/pipeline/build-variant-wheels.sh diff --git a/.github/workflows/python_wheels_winarm64.yml b/.github/workflows/python_wheels_winarm64.yml new file mode 100644 index 000000000000..13ac0ad9c15c --- /dev/null +++ b/.github/workflows/python_wheels_winarm64.yml @@ -0,0 +1,90 @@ +name: Build Python wheels targeting Windows ARM64 + +on: [push, pull_request] + +permissions: + contents: read # to fetch code (actions/checkout) + +defaults: + run: + shell: pwsh + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + BRANCH_NAME: >- + ${{ github.event.pull_request.number && 'PR-' }}${{ github.event.pull_request.number || github.ref_name }} + +jobs: + python-wheels-Win-ARM64: + name: Build wheel for Windows ARM64 + runs-on: windows-11-arm + steps: + - uses: actions/checkout@v4 + with: + submodules: 
'true' + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: Install build dependencies + run: | + python -m pip install --upgrade pip + python -m pip install wheel setuptools awscli packaging + + - name: Build XGBoost for Win-ARM64 + run: | + mkdir build + cd build + cmake .. -G"Visual Studio 17 2022" -A ARM64 + cmake --build . --config Release -- /m /nodeReuse:false "/consoleloggerparameters:ShowCommandLine;Verbosity=minimal" + + - name: Build Python wheel xgboost for Win-ARM64 + run: | + cd python-package + mkdir -p wheelhouse + pip wheel --no-deps -v . --wheel-dir wheelhouse/ + $wheelFile = Get-ChildItem wheelhouse/*.whl | Select-Object -First 1 -ExpandProperty FullName + python -m wheel tags --python-tag py3 --abi-tag none --platform win_arm64 --remove $wheelFile + + - name: Upload Python wheel xgboost + if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_') + run: | + $wheelFile = Get-ChildItem python-package/wheelhouse/*.whl | Select-Object -First 1 -ExpandProperty FullName + python ops/pipeline/manage-artifacts.py upload ` + --s3-bucket xgboost-nightly-builds ` + --prefix ${{ env.BRANCH_NAME }}/${{ github.sha }} --make-public ` + $wheelFile + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }} + + - name: Clean up + run: | + $wheelFile = Get-ChildItem python-package/wheelhouse/*.whl | Select-Object -First 1 -ExpandProperty FullName + Remove-Item -Path $wheelFile -Verbose + + - name: Build Python wheel xgboost-cpu for Win-ARM64 + run: | + # Patch to rename pkg to xgboost-cpu + python ops/script/pypi_variants.py --use-suffix=cpu --require-nccl-dep=na + cd python-package + pip wheel --no-deps -v . --wheel-dir wheelhouse/ + $wheelFile = Get-ChildItem wheelhouse/*.whl | Select-Object -First 1 -ExpandProperty FullName + python -m wheel tags --python-tag py3 --abi-tag none --platform win_arm64 --remove $wheelFile + + - name: Upload Python wheel xgboost-cpu + if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_') + run: | + $wheelFile = Get-ChildItem python-package/wheelhouse/*.whl | Select-Object -First 1 -ExpandProperty FullName + python ops/pipeline/manage-artifacts.py upload ` + --s3-bucket xgboost-nightly-builds ` + --prefix ${{ env.BRANCH_NAME }}/${{ github.sha }} --make-public ` + $wheelFile + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_IAM_S3_UPLOADER }} diff --git a/.github/workflows/r_tests.yml b/.github/workflows/r_tests.yml index 43ad372a1e84..6beff9ef4628 100644 --- a/.github/workflows/r_tests.yml +++ b/.github/workflows/r_tests.yml @@ -42,12 +42,6 @@ jobs: - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.r }} - - name: Cache R packages - uses: actions/cache@v4 - with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-r-${{ matrix.r }}-8-${{ hashFiles('R-package/DESCRIPTION') }} - restore-keys: ${{ runner.os }}-r-${{ matrix.r }}-8-${{ hashFiles('R-package/DESCRIPTION') }} - uses: actions/setup-python@v5 with: python-version: "3.10" @@ -70,13 +64,12 @@ jobs: name: Test R package on Debian runs-on: ubuntu-latest container: - image: rhub/debian-gcc-release + image: rhub/ubuntu-release # rhub uses ubuntu for debian tests. steps: - name: Install system dependencies run: | # Must run before checkout to have the latest git installed. 
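Both Windows ARM64 wheel jobs above build a CPython-specific wheel and then retag it with `python -m wheel tags --python-tag py3 --abi-tag none --platform win_arm64 --remove`. The sketch below shows just the filename transformation that step performs, using the `packaging` library the workflow installs; the real command also rewrites the `WHEEL` metadata inside the archive. The input filename is hypothetical, for illustration only:

```python
from packaging.utils import parse_wheel_filename


def retagged_name(filename: str) -> str:
    """Filename produced by `wheel tags --python-tag py3 --abi-tag none
    --platform win_arm64` for a given input wheel."""
    name, version, build, _tags = parse_wheel_filename(filename)
    build_part = f"-{build[0]}{build[1]}" if build else ""
    return f"{name}-{version}{build_part}-py3-none-win_arm64.whl"


# Hypothetical input name:
print(retagged_name("xgboost-3.2.0-cp311-cp311-win_amd64.whl"))
# -> xgboost-3.2.0-py3-none-win_arm64.whl
```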
- # No need to add pandoc, the container has it figured out. - apt update && apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev git librsvg2-dev librsvg2-2 -y + apt update && apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev git librsvg2-dev librsvg2-2 pandoc -y - name: Trust git cloning project sources run: | git config --global --add safe.directory "${GITHUB_WORKSPACE}" @@ -90,8 +83,9 @@ jobs: - name: Test R shell: bash -l {0} run: | - python3 ops/script/test_r_package.py --r=/usr/bin/R --build-tool=autotools --task=check + python3 ops/script/test_r_package.py --r=/opt/R/release/bin/R --build-tool=autotools --task=check - uses: dorny/paths-filter@v3 + # Run the document check if there are changes in the R package. id: changes with: filters: | @@ -100,4 +94,12 @@ jobs: - name: Run document check if: steps.changes.outputs.r_package == 'true' run: | - python3 ops/script/test_r_package.py --r=/usr/bin/R --task=doc + python3 ops/script/test_r_package.py --r=/opt/R/release/bin/R --task=doc + - name: Run lintr + run: | + # Prevent the linter from checking generated R scripts. + if [ -d ./xgboost.Rcheck ] ; then + rm -rf ./xgboost.Rcheck + fi + MAKEFLAGS="-j$(nproc)" R CMD INSTALL R-package/ + Rscript ops/script/lint_r.R $(pwd) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 10d430c5f0f8..0dd950ba2955 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -36,7 +36,7 @@ jobs: python ops/pipeline/manage-artifacts.py upload ` --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} ` --prefix cache/${{ github.run_id }}/build-win64-gpu ` - build/testxgboost.exe xgboost.exe ` + build/testxgboost.exe ` (Get-ChildItem python-package/dist/*.whl | Select-Object -Expand FullName) build-win64-cpu: @@ -70,8 +70,7 @@ jobs: --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} ` --prefix cache/${{ github.run_id }}/build-win64-gpu ` --dest-dir build ` - *.whl testxgboost.exe xgboost.exe - Move-Item -Path build/xgboost.exe -Destination . + *.whl testxgboost.exe New-Item -ItemType Directory -Path python-package/dist/ -Force Move-Item -Path (Get-ChildItem build/*.whl | Select-Object -Expand FullName) ` -Destination python-package/dist/ diff --git a/.gitignore b/.gitignore index c29dcc43d9d3..43820813baf3 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ *.Rcheck *.rds *.tar.gz +*.tar.bz2 *conf *buffer *.model @@ -163,3 +164,4 @@ Rplots.pdf # nsys *.nsys-rep +rmm_log.dev* \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 845347ea1ad6..a79238d7cf69 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ if(PLUGIN_SYCL) string(REPLACE " -isystem ${CONDA_PREFIX}/include" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") endif() -project(xgboost LANGUAGES CXX C VERSION 3.1.0) +project(xgboost LANGUAGES CXX C VERSION 3.2.0) include(cmake/Utils.cmake) list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules") @@ -53,6 +53,7 @@ option(USE_OPENMP "Build with OpenMP support." ON) option(BUILD_STATIC_LIB "Build static library" OFF) option(BUILD_DEPRECATED_CLI "Build the deprecated command line interface" OFF) option(FORCE_SHARED_CRT "Build with dynamic CRT on Windows (/MD)" OFF) +option(BUILD_WITH_GIT_HASH "Add a short git hash to the build info." 
OFF) ## Bindings option(JVM_BINDINGS "Build JVM bindings" OFF) option(R_LIB "Build shared library for R package" OFF) @@ -72,6 +73,7 @@ option(KEEP_BUILD_ARTIFACTS_IN_BINARY_DIR "Output build artifacts in CMake binar ## CUDA option(USE_CUDA "Build with GPU acceleration" OFF) option(USE_NCCL "Build with NCCL to enable distributed GPU support." OFF) +option(USE_NVCOMP "Build with nvcomp to enable sparse data compression. (experimental)" OFF) # This is specifically designed for PyPI binary release and should be disabled for most of the cases. option(USE_DLOPEN_NCCL "Whether to load nccl dynamically." OFF) option(BUILD_WITH_SHARED_NCCL "Build with shared NCCL library." OFF) @@ -124,6 +126,9 @@ endif() if(USE_NCCL AND (NOT USE_CUDA)) message(SEND_ERROR "`USE_NCCL` must be enabled with `USE_CUDA` flag.") endif() +if(USE_NVCOMP AND (NOT USE_CUDA)) + message(SEND_ERROR "`USE_NVCOMP` must be enabled with `USE_CUDA` flag.") +endif() if(USE_DEVICE_DEBUG AND (NOT USE_CUDA)) message(SEND_ERROR "`USE_DEVICE_DEBUG` must be enabled with `USE_CUDA` flag.") endif() @@ -145,6 +150,9 @@ if(R_LIB AND GOOGLE_TEST) "Some C++ tests will fail with `R_LIB` enabled, as R package redirects some functions to R runtime implementation." ) endif() +if(R_LIB AND USE_NCCL) + message(SEND_ERROR "`R_LIB` doesn't support distributed computing with NCCL yet.") +endif() if(PLUGIN_RMM AND NOT (USE_CUDA)) message(SEND_ERROR "`PLUGIN_RMM` must be enabled with `USE_CUDA` flag.") endif() @@ -199,6 +207,9 @@ endif() if(PLUGIN_DENSE_PARSER) message(SEND_ERROR "The option `PLUGIN_DENSE_PARSER` has been removed from XGBoost.") endif() +if(BUILD_DEPRECATED_CLI) + message(SEND_ERROR "The option `BUILD_DEPRECATED_CLI` is removed from XGBoost.") +endif() #-- Sanitizer if(USE_SANITIZER) @@ -229,39 +240,25 @@ if(USE_CUDA) endif() find_package(CUDAToolkit REQUIRED) - find_package(CCCL CONFIG) - if(CCCL_FOUND) - message(STATUS "Standalone CCCL found.") - else() - message(STATUS "Standalone CCCL not found. Attempting to use CCCL from CUDA Toolkit...") - find_package(CCCL CONFIG - HINTS ${CUDAToolkit_LIBRARY_DIR}/cmake) - if(NOT CCCL_FOUND) - message(STATUS "Could not locate CCCL from CUDA Toolkit. 
Using Thrust and CUB from CUDA Toolkit...") - find_package(libcudacxx CONFIG REQUIRED - HINTS ${CUDAToolkit_LIBRARY_DIR}/cmake) - find_package(CUB CONFIG REQUIRED - HINTS ${CUDAToolkit_LIBRARY_DIR}/cmake) - find_package(Thrust CONFIG REQUIRED - HINTS ${CUDAToolkit_LIBRARY_DIR}/cmake) - thrust_create_target(Thrust HOST CPP DEVICE CUDA) - add_library(CCCL::CCCL INTERFACE IMPORTED GLOBAL) - target_link_libraries(CCCL::CCCL INTERFACE libcudacxx::libcudacxx CUB::CUB Thrust) - endif() - endif() - # Define guard macros to prevent windows.h from conflicting with winsock2.h - if(WIN32) - target_compile_definitions(CCCL::CCCL INTERFACE NOMINMAX WIN32_LEAN_AND_MEAN _WINSOCKAPI_) +endif() + +if(USE_NVCOMP) + find_package(nvcomp REQUIRED) + if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 12.8) + message(SEND_ERROR "NVComp support requires CUDA >= 12.8") endif() endif() + if(FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND ((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR (CMAKE_CXX_COMPILER_ID STREQUAL "Clang"))) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always") endif() -find_package(Threads REQUIRED) +if(NOT (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")) + find_package(Threads REQUIRED) +endif() # -- OpenMP include(cmake/FindOpenMPMacOS.cmake) @@ -338,13 +335,41 @@ if(PLUGIN_RMM) list(REMOVE_ITEM rmm_link_libs CUDA::cudart) list(APPEND rmm_link_libs CUDA::cudart_static) set_target_properties(rmm::rmm PROPERTIES INTERFACE_LINK_LIBRARIES "${rmm_link_libs}") + + # Pick up patched CCCL from RMM +elseif(USE_CUDA) + # If using CUDA and not RMM, search for CCCL. + find_package(CCCL CONFIG) + if(CCCL_FOUND) + message(STATUS "Standalone CCCL found.") + else() + message(STATUS "Standalone CCCL not found. Attempting to use CCCL from CUDA Toolkit...") + find_package(CCCL CONFIG + HINTS ${CUDAToolkit_LIBRARY_DIR}/cmake) + if(NOT CCCL_FOUND) + message(STATUS "Could not locate CCCL from CUDA Toolkit. 
Using Thrust and CUB from CUDA Toolkit...")
+      find_package(libcudacxx CONFIG REQUIRED
+        HINTS ${CUDAToolkit_LIBRARY_DIR}/cmake)
+      find_package(CUB CONFIG REQUIRED
+        HINTS ${CUDAToolkit_LIBRARY_DIR}/cmake)
+      find_package(Thrust CONFIG REQUIRED
+        HINTS ${CUDAToolkit_LIBRARY_DIR}/cmake)
+      thrust_create_target(Thrust HOST CPP DEVICE CUDA)
+      add_library(CCCL::CCCL INTERFACE IMPORTED GLOBAL)
+      target_link_libraries(CCCL::CCCL INTERFACE libcudacxx::libcudacxx CUB::CUB Thrust)
+    endif()
+  endif()
+  # Define guard macros to prevent windows.h from conflicting with winsock2.h
+  if(WIN32)
+    target_compile_definitions(CCCL::CCCL INTERFACE NOMINMAX WIN32_LEAN_AND_MEAN _WINSOCKAPI_)
+  endif()
 endif()

 if(PLUGIN_SYCL)
   set(CMAKE_CXX_LINK_EXECUTABLE
     "icpx -qopenmp <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
   set(CMAKE_CXX_CREATE_SHARED_LIBRARY
-    "icpx -qopenmp \
+    "icpx -shared -qopenmp \
     <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <SONAME_FLAG>,<TARGET_SONAME> \
     <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
 endif()
@@ -362,28 +387,6 @@ target_include_directories(xgboost
 #-- End shared library

-#-- CLI for xgboost
-if(BUILD_DEPRECATED_CLI)
-  add_executable(runxgboost ${xgboost_SOURCE_DIR}/src/cli_main.cc)
-  target_link_libraries(runxgboost PRIVATE objxgboost)
-  target_include_directories(runxgboost
-    PRIVATE
-    ${xgboost_SOURCE_DIR}/include
-    ${xgboost_SOURCE_DIR}/dmlc-core/include
-  )
-  set_target_properties(runxgboost PROPERTIES OUTPUT_NAME xgboost)
-  xgboost_target_properties(runxgboost)
-  xgboost_target_link_libraries(runxgboost)
-  xgboost_target_defs(runxgboost)
-
-  if(KEEP_BUILD_ARTIFACTS_IN_BINARY_DIR)
-    set_output_directory(runxgboost ${xgboost_BINARY_DIR})
-  else()
-    set_output_directory(runxgboost ${xgboost_SOURCE_DIR})
-  endif()
-endif()
-#-- End CLI for xgboost
-
 # Common setup for all targets
 foreach(target xgboost objxgboost dmlc)
   xgboost_target_properties(${target})
@@ -397,7 +400,7 @@ if(JVM_BINDINGS)
   xgboost_target_defs(xgboost4j)
 endif()

-if(USE_OPENMP AND APPLE)
+if(USE_OPENMP AND APPLE AND NOT BUILD_STATIC_LIB)
   patch_openmp_path_macos(xgboost libxgboost)
 endif()

@@ -407,11 +410,6 @@ else()
   set_output_directory(xgboost ${xgboost_SOURCE_DIR}/lib)
 endif()

-# Ensure these two targets do not build simultaneously, as they produce outputs with conflicting names
-if(BUILD_DEPRECATED_CLI)
-  add_dependencies(xgboost runxgboost)
-endif()
-
 #-- Installing XGBoost
 if(R_LIB)
   include(cmake/RPackageInstallTargetSetup.cmake)
@@ -446,17 +444,9 @@ install(DIRECTORY ${xgboost_SOURCE_DIR}/include/xgboost
 #
 # https://github.com/dmlc/xgboost/issues/6085
 if(BUILD_STATIC_LIB)
-  if(BUILD_DEPRECATED_CLI)
-    set(INSTALL_TARGETS xgboost runxgboost objxgboost dmlc)
-  else()
-    set(INSTALL_TARGETS xgboost objxgboost dmlc)
-  endif()
+  set(INSTALL_TARGETS xgboost objxgboost dmlc)
 else()
-  if(BUILD_DEPRECATED_CLI)
-    set(INSTALL_TARGETS xgboost runxgboost)
-  else()
-    set(INSTALL_TARGETS xgboost)
-  endif()
+  set(INSTALL_TARGETS xgboost)
 endif()

 install(TARGETS ${INSTALL_TARGETS}
@@ -501,21 +491,6 @@ if(GOOGLE_TEST)
     NAME TestXGBoostLib
     COMMAND testxgboost
     WORKING_DIRECTORY ${xgboost_BINARY_DIR})
-  # CLI tests
-  configure_file(
-    ${xgboost_SOURCE_DIR}/tests/cli/machine.conf.in
-    ${xgboost_BINARY_DIR}/tests/cli/machine.conf
-    @ONLY
-    NEWLINE_STYLE UNIX)
-  if(BUILD_DEPRECATED_CLI)
-    add_test(
-      NAME TestXGBoostCLI
-      COMMAND runxgboost ${xgboost_BINARY_DIR}/tests/cli/machine.conf
-      WORKING_DIRECTORY ${xgboost_BINARY_DIR})
-    set_tests_properties(TestXGBoostCLI
-      PROPERTIES
-      PASS_REGULAR_EXPRESSION ".*test-rmse:0.087.*")
-  endif()
 endif()

 # Add xgboost.pc
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index f96a7dc0d34c..926ad43fa6f0 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -11,7 +11,7 @@ The Project Management Committee(PMC) consists group of active committers that m
 * [Michael Benesty](https://github.com/pommedeterresautee)
   - Michael is a lawyer and data scientist in France. He is the creator of XGBoost interactive analysis module in R.
 * [Yuan Tang](https://github.com/terrytangyuan), Red Hat
-  - Yuan is a principal software engineer at Red Hat. He contributed mostly in R and Python packages.
+  - Yuan is a Senior Principal Software Engineer at Red Hat AI. He contributed mostly in R and Python packages.
 * [Nan Zhu](https://github.com/CodingCat), Uber
   - Nan is a software engineer in Uber. He contributed mostly in JVM packages.
 * [Jiaming Yuan](https://github.com/trivialfis)
@@ -43,8 +43,8 @@ Committers are people who have made substantial contribution to the project and
 Become a Committer
 ------------------
-XGBoost is a open source project and we are actively looking for new committers who are willing to help maintaining and lead the project.
-Committers comes from contributors who:
+XGBoost is an open source project and we are actively looking for new committers who are willing to help maintain and lead the project.
+Committers come from contributors who:
 * Made substantial contribution to the project.
 * Willing to spend time on maintaining and leading the project.
diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION
index 47be99b20dc9..db54f4775d71 100644
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: xgboost
 Type: Package
 Title: Extreme Gradient Boosting
-Version: 3.1.0.0
-Date: 2025-03-03
+Version: 3.2.0.0
+Date: 2025-09-27
 Authors@R: c(
     person("Tianqi", "Chen", role = c("aut"),
            email = "tianqi.tchen@gmail.com"),
@@ -71,6 +71,6 @@ Imports:
     data.table (>= 1.9.6),
     jsonlite (>= 1.0)
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.2
+RoxygenNote: 7.3.3
 Encoding: UTF-8
 SystemRequirements: GNU make, C++17
diff --git a/R-package/LICENSE b/R-package/LICENSE
deleted file mode 100644
index bc1c21d59fe5..000000000000
--- a/R-package/LICENSE
+++ /dev/null
@@ -1,13 +0,0 @@
-Copyright (c) 2014-2023, Tianqi Chen and XBGoost Contributors
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE
index 1fa0d7a9cf46..af37b12c7c01 100644
--- a/R-package/NAMESPACE
+++ b/R-package/NAMESPACE
@@ -103,6 +103,7 @@ importFrom(stats,median)
 importFrom(stats,predict)
 importFrom(stats,sd)
 importFrom(stats,variable.names)
+importFrom(utils,hasName)
 importFrom(utils,head)
 importFrom(utils,object.size)
 importFrom(utils,str)
diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R
index 5c30ffccaa16..19a197a5b3b3 100644
--- a/R-package/R/xgb.Booster.R
+++ b/R-package/R/xgb.Booster.R
@@ -1104,8 +1104,9 @@ coef.xgb.Booster <- function(object, ...) {
   if (booster_type != "gblinear") {
     stop("Coefficients are not defined for Booster type ", booster_type)
   }
-  model_json <- jsonlite::fromJSON(rawToChar(xgb.save.raw(object, raw_format = "json")))
-  base_score <- model_json$learner$learner_model_param$base_score
+  model_json <- jsonlite::fromJSON(
+    rawToChar(xgb.save.raw(object, raw_format = "json"))
+  )
   num_feature <- as.numeric(model_json$learner$learner_model_param$num_feature)

   weights <- model_json$learner$gradient_booster$model$weights
@@ -1116,6 +1117,9 @@ coef.xgb.Booster <- function(object, ...) {
     sep <- num_feature * n_cols
     coefs <- weights[seq(1, sep)]
     intercepts <- weights[seq(sep + 1, length(weights))]
+    base_score <- jsonlite::fromJSON(
+      model_json$learner$learner_model_param$base_score
+    )
     intercepts <- intercepts + as.numeric(base_score)

     if (add_names) {
@@ -1206,6 +1210,14 @@ xgb.copy.Booster <- function(model) {
   return(.Call(XGDuplicate_R, model))
 }

+xgb.reset.Booster <- function(model) {
+  if (!inherits(model, "xgb.Booster")) {
+    stop("'model' must be an 'xgb.Booster' object.")
+  }
+  .Call(XGBoosterReset_R, xgb.get.handle(model))
+  return(model)
+}
+
 #' Check if two boosters share the same C object
 #'
 #' Checks whether two booster objects refer to the same underlying C object.
diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R
index f81fd51dfec8..50b13afa93ea 100644
--- a/R-package/R/xgb.cv.R
+++ b/R-package/R/xgb.cv.R
@@ -76,8 +76,6 @@
 #'  - `nfeatures`: Number of features in training data.
 #'  - `folds`: The list of CV folds' indices - either those passed through the `folds`
 #'    parameter or randomly generated.
-#'  - `best_iteration`: Iteration number with the best evaluation metric value
-#'    (only available with early stopping).
 #'
 #' Plus other potential elements that are the result of callbacks, such as a list `cv_predict` with
 #' a sub-element `pred` when passing `prediction = TRUE`, which is added by the [xgb.cb.cv.predict()]
@@ -92,18 +90,23 @@
 #'
 #' cv <- xgb.cv(
 #'   data = dtrain,
-#'   nrounds = 3,
+#'   nrounds = 20,
+#'   early_stopping_rounds = 1,
 #'   params = xgb.params(
 #'     nthread = 2,
 #'     max_depth = 3,
 #'     objective = "binary:logistic"
 #'   ),
 #'   nfold = 5,
-#'   metrics = list("rmse","auc")
+#'   metrics = list("rmse","auc"),
+#'   prediction = TRUE
 #' )
 #' print(cv)
 #' print(cv, verbose = TRUE)
 #'
+#' # Callbacks might add additional attributes, separated by the name of the callback
+#' cv$early_stop$best_iteration
+#' head(cv$cv_predict$pred)
 #' @export
 xgb.cv <- function(params = xgb.params(), data, nrounds, nfold,
                    prediction = FALSE, showsd = TRUE, metrics = list(),
@@ -273,6 +276,7 @@ xgb.cv <- function(params = xgb.params(), data, nrounds, nfold,
     if (should_stop) break
   }

+
   cb_outputs <- .execute.cb.after.training(
     callbacks,
     bst_folds,
@@ -282,6 +286,11 @@ xgb.cv <- function(params = xgb.params(), data, nrounds, nfold,
     msg
   )

+  # In case the model is referenced in callbacks.
+ lapply(bst_folds, function(fd) { + xgb.reset.Booster(fd$bst) + }) + # the CV result ret <- list( call = match.call(), diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R index a94f42593121..ad4e9298abe3 100644 --- a/R-package/R/xgb.train.R +++ b/R-package/R/xgb.train.R @@ -390,7 +390,7 @@ xgb.train <- function(params = xgb.params(), data, nrounds, evals = list(), call = match.call(), params = params ) - + bst <- xgb.reset.Booster(bst) curr_attrs <- attributes(bst) if (NROW(curr_attrs)) { curr_attrs <- curr_attrs[ diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R index 9ebc866cbcf2..a77d9d42387d 100644 --- a/R-package/R/xgboost.R +++ b/R-package/R/xgboost.R @@ -1596,6 +1596,7 @@ NULL #' @importFrom stats sd #' @importFrom stats variable.names #' @importFrom utils head +#' @importFrom utils hasName #' @importFrom graphics barplot #' @importFrom graphics lines #' @importFrom graphics points diff --git a/R-package/bootstrap.R b/R-package/bootstrap.R new file mode 100644 index 000000000000..dfafca67085f --- /dev/null +++ b/R-package/bootstrap.R @@ -0,0 +1,36 @@ +## Script used to bootstrap R-universe build. + +## Execute git commands to initialize git submodules +system("git submodule init") +system("git submodule update") + +## core +file.copy("../src", "./src/", recursive = TRUE) +file.copy("../include", "./src/", recursive = TRUE) +file.copy("../amalgamation", "./src/", recursive = TRUE) + +## dmlc-core +dir.create("./src/dmlc-core") +file.copy("../dmlc-core/include", "./src/dmlc-core/", recursive = TRUE) +file.copy("../dmlc-core/src", "./src/dmlc-core/", recursive = TRUE) + +pkgroot <- function(path) { + ## read the file from path, replace the PKGROOT=../../ with PKGROOT=. + lines <- readLines(path) + lines <- gsub("PKGROOT=../../", "PKGROOT=.", lines, fixed = TRUE) + writeLines(lines, path) +} + +## makefile and license +file.copy("../LICENSE", "./LICENSE") +pkgroot("./src/Makevars.in") +pkgroot("./src/Makevars.win.in") + +## misc +path <- file.path("remove_warning_suppression_pragma.sh") +file.remove(path) +path <- file.path("CMakeLists.txt") +file.remove(path) + +## remove the directory recursively ./tests/helper_scripts +unlink("tests/helper_scripts", recursive = TRUE) diff --git a/R-package/configure b/R-package/configure index fc4594db9458..cb54bc2bebdb 100755 --- a/R-package/configure +++ b/R-package/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.71 for xgboost 3.1.0. +# Generated by GNU Autoconf 2.71 for xgboost 3.2.0. # # # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, @@ -607,8 +607,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='xgboost' PACKAGE_TARNAME='xgboost' -PACKAGE_VERSION='3.1.0' -PACKAGE_STRING='xgboost 3.1.0' +PACKAGE_VERSION='3.2.0' +PACKAGE_STRING='xgboost 3.2.0' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1262,7 +1262,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures xgboost 3.1.0 to adapt to many kinds of systems. +\`configure' configures xgboost 3.2.0 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... 
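The `xgb.reset.Booster()` calls above, after `xgb.train()` and for each CV fold, go through the new `XGBoosterReset` C API registered in `init.c` further down, apparently to release training-time caches so that boosters captured by callbacks do not pin their training data in memory. A sketch of the same call from Python via ctypes; `_LIB`, `_check_call`, and `Booster.handle` are private implementation details of the Python package, and the exported symbol is assumed to be present in the linked libxgboost, so treat this as illustration only:

```python
import xgboost as xgb
from xgboost.core import _LIB, _check_call


def reset_booster(booster: xgb.Booster) -> xgb.Booster:
    # Invoke the XGBoosterReset C API on the booster's raw handle. The
    # trained model (trees, parameters) survives; cached references to
    # training data are dropped, mirroring the R helper above.
    _check_call(_LIB.XGBoosterReset(booster.handle))
    return booster
```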
@@ -1324,7 +1324,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of xgboost 3.1.0:";; + short | recursive ) echo "Configuration of xgboost 3.2.0:";; esac cat <<\_ACEOF @@ -1407,7 +1407,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -xgboost configure 3.1.0 +xgboost configure 3.2.0 generated by GNU Autoconf 2.71 Copyright (C) 2021 Free Software Foundation, Inc. @@ -1668,7 +1668,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by xgboost $as_me 3.1.0, which was +It was created by xgboost $as_me 3.2.0, which was generated by GNU Autoconf 2.71. Invocation command line was $ $0$ac_configure_args_raw @@ -2796,11 +2796,11 @@ if test x$ac_prog_cxx_stdcxx = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++11 features" >&5 printf %s "checking for $CXX option to enable C++11 features... " >&6; } -if test ${ac_cv_prog_cxx_11+y} +if test ${ac_cv_prog_cxx_cxx11+y} then : printf %s "(cached) " >&6 else $as_nop - ac_cv_prog_cxx_11=no + ac_cv_prog_cxx_cxx11=no ac_save_CXX=$CXX cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -2842,11 +2842,11 @@ if test x$ac_prog_cxx_stdcxx = xno then : { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++98 features" >&5 printf %s "checking for $CXX option to enable C++98 features... " >&6; } -if test ${ac_cv_prog_cxx_98+y} +if test ${ac_cv_prog_cxx_cxx98+y} then : printf %s "(cached) " >&6 else $as_nop - ac_cv_prog_cxx_98=no + ac_cv_prog_cxx_cxx98=no ac_save_CXX=$CXX cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -3855,7 +3855,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by xgboost $as_me 3.1.0, which was +This file was extended by xgboost $as_me 3.2.0, which was generated by GNU Autoconf 2.71. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -3919,7 +3919,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -xgboost config.status 3.1.0 +xgboost config.status 3.2.0 configured by $0, generated by GNU Autoconf 2.71, with options \\"\$ac_cs_config\\" diff --git a/R-package/configure.ac b/R-package/configure.ac index fb5a28b5a95f..68369c9275f5 100644 --- a/R-package/configure.ac +++ b/R-package/configure.ac @@ -2,7 +2,7 @@ AC_PREREQ(2.69) -AC_INIT([xgboost],[3.1.0],[],[xgboost],[]) +AC_INIT([xgboost],[3.2.0],[],[xgboost],[]) : ${R_HOME=`R RHOME`} if test -z "${R_HOME}"; then diff --git a/R-package/man/xgb.cv.Rd b/R-package/man/xgb.cv.Rd index e1bf60116ca6..e857a9b7420e 100644 --- a/R-package/man/xgb.cv.Rd +++ b/R-package/man/xgb.cv.Rd @@ -170,8 +170,6 @@ It is created by the \code{\link[=xgb.cb.evaluation.log]{xgb.cb.evaluation.log() \item \code{nfeatures}: Number of features in training data. \item \code{folds}: The list of CV folds' indices - either those passed through the \code{folds} parameter or randomly generated. -\item \code{best_iteration}: Iteration number with the best evaluation metric value -(only available with early stopping). 
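The doc hunk below drops the top-level `best_iteration` field from `xgb.cv()` results: with the callback rework it now lives under the early-stopping callback's output, `cv$early_stop$best_iteration`. For comparison (not part of this diff), the Python package keeps the equivalent value on the booster when early stopping is active:

```python
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X, y = rng.normal(size=(512, 8)), rng.integers(0, 2, size=512)
dtrain = xgb.DMatrix(X[:400], label=y[:400])
dvalid = xgb.DMatrix(X[400:], label=y[400:])

booster = xgb.train(
    {"objective": "binary:logistic", "max_depth": 3, "nthread": 2},
    dtrain,
    num_boost_round=20,
    evals=[(dvalid, "validation")],
    early_stopping_rounds=1,  # same value the updated R example uses
    verbose_eval=False,
)
print(booster.best_iteration)  # iteration with the best validation metric
```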
} Plus other potential elements that are the result of callbacks, such as a list \code{cv_predict} with @@ -203,16 +201,21 @@ dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2)) cv <- xgb.cv( data = dtrain, - nrounds = 3, + nrounds = 20, + early_stopping_rounds = 1, params = xgb.params( nthread = 2, max_depth = 3, objective = "binary:logistic" ), nfold = 5, - metrics = list("rmse","auc") + metrics = list("rmse","auc"), + prediction = TRUE ) print(cv) print(cv, verbose = TRUE) +# Callbacks might add additional attributes, separated by the name of the callback +cv$early_stop$best_iteration +head(cv$cv_predict$pred) } diff --git a/R-package/src/Makevars.in b/R-package/src/Makevars.in index 67e7a86b0033..07fb0c9d5153 100644 --- a/R-package/src/Makevars.in +++ b/R-package/src/Makevars.in @@ -85,11 +85,12 @@ OBJECTS= \ $(PKGROOT)/src/data/iterative_dmatrix.o \ $(PKGROOT)/src/predictor/predictor.o \ $(PKGROOT)/src/predictor/cpu_predictor.o \ - $(PKGROOT)/src/predictor/cpu_treeshap.o \ + $(PKGROOT)/src/predictor/treeshap.o \ $(PKGROOT)/src/tree/constraints.o \ $(PKGROOT)/src/tree/param.o \ $(PKGROOT)/src/tree/fit_stump.o \ $(PKGROOT)/src/tree/tree_model.o \ + $(PKGROOT)/src/tree/tree_view.o \ $(PKGROOT)/src/tree/tree_updater.o \ $(PKGROOT)/src/tree/multi_target_tree_model.o \ $(PKGROOT)/src/tree/updater_approx.o \ @@ -98,7 +99,7 @@ OBJECTS= \ $(PKGROOT)/src/tree/updater_quantile_hist.o \ $(PKGROOT)/src/tree/updater_refresh.o \ $(PKGROOT)/src/tree/updater_sync.o \ - $(PKGROOT)/src/tree/hist/param.o \ + $(PKGROOT)/src/tree/hist/hist_param.o \ $(PKGROOT)/src/tree/hist/histogram.o \ $(PKGROOT)/src/linear/linear_updater.o \ $(PKGROOT)/src/linear/updater_coordinate.o \ @@ -127,7 +128,9 @@ OBJECTS= \ $(PKGROOT)/src/common/host_device_vector.o \ $(PKGROOT)/src/common/io.o \ $(PKGROOT)/src/common/json.o \ + $(PKGROOT)/src/common/linalg_op.o \ $(PKGROOT)/src/common/numeric.o \ + $(PKGROOT)/src/common/optional_weight.o \ $(PKGROOT)/src/common/pseudo_huber.o \ $(PKGROOT)/src/common/quantile.o \ $(PKGROOT)/src/common/random.o \ @@ -135,6 +138,7 @@ OBJECTS= \ $(PKGROOT)/src/common/survival_util.o \ $(PKGROOT)/src/common/threading_utils.o \ $(PKGROOT)/src/common/ranking_utils.o \ + $(PKGROOT)/src/common/param_array.o \ $(PKGROOT)/src/common/quantile_loss_utils.o \ $(PKGROOT)/src/common/timer.o \ $(PKGROOT)/src/common/version.o \ diff --git a/R-package/src/Makevars.win.in b/R-package/src/Makevars.win.in index d84cdb43ce6c..471a0deae9b2 100644 --- a/R-package/src/Makevars.win.in +++ b/R-package/src/Makevars.win.in @@ -84,11 +84,12 @@ OBJECTS= \ $(PKGROOT)/src/data/iterative_dmatrix.o \ $(PKGROOT)/src/predictor/predictor.o \ $(PKGROOT)/src/predictor/cpu_predictor.o \ - $(PKGROOT)/src/predictor/cpu_treeshap.o \ + $(PKGROOT)/src/predictor/treeshap.o \ $(PKGROOT)/src/tree/constraints.o \ $(PKGROOT)/src/tree/param.o \ $(PKGROOT)/src/tree/fit_stump.o \ $(PKGROOT)/src/tree/tree_model.o \ + $(PKGROOT)/src/tree/tree_view.o \ $(PKGROOT)/src/tree/multi_target_tree_model.o \ $(PKGROOT)/src/tree/tree_updater.o \ $(PKGROOT)/src/tree/updater_approx.o \ @@ -97,7 +98,7 @@ OBJECTS= \ $(PKGROOT)/src/tree/updater_quantile_hist.o \ $(PKGROOT)/src/tree/updater_refresh.o \ $(PKGROOT)/src/tree/updater_sync.o \ - $(PKGROOT)/src/tree/hist/param.o \ + $(PKGROOT)/src/tree/hist/hist_param.o \ $(PKGROOT)/src/tree/hist/histogram.o \ $(PKGROOT)/src/linear/linear_updater.o \ $(PKGROOT)/src/linear/updater_coordinate.o \ @@ -126,7 +127,9 @@ OBJECTS= \ $(PKGROOT)/src/common/host_device_vector.o \ $(PKGROOT)/src/common/io.o 
\ $(PKGROOT)/src/common/json.o \ + $(PKGROOT)/src/common/linalg_op.o \ $(PKGROOT)/src/common/numeric.o \ + $(PKGROOT)/src/common/optional_weight.o \ $(PKGROOT)/src/common/pseudo_huber.o \ $(PKGROOT)/src/common/quantile.o \ $(PKGROOT)/src/common/random.o \ @@ -134,6 +137,7 @@ OBJECTS= \ $(PKGROOT)/src/common/survival_util.o \ $(PKGROOT)/src/common/threading_utils.o \ $(PKGROOT)/src/common/ranking_utils.o \ + $(PKGROOT)/src/common/param_array.o \ $(PKGROOT)/src/common/quantile_loss_utils.o \ $(PKGROOT)/src/common/timer.o \ $(PKGROOT)/src/common/version.o \ diff --git a/R-package/src/init.c b/R-package/src/init.c index 523e5118a6f5..fb40933ccb5b 100644 --- a/R-package/src/init.c +++ b/R-package/src/init.c @@ -20,6 +20,7 @@ extern SEXP XGDuplicate_R(SEXP); extern SEXP XGPointerEqComparison_R(SEXP, SEXP); extern SEXP XGBoosterTrainOneIter_R(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP XGBoosterCreate_R(SEXP); +extern SEXP XGBoosterReset_R(SEXP); extern SEXP XGBoosterCopyInfoFromDMatrix_R(SEXP, SEXP); extern SEXP XGBoosterSetStrFeatureInfo_R(SEXP, SEXP, SEXP); extern SEXP XGBoosterGetStrFeatureInfo_R(SEXP, SEXP); @@ -83,6 +84,7 @@ static const R_CallMethodDef CallEntries[] = { {"XGPointerEqComparison_R", (DL_FUNC) &XGPointerEqComparison_R, 2}, {"XGBoosterTrainOneIter_R", (DL_FUNC) &XGBoosterTrainOneIter_R, 5}, {"XGBoosterCreate_R", (DL_FUNC) &XGBoosterCreate_R, 1}, + {"XGBoosterReset_R", (DL_FUNC) &XGBoosterReset_R, 1}, {"XGBoosterCopyInfoFromDMatrix_R", (DL_FUNC) &XGBoosterCopyInfoFromDMatrix_R, 2}, {"XGBoosterSetStrFeatureInfo_R",(DL_FUNC) &XGBoosterSetStrFeatureInfo_R,3}, // NOLINT {"XGBoosterGetStrFeatureInfo_R",(DL_FUNC) &XGBoosterGetStrFeatureInfo_R,2}, // NOLINT diff --git a/R-package/src/xgboost_R.cc b/R-package/src/xgboost_R.cc index 68a45ba96780..90f73343cf21 100644 --- a/R-package/src/xgboost_R.cc +++ b/R-package/src/xgboost_R.cc @@ -306,18 +306,24 @@ object, need to be destructed before triggering the R error. In order to preserve the error message, it gets copied to a temporary buffer, and the R error section is reached through a 'goto' statement that bypasses usual function control flow. */ -char cpp_ex_msg[512]; +namespace { +constexpr std::size_t MsgSize = 512; +char cpp_ex_msg[MsgSize]; +} // anonymous namespace + /*! 
* \brief macro to annotate end of api */ -#define R_API_END() \ - } catch(std::exception &e) { \ - std::strncpy(cpp_ex_msg, e.what(), 512); \ - goto throw_cpp_ex_as_R_err; \ - } \ - if (false) { \ - throw_cpp_ex_as_R_err: \ - Rf_error("%s", cpp_ex_msg); \ +#define R_API_END() \ + } \ + catch (std::exception & e) { \ + cpp_ex_msg[MsgSize - 1] = 0; \ + std::strncpy(cpp_ex_msg, e.what(), MsgSize - 1); \ + goto throw_cpp_ex_as_R_err; \ + } \ + if (false) { \ + throw_cpp_ex_as_R_err: \ + Rf_error("%s", cpp_ex_msg); \ } /** @@ -990,7 +996,7 @@ SEXP XGBAltrepSerializer_R(SEXP R_altrepped_obj) { return R_NilValue; /* <- should not be reached */ } -SEXP XGBAltrepDeserializer_R(SEXP unused, SEXP R_state) { +SEXP XGBAltrepDeserializer_R(SEXP /*unused*/, SEXP R_state) { SEXP R_altrepped_obj = Rf_protect(XGBMakeEmptyAltrep()); R_API_BEGIN(); BoosterHandle handle = nullptr; @@ -1081,6 +1087,13 @@ XGB_DLL SEXP XGBoosterCreate_R(SEXP dmats) { return out; } +XGB_DLL SEXP XGBoosterReset_R(SEXP handle) { + R_API_BEGIN(); + CHECK_CALL(XGBoosterReset(R_ExternalPtrAddr(handle))); + R_API_END(); + return R_NilValue; +} + XGB_DLL SEXP XGBoosterCopyInfoFromDMatrix_R(SEXP booster, SEXP dmat) { R_API_BEGIN(); char const **feature_names; diff --git a/R-package/tests/testthat/helper_model.R b/R-package/tests/testthat/helper_model.R new file mode 100644 index 000000000000..4a11ce52042f --- /dev/null +++ b/R-package/tests/testthat/helper_model.R @@ -0,0 +1,7 @@ +## A special file sourced by testthat. + +get_basescore <- function(model) { + as.numeric( + jsonlite::fromJSON(model$learner$learner_model_param$base_score) + ) +} diff --git a/R-package/tests/testthat/test_custom_objective.R b/R-package/tests/testthat/test_custom_objective.R index 30a854808610..d3185e7fccdd 100644 --- a/R-package/tests/testthat/test_custom_objective.R +++ b/R-package/tests/testthat/test_custom_objective.R @@ -77,12 +77,17 @@ test_that("custom objective with multi-class shape", { n_classes <- 3 fake_softprob <- function(preds, dtrain) { - expect_true(all(matrix(preds) == 0.5)) + mpreds <- as.matrix(preds) + expect_equal( + mpreds, + matrix(0.5, nrow = nrow(mpreds), ncol = ncol(mpreds)), + tolerance = 1e-4 + ) ## use numeric vector here to test compatibility with XGBoost < 2.1 grad <- rnorm(length(as.matrix(preds))) expect_equal(dim(data)[1] * n_classes, dim(as.matrix(preds))[1] * n_classes) hess <- rnorm(length(as.matrix(preds))) - return(list(grad = grad, hess = hess)) + list(grad = grad, hess = hess) } fake_merror <- function(preds, dtrain) { expect_equal(dim(data)[1] * n_classes, dim(as.matrix(preds))[1]) diff --git a/R-package/tests/testthat/test_io.R b/R-package/tests/testthat/test_io.R index 722f278b3f7a..980c400999d9 100644 --- a/R-package/tests/testthat/test_io.R +++ b/R-package/tests/testthat/test_io.R @@ -18,16 +18,14 @@ test_that("load/save raw works", { json_bytes <- xgb.save.raw(booster, raw_format = "json") ubj_bytes <- xgb.save.raw(booster, raw_format = "ubj") - old_bytes <- xgb.save.raw(booster, raw_format = "deprecated") from_json <- xgb.load.raw(json_bytes) from_ubj <- xgb.load.raw(ubj_bytes) - json2old <- xgb.save.raw(from_json, raw_format = "deprecated") - ubj2old <- xgb.save.raw(from_ubj, raw_format = "deprecated") + json2ubj <- xgb.save.raw(from_json, raw_format = "ubj") + ubj2ubj <- xgb.save.raw(from_ubj, raw_format = "ubj") - expect_equal(json2old, ubj2old) - expect_equal(json2old, old_bytes) + expect_equal(json2ubj, ubj2ubj) }) test_that("saveRDS preserves C and R attributes", { diff --git 
a/R-package/tests/testthat/test_model_compatibility.R b/R-package/tests/testthat/test_model_compatibility.R index 9bab6e0c91a7..b084a9bff3bc 100644 --- a/R-package/tests/testthat/test_model_compatibility.R +++ b/R-package/tests/testthat/test_model_compatibility.R @@ -1,7 +1,7 @@ context("Models from previous versions of XGBoost can be loaded") metadata <- list( - kRounds = 2, + kRounds = 4, kRows = 1000, kCols = 4, kForests = 2, @@ -10,87 +10,126 @@ metadata <- list( ) run_model_param_check <- function(config) { - testthat::expect_equal(config$learner$learner_model_param$num_feature, '4') - testthat::expect_equal(config$learner$learner_train_param$booster, 'gbtree') + testthat::expect_equal(config$learner$learner_model_param$num_feature, "4") + testthat::expect_equal(config$learner$learner_train_param$booster, "gbtree") +} + +get_n_rounds <- function(model_file) { + is_10 <- grepl("1.0.0rc1", model_file, fixed = TRUE) + if (is_10) { + 2 + } else { + metadata$kRounds + } } get_num_tree <- function(booster) { dump <- xgb.dump(booster) - m <- regexec('booster\\[[0-9]+\\]', dump, perl = TRUE) + m <- regexec("booster\\[[0-9]+\\]", dump, perl = TRUE) m <- regmatches(dump, m) - num_tree <- Reduce('+', lapply(m, length)) - return(num_tree) + num_tree <- Reduce("+", lapply(m, length)) + num_tree } -run_booster_check <- function(booster, name) { +run_booster_check <- function(booster, model_file) { config <- xgb.config(booster) run_model_param_check(config) - if (name == 'cls') { - testthat::expect_equal(get_num_tree(booster), - metadata$kForests * metadata$kRounds * metadata$kClasses) - testthat::expect_equal(as.numeric(config$learner$learner_model_param$base_score), 0.5) - testthat::expect_equal(config$learner$learner_train_param$objective, 'multi:softmax') - testthat::expect_equal(as.numeric(config$learner$learner_model_param$num_class), - metadata$kClasses) - } else if (name == 'logitraw') { - testthat::expect_equal(get_num_tree(booster), metadata$kForests * metadata$kRounds) - testthat::expect_equal(as.numeric(config$learner$learner_model_param$num_class), 0) - testthat::expect_equal(config$learner$learner_train_param$objective, 'binary:logitraw') - } else if (name == 'logit') { - testthat::expect_equal(get_num_tree(booster), metadata$kForests * metadata$kRounds) - testthat::expect_equal(as.numeric(config$learner$learner_model_param$num_class), 0) - testthat::expect_equal(config$learner$learner_train_param$objective, 'binary:logistic') - } else if (name == 'ltr') { - testthat::expect_equal(get_num_tree(booster), metadata$kForests * metadata$kRounds) - testthat::expect_equal(config$learner$learner_train_param$objective, 'rank:ndcg') + is_model <- function(typ) { + grepl(typ, model_file, fixed = TRUE) + } + n_rounds <- get_n_rounds(model_file = model_file) + if (is_model("cls")) { + testthat::expect_equal( + get_num_tree(booster), metadata$kForests * n_rounds * metadata$kClasses + ) + testthat::expect_equal(get_basescore(config), c(0.5, 0.5, 0.5)) # nolint + testthat::expect_equal( + config$learner$learner_train_param$objective, "multi:softmax" + ) + testthat::expect_equal( + as.numeric(config$learner$learner_model_param$num_class), + metadata$kClasses + ) + } else if (is_model("logitraw")) { + testthat::expect_equal(get_num_tree(booster), metadata$kForests * n_rounds) + testthat::expect_equal( + as.numeric(config$learner$learner_model_param$num_class), 0 + ) + testthat::expect_equal( + config$learner$learner_train_param$objective, "binary:logitraw" + ) + } else if (is_model("logit")) { + 
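The rewritten checks above read `base_score` through the new `get_basescore()` helper (defined in `helper_model.R` earlier) because the field is now a JSON-encoded string that may hold a vector, hence the `c(0.5, 0.5, 0.5)` expectation for the multi-class model. The same double decode from Python, for reference (not part of this diff):

```python
import json

import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X, y = rng.normal(size=(100, 4)), rng.integers(0, 3, size=100)
booster = xgb.train(
    {"objective": "multi:softmax", "num_class": 3, "nthread": 2},
    xgb.DMatrix(X, label=y),
    num_boost_round=2,
)

model = json.loads(bytes(booster.save_raw(raw_format="json")))
raw = model["learner"]["learner_model_param"]["base_score"]
# The field is itself JSON: a scalar like "5E-1", or a list for
# vector-valued intercepts, so it needs a second decode.
print(json.loads(raw))
```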
testthat::expect_equal(get_num_tree(booster), metadata$kForests * n_rounds) + testthat::expect_equal( + as.numeric(config$learner$learner_model_param$num_class), 0 + ) + testthat::expect_equal( + config$learner$learner_train_param$objective, "binary:logistic" + ) + } else if (is_model("ltr")) { + testthat::expect_equal(get_num_tree(booster), metadata$kForests * n_rounds) + testthat::expect_equal( + config$learner$learner_train_param$objective, "rank:ndcg" + ) + } else if (is_model("aft")) { + testthat::expect_equal(get_num_tree(booster), metadata$kForests * n_rounds) + testthat::expect_equal( + config$learner$learner_train_param$objective, "survival:aft" + ) } else { - testthat::expect_equal(name, 'reg') - testthat::expect_equal(get_num_tree(booster), metadata$kForests * metadata$kRounds) - testthat::expect_equal(as.numeric(config$learner$learner_model_param$base_score), 0.5) - testthat::expect_equal(config$learner$learner_train_param$objective, 'reg:squarederror') + testthat::expect_true(is_model("reg")) + testthat::expect_equal(get_num_tree(booster), metadata$kForests * n_rounds) + testthat::expect_equal(get_basescore(config), 0.5) # nolint + testthat::expect_equal( + config$learner$learner_train_param$objective, "reg:squarederror" + ) } } test_that("Models from previous versions of XGBoost can be loaded", { - bucket <- 'xgboost-ci-jenkins-artifacts' - region <- 'us-west-2' - file_name <- 'xgboost_r_model_compatibility_test.zip' + bucket <- "xgboost-ci-jenkins-artifacts" + region <- "us-west-2" + file_name <- "xgboost_model_compatibility_tests-3.0.2.zip" zipfile <- tempfile(fileext = ".zip") extract_dir <- tempdir() - download.file(paste('https://', bucket, '.s3-', region, '.amazonaws.com/', file_name, sep = ''), - destfile = zipfile, mode = 'wb', quiet = TRUE) + result <- tryCatch( + { + download.file( + paste( + "https://", bucket, ".s3-", region, ".amazonaws.com/", file_name, + sep = "" + ), + destfile = zipfile, mode = "wb", quiet = TRUE + ) + zipfile + }, + error = function(e) { + print(e) + NA_character_ + } + ) + if (is.na(result)) { + print("Failed to download old models.") + return() + } + unzip(zipfile, exdir = extract_dir, overwrite = TRUE) - model_dir <- file.path(extract_dir, 'models') + model_dir <- file.path(extract_dir, "models") - pred_data <- xgb.DMatrix(matrix(c(0, 0, 0, 0), nrow = 1, ncol = 4), nthread = 2) + pred_data <- xgb.DMatrix( + matrix(c(0, 0, 0, 0), nrow = 1, ncol = 4), + nthread = 2 + ) lapply(list.files(model_dir), function(x) { model_file <- file.path(model_dir, x) - m <- regexec("xgboost-([0-9\\.]+)\\.([a-z]+)\\.[a-z]+", model_file, perl = TRUE) - m <- regmatches(model_file, m)[[1]] - model_xgb_ver <- m[2] - name <- m[3] - is_rds <- endsWith(model_file, '.rds') - is_json <- endsWith(model_file, '.json') - # TODO: update this test for new RDS format - if (is_rds) { - return(NULL) - } - # Expect an R warning when a model is loaded from RDS and it was generated by version < 1.1.x - if (is_rds && compareVersion(model_xgb_ver, '1.1.1.1') < 0) { - booster <- readRDS(model_file) - expect_warning(predict(booster, newdata = pred_data)) - booster <- readRDS(model_file) - expect_warning(run_booster_check(booster, name)) - } else { - if (is_rds) { - booster <- readRDS(model_file) - } else { - booster <- xgb.load(model_file) - xgb.model.parameters(booster) <- list(nthread = 2) - } - predict(booster, newdata = pred_data) - run_booster_check(booster, name) + is_skl <- grepl("scikit", model_file, fixed = TRUE) + if (is_skl) { + return() } + booster <- 
xgb.load(model_file) + xgb.model.parameters(booster) <- list(nthread = 2) + predict(booster, newdata = pred_data) + run_booster_check(booster, model_file) }) }) diff --git a/R-package/tests/testthat/test_poisson_regression.R b/R-package/tests/testthat/test_poisson_regression.R index 3aef55fa1e97..f73bc34878af 100644 --- a/R-package/tests/testthat/test_poisson_regression.R +++ b/R-package/tests/testthat/test_poisson_regression.R @@ -27,7 +27,7 @@ test_that("Poisson regression is centered around mean", { ) model_json <- xgb.save.raw(model, "json") |> rawToChar() |> jsonlite::fromJSON() expect_equal( - model_json$learner$learner_model_param$base_score |> as.numeric(), + get_basescore(model_json), mean(y), tolerance = 1e-4 ) @@ -47,7 +47,7 @@ test_that("Poisson regression is centered around mean", { ) model_json <- xgb.save.raw(model_weighted, "json") |> rawToChar() |> jsonlite::fromJSON() expect_equal( - model_json$learner$learner_model_param$base_score |> as.numeric(), + get_basescore(model_json), weighted.mean(y, w), tolerance = 1e-4 ) diff --git a/README.md b/README.md index f5af0cdc4e7f..ac34880b3064 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,7 @@ eXtreme Gradient Boosting =========== -[![Build Status](https://badge.buildkite.com/aca47f40a32735c00a8550540c5eeff6a4c1d246a580cae9b0.svg?branch=master)](https://buildkite.com/xgboost/xgboost-ci) -[![XGBoost-CI](https://github.com/dmlc/xgboost/workflows/XGBoost-CI/badge.svg?branch=master)](https://github.com/dmlc/xgboost/actions) +[![XGBoost-CI](https://github.com/dmlc/xgboost/workflows/XGBoost%20CI/badge.svg?branch=master)](https://github.com/dmlc/xgboost/actions) [![Documentation Status](https://readthedocs.org/projects/xgboost/badge/?version=latest)](https://xgboost.readthedocs.org) [![GitHub license](https://dmlc.github.io/img/apache2.svg)](./LICENSE) [![CRAN Status Badge](https://www.r-pkg.org/badges/version/xgboost)](https://cran.r-project.org/web/packages/xgboost) @@ -50,7 +49,8 @@ Become a sponsor and get a logo here. 
 See details at [Sponsoring the XGBoost Project]
 
 NVIDIA
-
+
+
 ### Backers
 [[Become a backer](https://opencollective.com/xgboost#backer)]
diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake
index 246bf7eeaf60..6f785dd3ab4c 100644
--- a/cmake/Utils.cmake
+++ b/cmake/Utils.cmake
@@ -43,6 +43,20 @@ function(set_default_configuration_release)
   endif()
 endfunction()
 
+if(BUILD_WITH_GIT_HASH)
+  execute_process(COMMAND git rev-parse --short HEAD
+    WORKING_DIRECTORY ${xgboost_SOURCE_DIR}
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+    OUTPUT_VARIABLE XGBOOST_GIT_HASH
+    ERROR_VARIABLE XGBOOST_GIT_ERROR
+    RESULT_VARIABLE GIT_COMMAND_RESULT)
+
+  if(NOT GIT_COMMAND_RESULT EQUAL 0)
+    message(FATAL_ERROR "Failed to retrieve the git hash:\n${XGBOOST_GIT_ERROR}")
+  endif()
+  message(STATUS "Git hash: ${XGBOOST_GIT_HASH}")
+endif()
+
 # Generate CMAKE_CUDA_ARCHITECTURES from a list of architectures
 # Also generates PTX for the most recent architecture for forwards compatibility
 function(compute_cmake_cuda_archs archs)
@@ -55,7 +69,9 @@ function(compute_cmake_cuda_archs archs)
   # Set up defaults based on CUDA version
   if(NOT CMAKE_CUDA_ARCHITECTURES)
-    if(CUDA_VERSION VERSION_GREATER_EQUAL "12.8")
+    if(CUDA_VERSION VERSION_GREATER_EQUAL "13.0")
+      set(CMAKE_CUDA_ARCHITECTURES 75 80 90 100 120)
+    elseif(CUDA_VERSION VERSION_GREATER_EQUAL "12.8")
       set(CMAKE_CUDA_ARCHITECTURES 50 60 70 80 90 100 120)
     elseif(CUDA_VERSION VERSION_GREATER_EQUAL "11.8")
       set(CMAKE_CUDA_ARCHITECTURES 50 60 70 80 90)
@@ -98,13 +114,13 @@ function(xgboost_set_cuda_flags target)
   if(USE_DEVICE_DEBUG)
     target_compile_options(${target} PRIVATE
       $<$<AND:$<CONFIG:DEBUG>,$<COMPILE_LANGUAGE:CUDA>>:-G;-src-in-ptx>)
-  else()
-    target_compile_options(${target} PRIVATE
-      $<$<COMPILE_LANGUAGE:CUDA>:-lineinfo>)
   endif()
 
   if(USE_NVTX)
     target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_NVTX=1)
+    if(NOT USE_DEVICE_DEBUG)
+      target_compile_options(${target} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-lineinfo>)
+    endif()
   endif()
 
   # Use CCCL we find before CUDA Toolkit to make sure we get newer headers as intended
@@ -236,14 +252,23 @@ macro(xgboost_target_defs target)
   if(PLUGIN_RMM)
     target_compile_definitions(objxgboost PUBLIC -DXGBOOST_USE_RMM=1)
   endif()
+
+  if(USE_NVCOMP)
+    target_compile_definitions(objxgboost PUBLIC -DXGBOOST_USE_NVCOMP=1)
+  endif()
+  if(BUILD_WITH_GIT_HASH)
+    target_compile_definitions(objxgboost PUBLIC -DXGBOOST_GIT_HASH="${XGBOOST_GIT_HASH}")
+  endif()
 endmacro()
 
 # handles dependencies
 macro(xgboost_target_link_libraries target)
-  if(BUILD_STATIC_LIB)
-    target_link_libraries(${target} PUBLIC Threads::Threads ${CMAKE_THREAD_LIBS_INIT})
-  else()
-    target_link_libraries(${target} PRIVATE Threads::Threads ${CMAKE_THREAD_LIBS_INIT})
+  if(NOT (CMAKE_SYSTEM_NAME STREQUAL "Emscripten"))
+    if(BUILD_STATIC_LIB)
+      target_link_libraries(${target} PUBLIC Threads::Threads ${CMAKE_THREAD_LIBS_INIT})
+    else()
+      target_link_libraries(${target} PRIVATE Threads::Threads ${CMAKE_THREAD_LIBS_INIT})
+    endif()
   endif()
 
   if(USE_OPENMP)
@@ -262,6 +287,10 @@ macro(xgboost_target_link_libraries target)
     target_link_libraries(${target} PRIVATE rmm::rmm)
   endif()
 
+  if(USE_NVCOMP)
+    target_link_libraries(${target} PRIVATE nvcomp::nvcomp)
+  endif()
+
   if(USE_NCCL)
     xgboost_link_nccl(${target})
   endif()
diff --git a/demo/CLI/README.rst b/demo/CLI/README.rst
deleted file mode 100644
index e828cd8aeb5a..000000000000
--- a/demo/CLI/README.rst
+++ /dev/null
@@ -1,4 +0,0 @@
-XGBoost Command Line Interface Walkthrough
-==========================================
-
-Please note that the command line interface is deprecated in 2.1.0, use other language bindings instead.
For a list of available bindings, see https://xgboost.readthedocs.io/en/stable/ diff --git a/demo/CLI/binary_classification/README.md b/demo/CLI/binary_classification/README.md deleted file mode 100644 index 7fe2120ece0a..000000000000 --- a/demo/CLI/binary_classification/README.md +++ /dev/null @@ -1,163 +0,0 @@ -Binary Classification -===================== -This is the quick start tutorial for xgboost CLI version. -Here we demonstrate how to use XGBoost for a binary classification task. Before getting started, make sure you compile xgboost in the root directory of the project by typing ```make```. -The script 'runexp.sh' can be used to run the demo. Here we use [mushroom dataset](https://archive.ics.uci.edu/ml/datasets/Mushroom) from UCI machine learning repository. - -### Tutorial -#### Generate Input Data -XGBoost takes LIBSVM format. An example of faked input data is below: -``` -1 101:1.2 102:0.03 -0 1:2.1 10001:300 10002:400 -... -``` -Each line represent a single instance, and in the first line '1' is the instance label,'101' and '102' are feature indices, '1.2' and '0.03' are feature values. In the binary classification case, '1' is used to indicate positive samples, and '0' is used to indicate negative samples. We also support probability values in [0,1] as label, to indicate the probability of the instance being positive. - - -First we will transform the dataset into classic LIBSVM format and split the data into training set and test set by running: -``` -python mapfeat.py -python mknfold.py agaricus.txt 1 -``` -The two files, 'agaricus.txt.train' and 'agaricus.txt.test' will be used as training set and test set. - -#### Training -Then we can run the training process: -``` -../../xgboost mushroom.conf -``` - -mushroom.conf is the configuration for both training and testing. Each line containing the [attribute]=[value] configuration: - -```conf -# General Parameters, see comment for each definition -# can be gbtree or gblinear -booster = gbtree -# choose logistic regression loss function for binary classification -objective = binary:logistic - -# Tree Booster Parameters -# step size shrinkage -eta = 1.0 -# minimum loss reduction required to make a further partition -gamma = 1.0 -# minimum sum of instance weight(hessian) needed in a child -min_child_weight = 1 -# maximum depth of a tree -max_depth = 3 - -# Task Parameters -# the number of round to do boosting -num_round = 2 -# 0 means do not save any model except the final round model -save_period = 0 -# The path of training data -data = "agaricus.txt.train" -# The path of validation data, used to monitor training process, here [test] sets name of the validation set -eval[test] = "agaricus.txt.test" -# The path of test data -test:data = "agaricus.txt.test" -``` -We use the tree booster and logistic regression objective in our setting. This indicates that we accomplish our task using classic gradient boosting regression tree(GBRT), which is a promising method for binary classification. - -The parameters shown in the example gives the most common ones that are needed to use xgboost. -If you are interested in more parameter settings, the complete parameter settings and detailed descriptions are [here](https://xgboost.readthedocs.io/en/stable/parameter.html). Besides putting the parameters in the configuration file, we can set them by passing them as arguments as below: - -``` -../../xgboost mushroom.conf max_depth=6 -``` -This means that the parameter max_depth will be set as 6 rather than 3 in the conf file. 
When you use command line, make sure max_depth=6 is passed in as single argument, i.e. do not contain space in the argument. When a parameter setting is provided in both command line input and the config file, the command line setting will override the setting in config file.
-
-In this example, we use tree booster for gradient boosting. If you would like to use linear booster for regression, you can keep all the parameters except booster and the tree booster parameters as below:
-```conf
-# General Parameters
-# choose the linear booster
-booster = gblinear
-...
-
-# Change Tree Booster Parameters into Linear Booster Parameters
-# L2 regularization term on weights, default 0
-lambda = 0.01
-# L1 regularization term on weights, default 0
-alpha = 0.01
-# L2 regularization term on bias, default 0
-lambda_bias = 0.01
-
-# Regression Parameters
-...
-```
-
-#### Get Predictions
-After training, we can use the output model to get the prediction of the test data:
-```
-../../xgboost mushroom.conf task=pred model_in=0002.model
-```
-For binary classification, the output predictions are probability confidence scores in [0,1], corresponds to the probability of the label to be positive.
-
-#### Dump Model
-This is a preliminary feature, so only tree models support text dump. XGBoost can display the tree models in text or JSON files, and we can scan the model in an easy way:
-```
-../../xgboost mushroom.conf task=dump model_in=0002.model name_dump=dump.raw.txt
-../../xgboost mushroom.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt
-```
-
-In this demo, the tree boosters obtained will be printed in dump.raw.txt and dump.nice.txt, and the latter one is easier to understand because of usage of feature mapping featmap.txt
-
-Format of ```featmap.txt: <featureid> <featurename> <q or i or int>\n ```:
- - Feature id must be from 0 to number of features, in sorted order.
- - i means this feature is binary indicator feature
- - q means this feature is a quantitative value, such as age, time, can be missing
- - int means this feature is integer value (when int is hinted, the decision boundary will be integer)
-
-#### Monitoring Progress
-When you run training we can find there are messages displayed on screen
-```
-tree train end, 1 roots, 12 extra nodes, 0 pruned nodes ,max_depth=3
-[0] test-error:0.016139
-boosting round 1, 0 sec elapsed
-
-tree train end, 1 roots, 10 extra nodes, 0 pruned nodes ,max_depth=3
-[1] test-error:0.000000
-```
-The messages for evaluation are printed into stderr, so if you want only to log the evaluation progress, simply type
-```
-../../xgboost mushroom.conf 2>log.txt
-```
-Then you can find the following content in log.txt
-```
-[0] test-error:0.016139
-[1] test-error:0.000000
-```
-We can also monitor both training and test statistics, by adding following lines to configure
-```conf
-eval[test] = "agaricus.txt.test"
-eval[trainname] = "agaricus.txt.train"
-```
-Run the command again, we can find the log file becomes
-```
-[0] test-error:0.016139 trainname-error:0.014433
-[1] test-error:0.000000 trainname-error:0.001228
-```
-The rule is eval[name-printed-in-log] = filename, then the file will be added to monitoring process, and evaluated each round.
-
-xgboost also supports monitoring multiple metrics, suppose we also want to monitor average log-likelihood of each prediction during training, simply add ```eval_metric=logloss``` to configure.
Run again, we can find the log file becomes -``` -[0] test-error:0.016139 test-negllik:0.029795 trainname-error:0.014433 trainname-negllik:0.027023 -[1] test-error:0.000000 test-negllik:0.000000 trainname-error:0.001228 trainname-negllik:0.002457 -``` -### Saving Progress Models -If you want to save model every two round, simply set save_period=2. You will find 0002.model in the current folder. If you want to change the output folder of models, add model_dir=foldername. By default xgboost saves the model of last round. - -#### Continue from Existing Model -If you want to continue boosting from existing model, say 0002.model, use -``` -../../xgboost mushroom.conf model_in=0002.model num_round=2 model_out=continue.model -``` -xgboost will load from 0002.model continue boosting for 2 rounds, and save output to continue.model. However, beware that the training and evaluation data specified in mushroom.conf should not change when you use this function. -#### Use Multi-Threading -When you are working with a large dataset, you may want to take advantage of parallelism. If your compiler supports OpenMP, xgboost is naturally multi-threaded, to set number of parallel running add ```nthread``` parameter to your configuration. -Eg. ```nthread=10``` - -Set nthread to be the number of your real cpu (On Unix, this can be found using ```lscpu```) -Some systems will have ```Thread(s) per core = 2```, for example, a 4 core cpu with 8 threads, in such case set ```nthread=4``` and not 8. diff --git a/demo/CLI/binary_classification/agaricus-lepiota.data b/demo/CLI/binary_classification/agaricus-lepiota.data deleted file mode 100644 index 14fe8bbe77ce..000000000000 --- a/demo/CLI/binary_classification/agaricus-lepiota.data +++ /dev/null @@ -1,8124 +0,0 @@ -p,x,s,n,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,s,u -e,x,s,y,t,a,f,c,b,k,e,c,s,s,w,w,p,w,o,p,n,n,g -e,b,s,w,t,l,f,c,b,n,e,c,s,s,w,w,p,w,o,p,n,n,m -p,x,y,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,s,u -e,x,s,g,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,y,y,t,a,f,c,b,n,e,c,s,s,w,w,p,w,o,p,k,n,g -e,b,s,w,t,a,f,c,b,g,e,c,s,s,w,w,p,w,o,p,k,n,m -e,b,y,w,t,l,f,c,b,n,e,c,s,s,w,w,p,w,o,p,n,s,m -p,x,y,w,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,k,v,g -e,b,s,y,t,a,f,c,b,g,e,c,s,s,w,w,p,w,o,p,k,s,m -e,x,y,y,t,l,f,c,b,g,e,c,s,s,w,w,p,w,o,p,n,n,g -e,x,y,y,t,a,f,c,b,n,e,c,s,s,w,w,p,w,o,p,k,s,m -e,b,s,y,t,a,f,c,b,w,e,c,s,s,w,w,p,w,o,p,n,s,g -p,x,y,w,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,n,v,u -e,x,f,n,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,k,a,g -e,s,f,g,f,n,f,c,n,k,e,e,s,s,w,w,p,w,o,p,n,y,u -e,f,f,w,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,n,a,g -p,x,s,n,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,s,g -p,x,y,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,n,s,u -p,x,s,n,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,n,s,u -e,b,s,y,t,a,f,c,b,k,e,c,s,s,w,w,p,w,o,p,n,s,m -p,x,y,n,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,n,v,g -e,b,y,y,t,l,f,c,b,k,e,c,s,s,w,w,p,w,o,p,n,s,m -e,b,y,w,t,a,f,c,b,w,e,c,s,s,w,w,p,w,o,p,n,n,m -e,b,s,w,t,l,f,c,b,g,e,c,s,s,w,w,p,w,o,p,k,s,m -p,f,s,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,n,v,g -e,x,y,y,t,a,f,c,b,n,e,c,s,s,w,w,p,w,o,p,n,n,m -e,x,y,w,t,l,f,c,b,w,e,c,s,s,w,w,p,w,o,p,n,n,m -e,f,f,n,f,n,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,y,u -e,x,s,y,t,a,f,w,n,n,t,b,s,s,w,w,p,w,o,p,n,v,d -e,b,s,y,t,l,f,c,b,g,e,c,s,s,w,w,p,w,o,p,n,n,m -p,x,y,w,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,n,s,u -e,x,y,y,t,l,f,c,b,n,e,c,s,s,w,w,p,w,o,p,n,n,m -e,x,y,n,t,l,f,c,b,p,e,r,s,y,w,w,p,w,o,p,n,y,p -e,b,y,y,t,l,f,c,b,n,e,c,s,s,w,w,p,w,o,p,n,s,m -e,x,f,y,t,l,f,w,n,w,t,b,s,s,w,w,p,w,o,p,n,v,d -e,s,f,g,f,n,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,v,u -p,x,y,n,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,n,s,u 
[... several hundred further deleted rows of agaricus-lepiota.data, all in the same 23-column comma-separated format as the sample above, omitted; the hunk header shows the file is removed in its entirety (8,124 rows) ...]
-p,f,y,n,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,v,g -e,x,y,y,t,l,f,c,b,w,e,c,s,s,w,w,p,w,o,p,k,s,g -e,s,f,n,f,n,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,y,u -e,x,y,n,t,a,f,c,b,n,e,r,s,y,w,w,p,w,o,p,k,y,g -e,f,f,g,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,n,a,g -e,f,f,y,t,l,f,w,n,n,t,b,s,s,w,w,p,w,o,p,u,v,d -e,x,s,g,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,n,a,g -p,f,y,n,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,v,u -e,x,f,g,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,k,a,g -e,x,s,g,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,k,a,g -e,x,f,g,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,k,s,g -p,x,y,n,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,n,s,g -e,x,f,n,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,n,a,g -e,f,s,w,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,n,s,g -e,x,f,g,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,n,s,g -p,x,y,n,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,n,s,g -e,f,f,n,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,n,a,g -e,f,s,w,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,k,a,g -p,x,y,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,v,u -e,f,f,g,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,n,a,g -e,b,y,y,t,l,f,c,b,k,e,c,s,s,w,w,p,w,o,p,n,n,g -e,x,s,n,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,n,a,g -e,f,f,n,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,k,a,g -p,f,s,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,s,g -e,x,y,n,t,a,f,c,b,w,e,r,s,y,w,w,p,w,o,p,n,s,g -e,f,f,w,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,k,a,g -e,f,y,n,t,l,f,c,b,p,e,r,s,y,w,w,p,w,o,p,k,s,g -e,f,f,g,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,k,s,g -e,f,f,g,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,n,s,g -e,f,s,g,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,n,s,g -e,f,f,n,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,k,s,g -e,x,s,g,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,n,s,g -e,x,f,w,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,k,s,g -e,f,f,w,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,k,a,g -e,x,s,n,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,n,s,g -e,x,f,n,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,k,v,d -e,f,s,y,t,a,f,w,n,p,t,b,s,s,w,w,p,w,o,p,u,v,d -e,x,f,w,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,k,s,g -e,f,s,w,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,k,a,g -e,b,s,y,t,a,f,c,b,k,e,c,s,s,w,w,p,w,o,p,n,n,g -e,x,f,w,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,n,a,g -e,x,f,g,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,k,a,g -p,f,s,n,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,n,v,u -e,x,f,g,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,k,a,g -p,x,s,n,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,k,s,g -e,f,s,w,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,n,s,g -e,x,y,w,t,a,f,c,b,g,e,c,s,s,w,w,p,w,o,p,k,n,g -e,x,f,n,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,k,s,g -e,x,f,n,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,n,y,d -e,b,y,y,t,l,f,c,b,w,e,c,s,s,w,w,p,w,o,p,n,s,g -e,f,f,w,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,s,n,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,n,a,g -e,f,f,w,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,f,n,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,n,v,d -e,f,f,w,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,n,s,g -e,x,s,w,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,k,a,g -e,x,s,n,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,n,a,g -e,x,f,w,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,n,s,g -e,f,s,g,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,k,s,g -e,x,f,g,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,f,g,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,k,a,g -e,f,s,n,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,k,a,g -e,f,s,w,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,k,a,g -e,f,y,n,t,a,f,c,b,n,e,r,s,y,w,w,p,w,o,p,k,y,p -p,x,y,w,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,n,v,g -e,f,s,g,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,k,a,g -p,x,y,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,v,g -e,b,y,w,t,a,f,c,b,k,e,c,s,s,w,w,p,w,o,p,n,s,m -e,x,s,g,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,k,s,g -e,f,f,g,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,k,a,g -e,f,f,w,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,n,s,g -e,f,s,g,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,k,s,g -e,b,y,w,t,a,f,c,b,g,e,c,s,s,w,w,p,w,o,p,n,n,g -e,x,f,g,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,k,s,g -e,x,f,e,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,n,v,d 
-e,x,y,y,t,a,f,c,b,p,e,r,s,y,w,w,p,w,o,p,k,s,p -e,x,s,w,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,k,s,g -e,b,s,w,t,a,f,c,b,w,e,c,s,s,w,w,p,w,o,p,n,n,m -p,x,s,n,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,v,g -e,x,f,n,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,k,y,d -e,f,s,n,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,k,s,g -e,b,s,w,t,a,f,c,b,g,e,c,s,s,w,w,p,w,o,p,n,n,m -e,x,s,g,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,k,s,g -e,x,f,n,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,k,v,d -e,f,f,w,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,n,a,g -e,x,f,n,f,n,f,c,n,g,e,e,s,s,w,w,p,w,o,p,k,v,u -e,x,f,g,f,n,f,c,n,g,e,e,s,s,w,w,p,w,o,p,k,v,u -e,x,f,g,f,n,f,c,n,p,e,e,s,s,w,w,p,w,o,p,k,y,u -e,b,y,y,t,l,f,c,b,g,e,c,s,s,w,w,p,w,o,p,k,s,m -e,f,f,n,f,n,f,c,n,g,e,e,s,s,w,w,p,w,o,p,n,v,u -e,x,f,w,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,n,a,g -e,f,y,n,t,a,f,c,b,w,e,r,s,y,w,w,p,w,o,p,k,s,p -e,x,y,n,t,l,f,c,b,p,e,r,s,y,w,w,p,w,o,p,k,y,p -e,x,y,y,t,l,f,c,b,p,e,r,s,y,w,w,p,w,o,p,k,y,g -e,f,f,g,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,n,a,g -e,x,y,w,t,a,f,c,b,n,e,c,s,s,w,w,p,w,o,p,k,n,g -e,x,y,w,t,a,f,c,b,n,e,c,s,s,w,w,p,w,o,p,k,s,g -e,f,s,w,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,n,a,g -e,f,s,g,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,k,a,g -p,f,s,n,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,k,v,u -e,x,y,y,t,l,f,c,b,n,e,c,s,s,w,w,p,w,o,p,n,s,g -e,x,f,n,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,n,s,g -e,x,f,w,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,k,s,g -e,x,s,y,t,l,f,c,b,w,e,c,s,s,w,w,p,w,o,p,n,s,m -e,x,f,e,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,n,v,d -e,f,y,n,t,l,f,c,b,p,e,r,s,y,w,w,p,w,o,p,n,s,p -e,b,s,w,t,a,f,c,b,g,e,c,s,s,w,w,p,w,o,p,k,n,g -p,x,s,n,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,s,g -e,x,f,g,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,k,s,g -e,b,y,w,t,a,f,c,b,n,e,c,s,s,w,w,p,w,o,p,n,n,m -e,x,s,w,t,l,f,c,b,k,e,c,s,s,w,w,p,w,o,p,n,n,g -e,b,y,y,t,l,f,c,b,k,e,c,s,s,w,w,p,w,o,p,k,s,m -e,x,y,w,t,l,f,c,b,w,e,c,s,s,w,w,p,w,o,p,n,n,g -e,x,s,y,t,a,f,c,b,g,e,c,s,s,w,w,p,w,o,p,n,n,g -e,b,s,w,t,a,f,c,b,w,e,c,s,s,w,w,p,w,o,p,k,n,g -e,f,s,n,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,k,s,g -p,f,s,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,n,s,g -e,f,s,n,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,n,a,g -e,f,f,n,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,k,a,g -p,f,y,w,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,k,s,u -e,f,s,w,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,k,a,g -e,x,f,w,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,n,s,g -e,x,s,n,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,k,a,g -e,f,s,n,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,k,a,g -e,f,y,y,t,a,f,c,b,n,e,r,s,y,w,w,p,w,o,p,k,y,g -e,x,f,g,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,k,a,g -e,x,f,n,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,k,a,g -p,f,s,n,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,n,v,g -e,x,s,w,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,n,a,g -e,f,f,n,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,k,s,g -e,f,f,w,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,n,a,g -e,f,f,w,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,n,s,g -e,x,y,y,t,a,f,c,b,n,e,r,s,y,w,w,p,w,o,p,k,s,g -e,x,s,g,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,n,s,g -e,x,f,g,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,n,a,g -e,f,f,n,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,k,s,g -e,f,s,n,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,k,a,g -e,x,f,w,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,k,s,g -e,x,y,e,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,n,v,d -e,x,f,n,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,k,a,g -p,x,s,w,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,n,v,g -e,x,y,n,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,k,y,d -e,x,f,n,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,n,s,g -p,f,y,w,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,n,s,g -e,f,f,g,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,k,a,g -e,x,f,w,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,n,a,g -e,f,s,n,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,k,a,g -e,f,f,g,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,k,a,g -e,f,s,g,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,k,a,g -e,f,f,g,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,n,s,g 
-e,x,s,g,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,n,s,g -e,x,s,n,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,k,s,g -p,f,s,w,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,k,v,u -e,x,s,g,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,n,a,g -p,x,s,w,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,k,s,g -p,f,s,w,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,n,s,u -e,f,s,n,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,n,s,g -e,x,f,g,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,k,a,g -e,f,s,g,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,s,g,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,k,a,g -e,f,f,g,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,k,a,g -e,f,f,g,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,n,a,g -e,x,y,g,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,n,y,d -e,x,f,n,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,n,v,d -e,f,s,w,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,n,a,g -p,f,y,w,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,n,v,g -p,f,y,n,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,n,s,g -e,x,s,w,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,n,a,g -p,f,s,n,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,n,s,g -e,f,f,n,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,k,s,g -e,f,s,w,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,n,a,g -p,f,s,n,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,k,v,g -p,f,y,w,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,k,v,g -p,f,y,n,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,k,v,g -p,f,s,w,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,s,g -e,x,s,w,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,k,a,g -e,x,f,n,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,n,v,d -e,f,s,w,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,n,a,g -e,f,f,g,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,k,a,g -p,f,y,n,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,v,u -e,x,s,w,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,n,s,g -e,x,y,e,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,n,y,d -e,f,s,g,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,k,s,g -e,x,s,w,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,n,s,g -p,f,s,w,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,n,s,g -e,f,f,n,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,n,a,g -e,x,f,w,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,k,s,g -p,f,s,w,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,s,u -e,x,f,n,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,n,a,g -e,f,f,w,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,n,s,g -e,x,s,w,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,k,s,g -e,f,s,w,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,n,s,g -e,b,y,y,t,a,f,c,b,w,e,c,s,s,w,w,p,w,o,p,k,n,m -e,x,s,g,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,f,w,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,k,s,g -e,x,s,n,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,n,s,g -e,x,f,n,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,k,a,g -e,f,f,g,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,k,a,g -p,x,y,n,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,s,u -e,f,s,g,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,k,s,g -e,x,f,n,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,k,y,d -e,f,s,w,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,k,a,g -e,x,f,n,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,k,v,d -e,x,f,g,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,n,a,g -e,x,f,w,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,k,a,g -p,f,s,w,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,n,v,g -p,f,s,w,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,k,v,g -e,x,s,g,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,n,a,g -e,x,f,n,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,n,v,d -e,x,s,g,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,k,s,g -e,x,y,y,t,a,f,c,b,w,e,r,s,y,w,w,p,w,o,p,k,s,p -e,f,s,w,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,s,w,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,f,g,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,k,a,g -e,f,s,w,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,n,s,g -e,x,f,n,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,n,y,d -e,f,s,g,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,n,a,g -e,f,s,g,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,n,s,g -e,f,f,n,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,n,s,g -e,f,s,w,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,n,s,g -e,f,s,w,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,n,a,g -e,f,s,g,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,n,s,g -e,f,f,w,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,n,s,g -e,f,s,n,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,n,a,g -p,f,s,w,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,n,s,g 
-e,x,f,g,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,n,a,g -e,b,y,y,t,l,f,c,b,k,e,c,s,s,w,w,p,w,o,p,k,n,g -e,f,f,n,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,n,s,g -e,f,s,g,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,n,s,g -e,x,f,n,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,k,y,d -e,b,s,y,t,l,f,c,b,n,e,c,s,s,w,w,p,w,o,p,k,s,m -e,x,s,w,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,k,s,g -e,f,f,n,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,n,y,d -p,x,s,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,n,s,g -e,f,f,w,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,k,s,g -p,f,y,w,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,n,s,u -e,x,s,n,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,n,a,g -e,f,s,n,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,k,a,g -e,b,s,y,t,l,f,c,b,g,e,c,s,s,w,w,p,w,o,p,k,n,g -e,f,s,g,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,n,a,g -e,x,s,g,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,n,a,g -e,f,f,g,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,k,s,g -e,x,f,g,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,k,s,g -e,f,s,w,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,k,a,g -p,f,s,n,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,v,g -e,f,f,g,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,n,a,g -e,f,s,w,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,k,a,g -e,x,f,n,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,k,y,d -e,x,s,n,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,n,a,g -e,x,f,e,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,n,y,d -e,x,y,y,t,a,f,c,b,n,e,c,s,s,w,w,p,w,o,p,n,n,g -e,f,f,w,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,k,s,g -e,x,s,w,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,k,a,g -e,f,f,n,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,n,a,g -p,f,s,w,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,k,s,u -e,f,f,g,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,n,y,d -e,f,s,w,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,n,s,g -e,f,f,w,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,k,a,g -p,f,y,n,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,n,v,g -e,x,s,w,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,k,s,g -e,x,f,n,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,n,v,d -e,x,y,g,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,n,v,d -e,f,s,n,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,n,s,g -e,f,s,w,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,n,s,g -e,f,s,n,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,n,a,g -e,x,s,n,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,k,a,g -e,f,f,w,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,k,a,g -e,x,f,n,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,k,s,g -e,f,f,w,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,k,a,g -e,f,f,n,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,n,a,g -e,f,s,w,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,k,s,g -e,f,f,n,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,n,a,g -p,f,y,n,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,k,v,u -e,f,f,w,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,k,a,g -e,f,s,w,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,n,s,g -e,x,f,n,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,n,a,g -e,f,f,w,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,n,s,g -e,x,f,g,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,k,a,g -e,f,s,n,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,f,g,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,k,v,d -e,f,f,w,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,n,a,g -e,f,f,n,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,k,s,g -e,x,s,n,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,k,s,g -e,x,f,w,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,k,s,g -e,b,s,y,t,l,f,c,b,g,e,c,s,s,w,w,p,w,o,p,k,s,g -p,x,y,n,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,k,s,g -p,f,s,n,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,k,v,g -e,f,f,w,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,k,s,g -e,f,f,n,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,k,s,g -p,f,y,n,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,s,g -e,f,f,n,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,k,s,g -p,x,s,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,n,v,u -e,f,s,n,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,n,a,g -p,f,y,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,n,v,u -e,f,s,n,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,k,a,g -e,f,s,g,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,k,a,g -e,x,f,g,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,k,a,g -e,f,f,n,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,n,s,g -e,x,f,g,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,n,v,d 
-e,x,f,n,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,k,v,d -e,x,f,n,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,n,s,g -e,x,f,g,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,k,y,d -e,x,s,w,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,n,s,g -e,f,s,w,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,k,a,g -e,x,s,w,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,k,s,g -e,f,f,g,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,k,s,g -e,x,f,n,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,n,y,d -e,x,s,w,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,n,s,g -e,x,s,n,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,k,s,g -e,f,s,g,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,n,s,g -p,f,y,w,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,n,s,g -e,f,s,n,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,k,s,g -e,f,y,y,t,a,f,c,b,p,e,r,s,y,w,w,p,w,o,p,n,y,g -e,x,f,g,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,k,y,d -e,f,s,g,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,s,g,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,n,a,g -p,f,y,w,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,n,s,u -e,x,s,w,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,k,a,g -e,x,s,w,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,k,a,g -e,f,s,g,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,k,a,g -e,f,s,n,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,n,s,g -e,x,f,n,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,k,s,g -e,f,s,n,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,k,s,g -e,f,s,n,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,n,a,g -e,x,f,w,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,n,a,g -e,f,f,w,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,n,a,g -e,f,f,n,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,k,a,g -e,x,y,w,t,l,f,c,b,g,e,c,s,s,w,w,p,w,o,p,n,s,m -e,x,s,n,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,k,a,g -e,x,s,g,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,n,s,g -e,f,s,w,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,k,a,g -p,x,s,w,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,n,s,u -e,x,s,w,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,k,s,g -e,x,f,n,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,n,v,d -e,f,f,g,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,n,s,g -e,x,s,g,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,k,a,g -e,f,f,n,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,n,a,g -e,f,s,n,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,n,s,g -e,x,s,g,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,k,s,g -e,f,y,n,t,l,f,c,b,n,e,r,s,y,w,w,p,w,o,p,k,s,p -p,f,y,n,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,k,v,u -e,x,s,n,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,n,s,g -e,x,f,n,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,k,v,d -p,f,y,w,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,k,s,g -p,x,s,w,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,k,s,u -p,f,s,n,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,n,s,u -e,x,f,n,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,k,y,d -e,b,y,w,t,l,f,c,b,g,e,c,s,s,w,w,p,w,o,p,k,n,m -p,f,y,w,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,s,g -e,f,f,w,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,k,a,g -e,x,s,w,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,n,s,g -e,x,f,n,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,k,y,d -e,x,s,n,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,k,a,g -e,f,f,n,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,k,s,g -e,f,f,n,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,n,s,g -p,f,y,n,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,n,s,u -p,f,y,w,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,k,v,g -e,x,s,n,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,k,s,g -e,f,f,g,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,n,s,g -e,x,f,n,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,k,v,d -e,f,s,g,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,n,s,g -e,f,f,n,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,k,a,g -p,f,s,n,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,n,v,u -e,x,f,n,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,n,a,g -p,f,y,n,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,n,v,g -e,x,s,g,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,k,a,g -e,f,f,w,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,k,s,g -e,x,s,w,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,f,n,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,n,y,d -e,x,s,n,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,n,s,g -p,f,s,w,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,n,s,u -e,x,f,n,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,n,v,d -e,x,s,w,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,k,s,g -e,f,s,w,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,k,s,g 
-e,x,s,w,t,a,f,w,n,n,t,b,s,s,w,w,p,w,o,p,u,v,d -e,x,f,n,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,n,s,g -e,x,y,g,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,k,v,d -e,x,f,n,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,n,y,d -e,x,s,y,t,a,f,w,n,p,t,b,s,s,w,w,p,w,o,p,u,v,d -e,f,f,g,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,k,a,g -e,x,s,n,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,k,s,g -e,x,y,e,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,n,y,d -e,f,s,g,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,k,a,g -p,f,s,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,v,u -e,f,f,w,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,n,s,g -e,x,f,g,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,n,a,g -e,f,f,n,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,n,s,g -e,f,s,n,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,k,a,g -e,x,f,w,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,n,a,g -p,f,y,w,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,k,s,g -e,x,f,w,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,n,s,g -e,f,s,n,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,n,a,g -e,x,s,n,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,k,s,g -e,x,f,g,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,k,s,g -e,x,s,n,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,k,a,g -e,f,y,y,t,l,f,c,b,n,e,r,s,y,w,w,p,w,o,p,k,y,g -p,x,y,n,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,v,u -e,x,f,w,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,k,a,g -e,f,s,w,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,k,s,g -e,x,s,n,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,n,s,g -p,f,y,n,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,n,s,g -e,f,f,g,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,k,s,g -p,f,s,w,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,n,s,u -e,f,s,n,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,k,s,g -e,x,s,g,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,k,s,g -e,x,f,g,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,k,a,g -e,f,f,n,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,k,s,g -e,f,s,g,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,n,s,g -e,x,s,w,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,n,s,g -e,x,f,n,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,k,a,g -e,x,f,g,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,k,s,g -e,f,f,n,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,n,a,g -e,b,y,y,t,a,f,c,b,n,e,c,s,s,w,w,p,w,o,p,k,n,m -e,f,f,g,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,n,s,g -e,x,f,g,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,k,a,g -e,f,f,w,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,n,a,g -e,x,f,n,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,n,y,d -p,f,s,n,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,k,s,u -e,x,f,n,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,n,y,d -e,x,f,n,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,n,a,g -e,x,f,n,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,k,y,d -e,x,f,w,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,n,a,g -e,f,s,g,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,k,s,g -e,f,s,n,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,k,a,g -e,x,y,y,t,a,f,c,b,g,e,c,s,s,w,w,p,w,o,p,k,s,m -e,x,f,w,t,a,f,w,n,p,t,b,s,s,w,w,p,w,o,p,u,v,d -e,f,f,g,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,n,a,g -e,f,f,n,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,n,a,g -e,f,f,n,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,n,s,g -e,b,y,y,t,a,f,c,b,g,e,c,s,s,w,w,p,w,o,p,k,s,g -e,x,f,n,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,k,s,g -e,x,y,w,t,l,f,c,b,n,e,c,s,s,w,w,p,w,o,p,n,s,g -e,f,s,w,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,k,s,g -e,x,s,w,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,f,n,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,n,s,g -e,x,s,n,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,n,a,g -e,x,s,n,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,n,s,g -e,x,y,n,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,n,y,d -e,f,s,g,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,k,s,g -e,x,y,y,t,a,f,c,b,g,e,c,s,s,w,w,p,w,o,p,k,n,m -e,x,s,w,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,k,a,g -p,f,y,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,s,u -e,f,s,w,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,n,s,g -p,f,s,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,n,s,u -e,f,s,g,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,k,s,g -e,f,f,g,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,n,s,g -p,x,s,w,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,n,s,g -e,x,s,g,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,k,a,g -p,x,s,w,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,n,s,u 
-e,x,s,g,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,n,s,g -e,f,s,w,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,k,s,g -p,f,y,w,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,n,v,g -e,x,f,w,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,k,a,g -e,x,s,n,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,k,a,g -e,x,f,w,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,n,s,g -e,x,f,w,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,k,a,g -e,x,y,e,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,k,y,d -p,f,s,n,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,s,u -e,f,f,g,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,k,s,g -e,x,y,y,t,a,f,c,b,w,e,r,s,y,w,w,p,w,o,p,k,y,g -e,x,f,n,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,n,a,g -e,f,f,n,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,k,a,g -e,b,s,w,t,l,f,c,b,g,e,c,s,s,w,w,p,w,o,p,k,n,m -e,f,s,g,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,k,a,g -e,f,f,w,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,k,a,g -e,f,f,n,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,n,a,g -p,f,s,n,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,n,s,u -e,x,s,n,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,n,a,g -e,b,y,y,t,a,f,c,b,w,e,c,s,s,w,w,p,w,o,p,k,n,g -e,f,s,g,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,n,a,g -e,f,s,n,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,k,s,g -e,x,f,g,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,n,a,g -e,f,s,g,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,k,s,g -e,x,f,n,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,k,a,g -e,x,f,n,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,n,v,d -e,b,y,w,t,l,f,c,b,g,e,c,s,s,w,w,p,w,o,p,n,n,m -e,x,f,g,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,n,v,d -p,f,y,n,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,k,s,g -e,x,s,w,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,n,a,g -p,f,s,n,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,v,u -e,f,s,n,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,n,a,g -e,f,f,n,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,k,a,g -e,x,f,w,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,k,a,g -e,x,f,n,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,n,a,g -e,f,s,n,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,n,s,g -e,x,s,y,t,l,f,c,b,g,e,c,s,s,w,w,p,w,o,p,k,s,g -e,f,s,w,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,k,a,g -e,x,s,w,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,k,s,g -p,x,y,n,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,k,v,g -e,f,f,g,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,k,a,g -p,f,y,n,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,s,g -e,f,f,w,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,k,a,g -e,x,f,n,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,k,v,d -e,x,f,g,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,k,s,g -p,f,s,n,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,n,v,u -e,f,s,g,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,k,s,g -p,x,s,w,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,n,v,u -e,x,f,n,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,n,v,d -e,x,f,n,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,n,s,g -e,x,y,y,t,a,f,c,b,n,e,r,s,y,w,w,p,w,o,p,n,s,p -e,f,f,w,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,k,s,g -e,f,s,w,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,n,a,g -e,x,f,g,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,n,s,g -e,f,f,g,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,k,s,g -e,f,f,w,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,n,a,g -e,x,f,g,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,k,s,g -e,f,f,g,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,n,a,g -e,x,f,g,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,n,v,d -e,x,s,g,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,k,a,g -e,f,s,g,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,k,s,g -e,x,y,n,t,l,f,c,b,w,e,r,s,y,w,w,p,w,o,p,n,s,g -e,x,y,g,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,k,y,d -e,f,s,g,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,k,a,g -e,f,f,w,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,n,s,g -e,f,s,n,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,n,a,g -e,f,f,w,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,k,s,g -e,f,f,n,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,k,a,g -e,f,s,g,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,k,a,g -e,x,s,n,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,n,s,g -e,x,f,n,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,n,y,d -e,x,s,n,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,n,s,g -e,x,f,n,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,k,a,g -e,f,s,w,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,n,a,g -e,x,f,n,f,n,f,c,n,p,e,e,s,s,w,w,p,w,o,p,k,y,u 
-p,f,s,w,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,k,s,u -p,f,s,n,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,n,v,g -e,x,f,n,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,k,v,d -e,x,s,w,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,n,a,g -e,f,f,g,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,s,n,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,n,s,g -e,x,f,n,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,n,s,g -e,x,s,g,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,k,s,g -e,f,f,g,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,k,a,g -e,f,s,n,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,n,s,g -e,f,s,g,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,k,a,g -e,x,f,e,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,k,v,d -p,f,y,w,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,n,v,u -e,x,f,n,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,n,s,g -e,x,f,n,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,n,v,d -p,f,s,n,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,n,s,u -p,f,s,w,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,v,g -p,f,s,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,s,u -e,f,f,w,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,n,a,g -e,f,s,n,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,n,s,g -e,f,f,n,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,k,a,g -e,x,s,n,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,k,a,g -e,f,s,n,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,k,a,g -p,f,s,w,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,n,v,g -p,f,s,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,v,g -e,f,s,g,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,n,a,g -e,x,s,n,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,k,a,g -e,x,s,n,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,n,s,g -e,x,f,n,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,k,v,d -e,x,f,n,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,n,v,d -e,f,f,w,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,k,a,g -e,f,f,w,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,n,s,g -e,x,f,w,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,k,a,g -e,x,s,y,t,l,f,c,b,n,e,c,s,s,w,w,p,w,o,p,n,n,m -e,f,s,g,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,n,a,g -e,x,s,w,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,n,s,g -e,x,f,n,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,n,s,g -e,x,f,n,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,n,a,g -e,x,s,w,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,n,s,g -e,x,s,n,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,k,s,g -e,x,s,g,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,n,s,g -e,x,s,g,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,k,s,g -e,f,s,w,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,n,s,g -e,x,s,n,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,k,a,g -e,f,f,g,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,n,s,g -e,f,f,w,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,k,s,g -e,x,f,w,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,n,a,g -e,f,f,g,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,k,s,g -e,x,f,g,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,k,a,g -e,x,y,y,t,a,f,c,b,g,e,c,s,s,w,w,p,w,o,p,k,n,g -e,x,s,y,t,l,f,c,b,n,e,c,s,s,w,w,p,w,o,p,k,n,g -e,x,s,w,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,k,a,g -p,x,y,n,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,s,g -e,x,f,w,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,n,s,g -p,x,s,w,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,k,s,u -e,x,y,g,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,n,v,d -p,f,s,w,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,k,v,u -e,x,y,y,t,l,f,c,b,g,e,c,s,s,w,w,p,w,o,p,n,s,g -e,x,y,w,t,l,f,c,b,n,e,c,s,s,w,w,p,w,o,p,n,n,m -e,f,y,n,t,l,f,c,b,n,e,r,s,y,w,w,p,w,o,p,n,y,p -e,x,s,w,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,n,s,g -e,f,s,n,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,k,s,g -e,x,s,g,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,k,a,g -e,x,s,w,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,k,a,g -e,f,s,n,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,n,s,g -e,f,s,n,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,k,a,g -e,x,s,g,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,k,s,g -e,b,s,w,t,l,f,c,b,n,e,c,s,s,w,w,p,w,o,p,n,n,g -e,x,s,g,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,k,s,g -e,x,s,y,t,l,f,c,b,w,e,c,s,s,w,w,p,w,o,p,k,s,m -p,f,y,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,v,g -e,f,s,w,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,k,s,g -e,f,s,n,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,k,s,g -e,f,s,g,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,n,s,g -e,f,f,g,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,n,s,g 
-e,x,y,n,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,n,y,d -e,x,f,n,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,k,y,d -e,x,y,y,t,a,f,c,b,n,e,r,s,y,w,w,p,w,o,p,n,y,p -e,x,f,g,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,k,y,d -e,f,f,g,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,n,a,g -e,f,s,n,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,k,s,g -e,f,s,w,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,n,a,g -e,x,f,g,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,n,a,g -e,f,s,w,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,k,s,g -e,f,f,g,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,n,a,g -e,x,f,g,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,s,n,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,n,a,g -e,x,f,n,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,k,y,d -e,f,s,w,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,n,s,g -e,f,f,n,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,k,s,g -p,f,y,n,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,k,s,g -e,x,f,n,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,k,y,d -e,x,f,n,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,k,s,g -e,f,f,w,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,k,s,g -p,f,y,n,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,n,v,u -e,f,f,g,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,k,s,g -e,x,f,g,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,k,v,d -e,x,f,w,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,k,a,g -e,x,f,g,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,k,s,g -e,x,f,n,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,n,v,d -e,x,s,g,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,n,a,g -e,f,f,w,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,n,a,g -e,x,f,g,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,k,y,d -e,x,y,w,t,l,f,c,b,w,e,c,s,s,w,w,p,w,o,p,k,n,g -e,x,y,w,t,l,f,c,b,k,e,c,s,s,w,w,p,w,o,p,n,n,m -e,f,s,g,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,n,s,g -e,f,s,w,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,k,a,g -e,f,s,n,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,k,s,g -e,f,s,w,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,k,a,g -e,x,f,n,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,n,a,g -e,x,s,n,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,n,s,g -e,b,y,y,t,a,f,c,b,g,e,c,s,s,w,w,p,w,o,p,n,n,m -p,f,y,w,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,v,g -e,x,s,n,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,k,a,g -e,f,s,n,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,k,s,g -e,x,y,n,t,a,f,c,b,n,e,r,s,y,w,w,p,w,o,p,k,s,g -e,x,s,g,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,n,a,g -e,f,s,w,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,n,a,g -p,x,s,n,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,v,u -e,f,s,g,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,k,a,g -e,x,f,w,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,n,s,g -p,f,s,n,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,v,g -e,x,f,n,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,s,n,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,k,a,g -e,x,f,n,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,k,v,d -e,f,s,w,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,k,s,g -p,f,s,w,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,n,v,u -e,x,f,w,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,k,a,g -e,f,f,n,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,n,s,g -e,f,s,g,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,n,s,g -e,x,f,w,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,k,a,g -e,f,y,n,t,l,f,c,b,w,e,r,s,y,w,w,p,w,o,p,n,y,g -p,f,s,w,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,n,s,g -e,x,f,w,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,n,a,g -e,f,f,n,f,n,f,c,n,p,e,e,s,s,w,w,p,w,o,p,k,y,u -p,f,y,n,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,n,s,u -e,f,s,w,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,n,s,g -e,x,f,g,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,n,s,g -e,f,y,y,t,a,f,c,b,n,e,r,s,y,w,w,p,w,o,p,n,y,p -e,x,f,w,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,n,s,g -e,x,f,w,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,k,a,g -p,f,y,n,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,n,s,g -e,f,f,n,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,k,s,g -e,f,f,g,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,n,s,g -e,x,f,n,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,k,s,g -e,x,f,n,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,n,v,d -e,x,s,g,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,k,s,g -e,x,s,g,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,k,a,g -e,x,f,w,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,k,s,g -e,f,f,g,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,n,s,g 
-e,x,f,n,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,k,y,d -e,x,s,w,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,n,s,g -p,x,s,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,n,s,u -e,f,s,g,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,n,a,g -e,x,s,n,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,n,s,g -e,x,f,n,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,k,v,d -e,x,y,y,t,a,f,c,b,p,e,r,s,y,w,w,p,w,o,p,n,y,g -e,x,s,g,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,k,a,g -e,x,f,n,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,k,a,g -e,x,f,w,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,k,a,g -e,x,y,y,t,a,f,c,b,k,e,c,s,s,w,w,p,w,o,p,n,s,g -e,x,f,n,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,k,y,d -e,x,f,w,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,k,a,g -e,x,f,w,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,k,a,g -p,f,s,n,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,n,s,u -p,f,s,w,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,v,u -e,x,s,n,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,k,a,g -e,x,f,g,f,n,f,c,n,n,e,e,s,s,w,w,p,w,o,p,n,v,u -e,f,s,n,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,k,s,g -e,x,s,w,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,k,a,g -p,f,s,w,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,k,s,g -e,b,s,y,t,a,f,c,b,k,e,c,s,s,w,w,p,w,o,p,n,s,g -e,x,f,n,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,k,a,g -p,f,y,n,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,s,u -e,x,s,g,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,k,a,g -e,x,s,g,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,n,s,g -e,x,s,w,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,k,s,g -p,f,s,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,n,v,u -e,f,s,w,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,k,a,g -e,x,f,n,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,n,a,g -e,f,f,n,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,n,s,g -e,x,f,n,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,n,a,g -p,f,y,n,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,v,g -e,x,f,w,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,n,s,g -e,f,s,w,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,n,a,g -p,f,y,w,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,v,u -e,f,s,g,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,k,a,g -e,f,y,n,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,n,v,d -e,x,f,g,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,n,a,g -e,x,f,g,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,n,y,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,k,v,d -e,f,s,g,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,n,s,g -e,x,f,g,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,k,y,d -e,x,f,g,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,n,s,g -p,f,s,n,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,n,s,g -e,f,f,n,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,k,v,d -p,x,s,n,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,v,g -e,x,s,w,t,a,f,c,b,n,e,c,s,s,w,w,p,w,o,p,n,s,g -e,x,s,w,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,k,a,g -e,x,y,y,t,a,f,c,b,g,e,c,s,s,w,w,p,w,o,p,n,s,g -p,x,f,g,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,y,g -e,f,s,n,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,n,a,g -e,f,s,w,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,n,s,g -e,x,f,n,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,k,v,d -e,f,s,n,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,k,s,g -e,x,f,n,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,k,s,g -p,x,s,w,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,k,v,u -e,f,f,n,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,n,y,d -e,x,y,w,t,l,f,c,b,k,e,c,s,s,w,w,p,w,o,p,n,n,g -e,f,f,g,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,k,s,g -e,x,y,e,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,n,v,d -e,f,f,w,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,k,s,g -e,x,s,g,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,n,a,g -e,x,y,n,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,k,v,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,k,v,d -e,f,s,w,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,n,a,g -e,x,f,g,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,k,v,d -e,x,f,n,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,n,y,d -e,f,f,w,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,n,s,g 
-e,x,y,n,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,k,y,d -e,x,s,g,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,k,s,g -e,x,f,g,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,k,a,g -e,f,f,w,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,y,g,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,k,v,d -p,f,y,n,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,n,s,u -e,x,y,e,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,k,v,d -e,f,f,w,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,k,s,g -e,f,s,g,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,n,a,g -e,x,y,g,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,n,v,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,k,v,d -e,x,f,n,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,k,v,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,k,v,d -e,x,s,w,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,n,a,g -e,x,f,e,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,n,v,d -e,x,f,n,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,k,y,d -e,x,f,n,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,n,y,d -e,x,f,y,t,a,f,w,n,w,t,b,s,s,w,w,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,n,v,d -p,f,s,w,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,n,v,g -e,x,s,w,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,k,s,g -e,x,f,e,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,n,y,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,k,v,d -e,x,f,w,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,k,a,g -e,x,f,g,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,n,y,d -e,x,s,g,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,n,s,g -e,x,y,e,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,n,y,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,n,v,d -e,x,s,w,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,k,a,g -e,x,s,n,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,k,a,g -e,x,s,w,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,k,s,g -e,x,s,n,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,k,s,g -e,f,f,e,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,n,y,d -e,f,f,w,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,k,a,g -e,f,f,g,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,k,a,g -e,x,y,e,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,k,y,d -e,f,s,n,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,n,s,g -e,x,s,w,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,k,a,g -e,x,s,w,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,k,a,g -e,x,f,w,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,f,e,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,n,v,d -e,x,f,n,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,k,v,d -e,f,f,w,t,l,f,w,n,p,t,b,s,s,w,w,p,w,o,p,n,v,d -e,f,f,w,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,k,a,g -p,f,y,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,s,g -e,f,f,g,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,k,v,d -e,x,f,n,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,n,v,d -e,x,s,w,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,n,a,g -e,x,f,w,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,n,a,g -e,x,s,n,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,f,n,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,s,g,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,y,n,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,n,v,d -e,x,f,n,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,n,y,d -e,f,s,g,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,n,s,g -e,f,s,n,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,k,a,g -e,x,s,g,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,n,s,g -e,f,f,g,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,k,y,d -e,x,s,w,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,n,a,g -e,f,f,n,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,n,a,g -e,f,s,w,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,k,s,g -e,x,f,g,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,n,s,g -e,f,f,n,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,n,a,g -e,f,f,n,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,k,a,g -e,x,f,n,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,n,v,d -e,f,s,g,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,k,a,g -e,f,s,w,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,n,s,g -e,x,y,e,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,k,v,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,k,v,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,k,v,d 
-e,x,f,g,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,k,s,g -e,f,f,g,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,n,a,g -e,f,y,y,t,l,f,c,b,p,e,r,s,y,w,w,p,w,o,p,n,y,g -p,f,y,w,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,k,v,u -e,f,s,n,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,k,a,g -e,f,f,n,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,k,s,g -e,f,f,n,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,k,v,d -e,x,f,e,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,k,y,d -e,f,f,g,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,f,n,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,k,s,g -e,x,f,g,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,n,s,g -e,x,y,e,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,k,v,d -e,x,f,n,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,k,s,g -e,x,f,n,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,k,a,g -e,f,s,w,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,k,s,g -e,f,s,g,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,k,s,g -p,f,y,w,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,k,s,u -e,b,s,w,t,a,f,c,b,k,e,c,s,s,w,w,p,w,o,p,n,s,g -e,x,f,n,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,n,v,d -e,f,f,g,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,k,a,g -e,x,f,n,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,n,s,g -e,x,s,n,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,n,s,g -e,f,s,w,t,l,f,w,n,n,t,b,s,s,w,w,p,w,o,p,u,v,d -e,x,f,w,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,s,w,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,k,a,g -e,x,s,g,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,k,s,g -e,x,y,e,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,k,v,d -e,x,f,n,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,k,v,d -e,f,f,w,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,k,s,g -e,f,s,n,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,n,s,g -p,f,s,w,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,n,v,u -e,f,f,n,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,n,s,g -e,x,y,g,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,n,v,d -e,x,f,g,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,f,g,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,n,v,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,n,v,d -e,x,f,n,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,k,v,d -p,x,s,w,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,k,v,g -e,x,f,n,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,k,s,g -e,x,y,n,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,n,v,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,n,y,d -p,f,y,n,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,n,s,g -e,x,f,g,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,k,y,d -e,x,f,g,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,k,s,g -e,x,f,e,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,k,v,d -e,f,f,n,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,n,s,g -e,f,s,w,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,n,a,g -e,f,s,n,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,k,s,g -e,x,y,e,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,n,v,d -e,f,s,n,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,k,s,g -e,f,f,n,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,n,v,d -e,f,s,w,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,k,s,g -e,x,f,n,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,k,a,g -e,f,f,n,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,n,s,g -p,x,s,n,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,k,v,u -e,x,f,n,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,k,y,d -e,f,s,n,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,n,s,g -p,f,y,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,n,s,g -e,x,f,n,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,n,v,d -e,x,f,g,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,n,y,d -e,f,f,n,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,n,a,g -e,x,f,e,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,k,v,d -e,f,f,w,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,n,s,g -e,x,f,w,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,k,s,g -e,x,y,w,t,l,f,c,b,k,e,c,s,s,w,w,p,w,o,p,k,n,m -e,x,s,w,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,n,s,g -e,x,y,g,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,n,v,d -e,x,f,g,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,k,y,d -e,x,s,g,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,n,a,g -p,f,y,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,n,s,u 
-e,x,f,e,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,n,v,d -p,f,s,n,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,s,u -p,f,y,n,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,n,v,g -e,x,f,g,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,n,v,d -e,f,s,n,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,n,a,g -e,x,f,g,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,n,y,d -e,x,f,n,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,k,a,g -e,x,f,n,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,n,y,d -e,f,s,n,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,n,s,g -e,f,f,n,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,k,a,g -e,x,s,n,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,k,a,g -e,x,s,n,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,k,s,g -e,x,f,n,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,n,v,d -e,f,s,n,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,k,a,g -e,x,f,n,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,k,y,d -p,f,y,w,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,n,v,u -e,x,y,y,t,l,f,c,b,n,e,r,s,y,w,w,p,w,o,p,n,s,g -p,f,s,n,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,k,s,g -e,f,f,w,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,n,s,g -e,f,f,n,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,k,a,g -p,f,s,n,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,n,v,g -e,x,s,n,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,n,a,g -e,x,f,n,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,k,s,g -e,x,f,e,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,k,v,d -e,f,f,n,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,k,a,g -e,x,f,w,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,n,s,g -e,f,s,g,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,n,a,g -e,f,f,w,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,k,s,g -e,f,s,y,t,a,f,w,n,n,t,b,s,s,w,w,p,w,o,p,n,v,d -e,f,f,w,t,a,f,w,n,n,t,b,s,s,w,w,p,w,o,p,n,v,d -e,f,f,n,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,n,a,g -e,x,f,n,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,k,y,d -e,x,f,n,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,n,y,d -e,x,f,n,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,k,s,g -e,x,f,g,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,n,y,d -e,x,f,n,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,k,v,d -e,x,f,g,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,k,a,g -e,x,f,n,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,n,v,d -e,x,s,n,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,k,a,g -e,b,y,w,t,l,f,c,b,k,e,c,s,s,w,w,p,w,o,p,k,s,m -e,x,y,e,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,k,v,d -e,f,s,n,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,n,s,g -e,x,f,e,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,k,y,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,k,v,d -e,x,s,g,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,s,n,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,k,s,g -e,x,f,g,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,n,a,g -e,x,s,g,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,k,a,g -e,x,y,e,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,k,y,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,k,y,d -e,x,f,n,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,n,s,g -e,f,s,n,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,n,s,g -e,x,f,w,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,k,s,g -e,x,y,y,t,a,f,c,b,p,e,r,s,y,w,w,p,w,o,p,n,s,p -e,x,f,e,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,n,v,d -e,x,f,n,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,k,s,g -e,x,y,n,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,n,y,d -e,x,f,w,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,n,a,g -e,x,y,w,t,a,f,c,b,k,e,c,s,s,w,w,p,w,o,p,k,s,m -e,x,s,n,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,n,s,g -e,f,s,w,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,n,a,g -e,f,f,n,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,n,v,d -e,f,f,w,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,k,s,g -e,x,f,g,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,n,v,d -e,x,f,n,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,n,y,d -e,x,f,w,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,n,a,g -e,x,s,w,t,a,f,c,b,n,e,c,s,s,w,w,p,w,o,p,k,n,m -e,f,s,g,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,n,s,g -p,f,y,n,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,n,v,g -e,f,f,g,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,n,s,g 
-e,x,f,n,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,n,v,d -e,x,f,w,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,n,s,g -e,x,y,n,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,k,v,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,n,v,d -p,f,s,n,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,v,u -e,f,s,g,f,n,f,w,b,p,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,f,g,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,k,y,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,n,y,d -e,x,f,n,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,k,y,d -p,x,s,w,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,n,v,u -e,x,s,y,t,l,f,c,b,k,e,c,s,s,w,w,p,w,o,p,k,n,m -e,x,f,n,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,n,v,d -e,f,y,n,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,n,y,d -e,x,s,g,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,n,s,g -e,x,y,e,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,n,y,d -e,x,f,g,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,n,s,g -e,f,s,w,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,n,s,g -e,x,f,n,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,n,v,d -e,f,f,g,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,k,s,g -e,f,f,g,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,k,s,g -e,f,f,n,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,n,s,g -e,x,f,g,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,n,y,d -p,f,y,n,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,k,s,u -e,x,y,g,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,k,v,d -e,x,s,w,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,n,a,g -e,b,s,w,t,a,f,c,b,g,e,c,s,s,w,w,p,w,o,p,n,s,m -e,x,s,w,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,n,s,g -e,f,f,n,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,k,s,g -e,x,y,g,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,k,y,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,n,v,d -e,x,f,g,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,k,s,g -p,x,s,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,s,g -e,x,s,n,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,y,g,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,k,y,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,k,y,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,k,v,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,k,v,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,n,v,d -p,x,s,w,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,n,v,g -e,x,f,e,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,n,v,d -e,x,y,g,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,k,v,d -e,x,f,n,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,k,v,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,k,v,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,n,y,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,k,v,d -e,x,f,n,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,k,v,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,k,v,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,n,v,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,y,g -p,f,s,w,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,k,s,g -e,x,f,e,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,k,v,d -e,x,f,w,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,n,s,g -e,x,y,n,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,n,y,d -e,x,f,n,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,n,v,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,k,v,d 
-e,x,y,n,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,k,v,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,k,y,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,n,v,d -e,f,y,n,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,n,v,d -p,f,s,n,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,n,v,u -e,f,f,e,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,n,y,d -p,x,s,w,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,k,s,g -e,x,f,e,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,k,y,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,k,y,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,n,v,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,k,v,d -e,x,f,n,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,n,y,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,k,y,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,k,y,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,n,v,d -e,x,y,g,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,n,v,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,n,v,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,k,v,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,k,y,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,y,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,n,y,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,k,v,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,k,y,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,n,v,d -e,x,f,n,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,k,y,d -e,x,s,g,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,n,s,g -e,x,y,e,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,k,y,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,k,y,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,n,v,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,n,v,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,n,y,d -e,x,s,w,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,n,a,g -e,x,f,n,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,k,v,d -e,x,f,n,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,k,y,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,n,v,d -p,x,s,p,f,c,f,w,n,g,e,b,s,s,w,w,p,w,o,p,n,s,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,n,y,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,k,v,d 
-e,x,y,e,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,k,y,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,n,v,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,k,v,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,n,v,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,k,v,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,k,v,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,n,y,d -e,x,f,w,f,n,f,w,b,k,t,e,f,f,w,w,p,w,o,e,n,s,g -e,x,y,g,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,k,v,d -e,x,y,g,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,n,y,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,n,v,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,k,v,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,n,y,d -e,f,f,g,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,k,s,g -p,x,f,g,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,y,p -e,x,f,e,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,k,y,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,n,y,d -p,f,s,w,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,k,v,g -e,f,y,n,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,k,v,d -e,x,y,g,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,n,v,d -e,x,f,g,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,n,a,g -e,x,f,n,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,n,y,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,k,y,d -e,f,f,g,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,n,s,g -e,f,f,g,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,k,y,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,n,v,d -e,f,s,g,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,n,a,g -e,x,f,n,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,k,y,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,k,y,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,k,y,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,k,y,d -e,f,s,w,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,k,a,g -e,x,y,n,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,n,v,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,n,v,d -e,f,f,n,f,n,f,w,b,n,t,e,s,f,w,w,p,w,o,e,k,s,g -e,f,f,g,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,k,v,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,k,y,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,k,y,d -e,x,f,n,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,n,v,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,n,v,d -e,x,f,n,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,k,y,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,n,v,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,k,y,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,k,y,d -p,x,s,p,f,c,f,w,n,n,e,b,s,s,w,w,p,w,o,p,k,v,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,k,y,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,n,y,d 
-p,f,s,n,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,k,s,u -e,x,y,n,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,k,y,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,k,v,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,n,y,d -e,f,s,w,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,k,s,g -e,x,y,g,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,n,v,d -e,x,f,n,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,n,s,g -e,x,f,n,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,k,v,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,n,v,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,k,v,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,n,y,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,k,v,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,k,y,d -e,x,f,n,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,n,v,d -e,x,f,n,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,k,y,d -e,f,y,n,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,k,v,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,k,v,d -e,f,y,n,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,k,y,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,n,v,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,k,y,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,n,y,d -e,x,f,n,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,n,y,d -e,f,y,g,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,n,v,d -e,x,f,n,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,k,v,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,k,y,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,k,y,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,n,v,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,n,v,d -e,f,f,e,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,n,v,d -e,x,f,n,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,n,v,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,k,v,d -e,x,f,w,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,n,s,g -e,f,y,g,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,n,y,d -e,f,s,g,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,n,s,g -e,f,f,n,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,k,y,d -p,f,y,w,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,n,s,u -e,x,y,g,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,k,y,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,k,y,d 
-e,x,y,n,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,k,v,d -e,f,s,w,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,y,n,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,k,y,d -e,x,f,n,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,k,y,d -e,x,f,n,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,n,y,d -p,f,s,w,t,p,f,c,n,w,e,e,s,s,w,w,p,w,o,p,n,v,u -e,x,f,g,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,k,v,d -e,f,s,w,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,k,s,g -e,x,s,g,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,n,s,g -e,x,f,g,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,k,v,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,k,y,d -e,x,f,n,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,y,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,k,y,d -e,x,f,n,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,k,a,g -p,x,s,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,n,v,g -e,x,y,g,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,n,v,d -e,x,f,n,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,k,v,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,k,y,d -p,x,s,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,v,g -e,f,f,g,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,k,v,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,k,v,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,k,y,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,n,v,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,k,y,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,k,y,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,k,y,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,k,v,d -e,x,f,n,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,n,y,d -e,x,f,n,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,k,y,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,n,v,d -p,x,s,p,f,c,f,c,n,g,e,b,s,s,w,w,p,w,o,p,k,v,d -e,x,f,e,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,k,y,d -e,x,f,n,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,k,y,d -e,f,f,e,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,n,y,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,n,v,d 
-e,f,y,n,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,k,y,d -e,f,y,e,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,k,v,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,k,y,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,y,g -e,x,y,n,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,k,v,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,n,y,d -e,x,f,n,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,k,y,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,n,y,d -e,x,f,g,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,n,a,g -e,x,y,g,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,k,y,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,k,v,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,n,v,d -e,x,f,n,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,k,y,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,n,v,d -e,x,f,n,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,k,v,d -e,x,f,n,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,n,v,d -e,x,f,n,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,k,v,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,n,v,d -e,x,y,g,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,k,v,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,k,y,d -p,f,y,n,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,s,u -e,x,s,g,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,n,s,g -e,x,f,g,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,k,v,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,k,y,d -p,x,s,p,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,n,s,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,k,v,d -e,x,s,w,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,n,s,g -e,f,f,g,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,n,v,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,n,v,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,k,v,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,k,y,d -p,f,y,w,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,n,v,u -e,f,f,n,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,n,v,d -e,x,y,g,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,k,v,d -e,f,s,w,f,n,f,w,b,h,t,e,f,f,w,w,p,w,o,e,k,s,g -e,x,f,n,f,n,f,w,b,k,t,e,f,s,w,w,p,w,o,e,n,a,g -e,x,f,n,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,k,v,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,n,v,d -p,x,s,w,f,c,f,w,n,p,e,b,s,s,w,w,p,w,o,p,n,s,d 
-e,f,f,n,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,k,y,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,k,v,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,k,y,d -e,f,s,w,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,n,s,g -e,x,y,g,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,n,y,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,n,y,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,k,y,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,k,y,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,k,y,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,y,p -e,f,f,g,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,k,v,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,v,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,k,v,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,y,d -p,x,f,p,f,c,f,w,n,n,e,b,s,s,w,w,p,w,o,p,n,s,d -e,x,f,g,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,n,v,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,n,v,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,k,v,d -e,x,f,n,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,k,y,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,k,y,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,n,v,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,k,y,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,n,v,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,k,y,d -p,x,f,w,f,c,f,c,n,g,e,b,s,s,w,w,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,k,y,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,n,y,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,k,v,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,k,y,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,y,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,k,y,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,n,v,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,k,v,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,v,g -e,x,y,n,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,n,y,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,n,y,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,k,y,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,k,y,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,k,v,d -e,x,y,g,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,n,v,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,n,v,d -e,x,y,g,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,n,v,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,k,v,d 
-e,f,f,n,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,k,y,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,k,y,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,v,p -e,x,f,g,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,k,y,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,k,y,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,k,y,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,k,v,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,k,y,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,v,p -e,x,f,e,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,k,y,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,n,y,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,n,y,d -e,x,f,n,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,k,y,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,n,v,d -e,x,f,n,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,k,y,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,n,v,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,n,v,d -p,x,f,g,f,c,f,c,n,g,e,b,s,s,w,w,p,w,o,p,n,v,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,n,v,d -e,x,f,n,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,n,y,d -e,f,s,n,f,n,f,w,b,p,t,e,s,f,w,w,p,w,o,e,k,a,g -e,f,f,n,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,n,v,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,n,y,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,n,v,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,n,v,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,n,v,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,n,v,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,k,y,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,k,v,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,n,y,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,k,y,d -e,f,f,w,f,n,f,w,b,h,t,e,s,f,w,w,p,w,o,e,k,s,g -e,x,y,n,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,n,v,d -e,x,f,g,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,k,y,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,n,y,d -e,f,y,n,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,k,v,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,k,y,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,k,v,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,k,v,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,n,y,d 
-e,x,f,g,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,k,v,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,k,y,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,k,v,d -e,x,f,e,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,n,y,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,k,y,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,k,v,d -p,x,f,p,f,c,f,c,n,p,e,b,s,s,w,w,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,k,y,d -e,x,f,n,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,k,v,d -e,f,s,n,f,n,f,w,b,n,t,e,s,s,w,w,p,w,o,e,n,a,g -e,x,y,g,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,k,y,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,k,y,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,y,g -e,x,y,e,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,n,v,d -e,x,f,g,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,k,v,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,k,y,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,k,v,d -e,x,f,e,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,k,v,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,k,y,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,n,y,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,k,y,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,k,v,d -e,x,f,n,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,n,y,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,n,y,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,n,v,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,k,y,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,n,y,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,n,y,d -e,x,s,g,f,n,f,w,b,h,t,e,f,s,w,w,p,w,o,e,k,s,g -e,f,f,n,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,k,y,d -e,x,f,n,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,k,y,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,k,y,d 
-e,x,f,e,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,k,y,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,k,y,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,n,y,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,k,y,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,n,y,d -p,x,f,g,f,c,f,c,n,p,e,b,s,s,w,w,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,n,y,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,k,y,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,k,y,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,n,y,d -e,f,y,g,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,n,v,d -e,x,f,g,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,v,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,n,v,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,n,y,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,y,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,k,v,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,k,v,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,n,y,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,k,v,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,n,v,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,k,y,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,n,v,d -e,x,f,g,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,k,y,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,k,y,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,k,y,d -e,x,f,n,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,n,y,d -e,f,s,g,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,k,s,g -e,x,y,e,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,k,y,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,n,v,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,k,v,d -p,x,f,w,f,c,f,w,n,g,e,b,s,s,w,w,p,w,o,p,n,s,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,n,y,d -e,f,f,w,f,n,f,w,b,h,t,e,s,s,w,w,p,w,o,e,n,s,g -e,x,f,g,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,n,y,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,n,v,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,n,y,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,k,y,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,k,v,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,n,v,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,n,v,d -e,x,y,g,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,n,v,d 
-e,x,y,g,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,n,v,d -e,x,f,n,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,k,y,d -e,f,f,n,f,n,f,w,b,n,t,e,f,f,w,w,p,w,o,e,k,s,g -e,x,f,e,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,n,y,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,k,v,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,k,y,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,k,v,d -e,x,y,g,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,n,y,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,k,v,d -e,x,f,e,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,k,y,d -e,x,f,n,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,k,v,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,k,v,d -p,x,f,p,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,n,v,d -e,f,s,n,f,n,f,w,b,p,t,e,f,f,w,w,p,w,o,e,n,s,g -e,x,y,g,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,k,y,d -e,x,f,n,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,k,y,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,n,y,d -e,x,f,n,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,n,v,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,n,y,d -p,x,s,p,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,k,s,d -e,f,y,n,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,k,v,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,k,v,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,n,y,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,k,v,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,n,y,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,k,y,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,n,y,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,n,v,d -e,x,s,w,f,n,f,w,b,k,t,e,s,f,w,w,p,w,o,e,k,s,g -e,x,y,n,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,k,v,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,n,y,d -e,f,f,e,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,k,y,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,k,v,d -p,x,s,g,f,c,f,c,n,n,e,b,s,s,w,w,p,w,o,p,k,v,d 
-e,f,y,e,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,k,v,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,k,v,d -e,x,f,n,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,k,a,g -e,x,y,e,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,k,y,d -p,x,f,w,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,n,y,d -e,f,y,n,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,k,v,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,n,y,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,y,p -e,x,y,g,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,n,y,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,k,v,d -e,x,f,n,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,k,y,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,n,v,d -e,f,f,e,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,n,y,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,y,p -e,x,f,g,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,n,y,d -e,x,f,n,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,n,v,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,k,y,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,k,v,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,v,p -e,x,f,n,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,n,y,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,n,v,d -e,x,f,n,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,k,v,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,n,y,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,k,v,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,n,v,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,k,y,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,k,y,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,k,y,d -e,x,f,n,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,k,v,d -e,x,f,n,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,n,y,d 
-e,f,f,n,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,n,y,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,k,y,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,n,v,d -e,f,f,g,f,n,f,w,b,n,t,e,f,s,w,w,p,w,o,e,n,s,g -e,x,y,e,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,k,v,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,k,v,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,k,v,d -e,x,f,n,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,n,y,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,n,y,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,n,v,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,k,y,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,v,d -e,x,f,n,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,k,v,d -e,x,y,g,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,n,y,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,k,y,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,k,y,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,k,v,d -p,x,y,y,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,v,p -e,f,y,n,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,n,y,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,y,d -p,x,f,g,f,c,f,w,n,g,e,b,s,s,w,w,p,w,o,p,k,s,d -e,f,f,g,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,n,y,d -e,f,f,e,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,n,v,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,k,y,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,k,y,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,y,p -e,f,f,g,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,n,v,d -e,f,y,e,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,k,y,d -e,f,y,e,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,n,y,d -p,x,f,w,f,c,f,c,n,p,e,b,s,s,w,w,p,w,o,p,n,s,d -e,x,f,e,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,n,y,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,v,g -p,x,f,g,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,y,p -e,f,y,e,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,n,v,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,k,v,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,n,y,d -e,f,y,e,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,n,v,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,n,y,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,k,v,d -p,x,f,p,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,k,s,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,y,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,n,y,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,k,v,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,n,y,d -e,f,f,e,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,k,y,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,n,v,d -p,f,f,g,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,v,p -e,f,y,n,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,n,y,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,n,v,d -p,x,f,g,f,c,f,c,n,p,e,b,s,s,w,w,p,w,o,p,n,s,d 
-e,f,f,e,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,k,y,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,k,v,d -p,x,y,y,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,v,g -p,f,f,y,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,v,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,k,v,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,k,v,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,n,v,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,k,v,d -p,x,s,p,f,c,f,w,n,n,e,b,s,s,w,w,p,w,o,p,n,v,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,k,y,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,k,v,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,k,y,d -e,f,y,n,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,k,v,d -p,x,s,w,f,c,f,c,n,p,e,b,s,s,w,w,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,n,v,d -e,f,y,g,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,n,y,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,n,v,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,k,v,d -p,x,s,g,f,c,f,c,n,u,e,b,s,s,w,w,p,w,o,p,n,s,d -p,x,f,p,f,c,f,c,n,n,e,b,s,s,w,w,p,w,o,p,k,s,d -e,f,y,e,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,k,y,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,y,d -p,x,s,w,f,c,f,w,n,g,e,b,s,s,w,w,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,v,p -e,f,y,n,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,n,v,d -p,x,y,y,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,y,p -e,f,y,g,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,n,v,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,k,y,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,k,y,d -p,x,s,w,f,c,f,c,n,p,e,b,s,s,w,w,p,w,o,p,n,s,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,v,p -e,f,y,g,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,k,y,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,k,y,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,v,p -e,f,y,n,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,n,y,d -p,f,f,g,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,y,p -e,f,f,e,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,n,y,d -e,x,f,n,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,k,v,d -p,f,f,g,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,v,p -e,f,y,g,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,k,v,d -e,f,y,g,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,n,y,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,n,y,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,v,p -e,f,f,e,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,n,v,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,v,g -e,f,y,e,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,k,y,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,y,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,n,y,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,y,g -p,x,f,g,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,y,p -p,x,f,g,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,y,g -e,f,y,e,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,n,y,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,v,g -e,f,f,e,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,n,v,d -p,x,s,g,f,c,f,w,n,g,e,b,s,s,w,w,p,w,o,p,n,v,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,k,v,d -p,x,s,p,f,c,f,c,n,p,e,b,s,s,w,w,p,w,o,p,n,s,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,n,y,d -e,f,f,e,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,n,y,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,k,y,d -e,f,y,n,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,k,y,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,y,g -p,f,f,y,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,y,p -p,x,s,g,f,c,f,w,n,g,e,b,s,s,w,w,p,w,o,p,k,s,d -e,f,f,e,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,k,v,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,k,y,d -e,f,y,e,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,k,v,d -e,f,y,n,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,k,y,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,v,g -p,x,f,g,f,c,f,c,n,u,e,b,s,s,w,w,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,k,y,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,n,v,d 
-e,f,y,e,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,n,v,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,k,y,d -e,f,y,n,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,k,y,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,k,v,d -p,x,s,g,f,c,f,c,n,u,e,b,s,s,w,w,p,w,o,p,k,v,d -p,x,f,w,f,c,f,c,n,n,e,b,s,s,w,w,p,w,o,p,n,v,d -e,f,f,g,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,k,y,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,n,y,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,y,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,k,v,d -p,x,s,w,f,c,f,w,n,p,e,b,s,s,w,w,p,w,o,p,k,s,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,n,y,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,k,y,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,n,v,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,n,v,d -e,f,y,n,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,n,y,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,k,v,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,n,y,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,k,y,d -e,f,y,g,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,n,v,d -e,x,f,n,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,k,y,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,k,y,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,k,v,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,n,v,d -p,x,f,y,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,v,p -e,f,y,g,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,n,y,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,y,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,k,y,d -e,f,y,g,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,k,y,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,n,y,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,n,y,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,k,v,d -p,x,s,w,f,c,f,c,n,g,e,b,s,s,w,w,p,w,o,p,n,s,d -p,x,f,w,f,c,f,w,n,n,e,b,s,s,w,w,p,w,o,p,n,s,d -e,f,f,n,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,k,v,d -e,f,f,e,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,k,y,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,n,y,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,y,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,n,v,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,k,y,d -e,f,f,g,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,k,y,d -p,x,s,w,f,c,f,w,n,g,e,b,s,s,w,w,p,w,o,p,n,s,d -e,x,y,e,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,k,v,d -e,f,y,e,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,y,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,k,y,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,v,p -p,x,f,p,f,c,f,w,n,g,e,b,s,s,w,w,p,w,o,p,k,s,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,v,p -p,x,f,w,f,c,f,c,n,g,e,b,s,s,w,w,p,w,o,p,n,s,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,v,p -e,f,y,e,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,n,v,d -p,x,y,g,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,v,p -p,x,s,g,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,n,s,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,n,y,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,k,v,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,n,y,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,v,p -p,x,f,p,f,c,f,w,n,g,e,b,s,s,w,w,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,n,v,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,k,y,d -e,f,y,n,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,k,y,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,k,v,d -p,x,y,g,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,v,g -e,f,y,n,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,n,v,d -p,x,f,p,f,c,f,c,n,u,e,b,s,s,w,w,p,w,o,p,k,v,d -p,x,f,w,f,c,f,c,n,n,e,b,s,s,w,w,p,w,o,p,k,s,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,n,y,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,n,y,d -e,f,y,e,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,k,v,d -e,f,y,g,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,v,d 
-p,x,f,g,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,y,p -e,f,f,e,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,k,y,d -p,x,y,y,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,v,g -p,x,s,p,f,c,f,c,n,u,e,b,s,s,w,w,p,w,o,p,k,s,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,n,y,d -p,f,f,y,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,v,d -e,f,y,e,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,k,y,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,n,v,d -e,x,f,n,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,k,y,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,v,g -e,f,y,e,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,n,y,d -p,f,f,g,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,y,p -e,f,y,e,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,n,y,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,n,y,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,y,g -e,f,y,g,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,k,v,d -e,f,y,g,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,k,y,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,v,g -p,x,f,w,f,c,f,c,n,u,e,b,s,s,w,w,p,w,o,p,n,s,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,k,v,d -p,x,f,y,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,v,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,n,y,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,n,v,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,n,v,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,k,v,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,n,v,d -p,x,s,g,f,c,f,w,n,g,e,b,s,s,w,w,p,w,o,p,k,v,d -e,f,y,g,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,n,y,d -e,f,y,n,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,n,y,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,n,y,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,n,y,d -e,f,y,n,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,n,v,d -e,f,f,g,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,n,y,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,y,p -p,x,s,w,f,c,f,w,n,p,e,b,s,s,w,w,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,k,v,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,y,p -p,x,s,w,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,n,s,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,y,p -e,x,y,g,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,k,y,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,v,g -e,f,y,e,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,n,y,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,v,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,k,y,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,k,v,d -p,x,s,w,f,c,f,c,n,p,e,b,s,s,w,w,p,w,o,p,k,s,d -p,x,y,y,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,y,g -e,f,f,e,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,k,y,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,y,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,y,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,y,g -e,f,y,n,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,n,y,d -e,f,f,g,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,k,y,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,n,v,d -e,f,y,g,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,k,y,d -p,x,s,g,f,c,f,w,n,n,e,b,s,s,w,w,p,w,o,p,n,s,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,k,v,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,n,v,d -e,f,y,n,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,n,v,d -e,f,y,g,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,k,v,d -p,x,f,p,f,c,f,c,n,g,e,b,s,s,w,w,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,y,g -e,f,y,g,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,k,y,d -e,f,f,g,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,k,y,d -p,x,s,p,f,c,f,w,n,p,e,b,s,s,w,w,p,w,o,p,n,v,d 
-e,x,y,g,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,n,y,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,y,p -p,x,f,g,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,y,p -e,f,y,n,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,n,y,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,n,v,d -p,x,s,g,f,c,f,c,n,u,e,b,s,s,w,w,p,w,o,p,n,v,d -p,f,f,g,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,v,d -p,x,f,p,f,c,f,w,n,p,e,b,s,s,w,w,p,w,o,p,n,s,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,k,v,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,k,v,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,n,y,d -p,x,f,y,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,y,g -e,f,f,g,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,k,y,d -p,x,f,w,f,c,f,w,n,p,e,b,s,s,w,w,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,v,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,n,y,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,n,y,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,v,p -p,x,f,g,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,v,p -p,x,s,p,f,c,f,w,n,p,e,b,s,s,w,w,p,w,o,p,k,v,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,v,d -e,f,y,n,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,n,y,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,n,v,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,n,y,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,n,y,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,k,y,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,k,y,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,n,y,d -p,x,f,p,f,c,f,w,n,n,e,b,s,s,w,w,p,w,o,p,k,s,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,n,v,d -e,f,f,e,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,k,v,d -p,x,s,w,f,c,f,w,n,n,e,b,s,s,w,w,p,w,o,p,n,s,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,k,y,d -e,f,y,n,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,v,g -e,f,f,e,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,y,g -e,f,f,e,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,n,y,d -e,f,y,g,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,n,y,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,v,g -e,f,y,e,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,n,v,d -p,x,f,w,f,c,f,c,n,p,e,b,s,s,w,w,p,w,o,p,k,s,d -e,f,y,n,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,k,y,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,n,y,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,n,v,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,n,y,d -e,f,y,e,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,y,d -e,f,y,g,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,k,y,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,n,v,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,n,y,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,n,v,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,k,y,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,k,y,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,k,v,d -p,x,f,p,f,c,f,c,n,u,e,b,s,s,w,w,p,w,o,p,n,s,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,v,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,n,v,d -e,f,y,e,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,n,v,d -p,x,s,w,f,c,f,c,n,g,e,b,s,s,w,w,p,w,o,p,k,s,d -p,x,f,p,f,c,f,w,n,g,e,b,s,s,w,w,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,y,p -e,x,f,g,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,k,v,d -e,f,y,e,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,k,v,d -p,x,s,p,f,c,f,w,n,p,e,b,s,s,w,w,p,w,o,p,k,s,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,n,y,d -e,f,y,n,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,k,y,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,v,g -e,f,y,n,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,k,v,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,k,y,d -p,x,f,w,f,c,f,w,n,g,e,b,s,s,w,w,p,w,o,p,k,v,d 
-e,f,y,n,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,n,y,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,v,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,k,y,d -p,x,f,y,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,y,p -e,f,f,g,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,k,v,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,k,y,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,n,v,d -e,f,y,g,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,k,v,d -p,x,f,g,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,k,v,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,n,y,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,y,p -e,f,y,n,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,n,v,d -p,x,s,g,f,c,f,w,n,g,e,b,s,s,w,w,p,w,o,p,n,s,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,v,g -e,f,f,e,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,k,v,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,n,v,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,n,y,d -p,x,f,w,f,c,f,c,n,p,e,b,s,s,w,w,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,v,g -p,x,f,g,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,n,v,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,n,v,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,n,v,d -p,f,y,g,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,y,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,y,g -e,f,y,e,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,n,y,d -p,x,f,p,f,c,f,w,n,g,e,b,s,s,w,w,p,w,o,p,n,s,d -p,x,s,g,f,c,f,c,n,u,e,b,s,s,w,w,p,w,o,p,k,s,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,k,v,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,n,y,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,k,y,d -e,f,y,n,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,k,v,d -p,x,s,p,f,c,f,w,n,g,e,b,s,s,w,w,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,y,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,k,v,d -e,f,y,e,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,n,v,d -e,f,f,g,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,k,v,d -e,f,f,e,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,n,y,d -e,f,y,n,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,n,y,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,k,v,d -p,f,f,y,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,v,p -e,x,f,g,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,k,y,d -p,x,s,p,f,c,f,w,n,p,e,b,s,s,w,w,p,w,o,p,n,s,d -e,x,y,e,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,n,y,d -e,f,f,g,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,k,y,d -e,f,f,e,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,k,v,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,k,v,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,n,v,d -p,x,s,w,f,c,f,w,n,n,e,b,s,s,w,w,p,w,o,p,n,v,d -p,x,y,g,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,y,d -p,x,f,p,f,c,f,c,n,n,e,b,s,s,w,w,p,w,o,p,k,v,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,n,y,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,n,v,d -p,x,f,g,f,c,f,c,n,n,e,b,s,s,w,w,p,w,o,p,n,s,d -e,f,y,n,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,k,y,d -p,x,f,p,f,c,f,c,n,g,e,b,s,s,w,w,p,w,o,p,k,v,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,n,y,d -p,x,s,w,f,c,f,w,n,n,e,b,s,s,w,w,p,w,o,p,k,s,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,y,g -p,x,s,g,f,c,f,c,n,n,e,b,s,s,w,w,p,w,o,p,k,s,d -e,f,y,n,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,n,y,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,y,g -p,x,s,p,f,c,f,c,n,p,e,b,s,s,w,w,p,w,o,p,k,v,d -e,f,y,e,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,k,y,d -p,x,s,g,f,c,f,w,n,p,e,b,s,s,w,w,p,w,o,p,n,s,d -e,x,y,g,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,n,v,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,k,y,d -e,f,y,n,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,n,y,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,k,v,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,n,y,d -p,x,s,g,f,c,f,c,n,n,e,b,s,s,w,w,p,w,o,p,n,s,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,k,v,d 
-e,x,f,e,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,n,v,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,n,y,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,n,y,d -e,f,y,n,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,y,g -p,x,s,g,f,c,f,c,n,p,e,b,s,s,w,w,p,w,o,p,n,s,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,n,y,d -p,x,f,g,f,c,f,w,n,g,e,b,s,s,w,w,p,w,o,p,k,v,d -e,f,y,n,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,k,y,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,k,y,d -e,f,f,e,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,k,v,d -p,x,f,y,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,v,p -p,x,s,g,f,c,f,w,n,p,e,b,s,s,w,w,p,w,o,p,k,v,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,n,y,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,n,y,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,k,y,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,n,v,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,v,p -e,f,y,g,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,k,y,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,n,v,d -p,x,f,g,f,c,f,w,n,g,e,b,s,s,w,w,p,w,o,p,n,v,d -e,f,y,g,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,n,v,d -p,f,y,g,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,y,p -e,f,f,e,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,k,y,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,v,p -e,f,y,g,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,k,v,d -p,x,f,p,f,c,f,c,n,n,e,b,s,s,w,w,p,w,o,p,n,s,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,n,v,d -p,x,f,p,f,c,f,w,n,p,e,b,s,s,w,w,p,w,o,p,n,v,d -p,x,s,w,f,c,f,c,n,u,e,b,s,s,w,w,p,w,o,p,n,s,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,n,v,d -p,x,s,p,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,y,d -e,x,f,n,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,n,v,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,k,y,d -e,f,y,e,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,k,y,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,y,g -p,x,f,g,f,c,f,w,n,n,e,b,s,s,w,w,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,v,p -p,x,f,w,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,n,s,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,k,y,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,k,y,d -p,x,f,w,f,c,f,w,n,g,e,b,s,s,w,w,p,w,o,p,k,s,d -e,x,y,g,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,k,y,d -e,f,y,n,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,k,y,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,k,y,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,k,y,d -p,x,f,w,f,c,f,w,n,n,e,b,s,s,w,w,p,w,o,p,k,s,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,n,y,d -p,x,y,y,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,y,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,n,v,d -e,f,y,n,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,n,v,d -p,x,f,g,f,c,f,w,n,p,e,b,s,s,w,w,p,w,o,p,n,v,d -p,x,s,p,f,c,f,c,n,n,e,b,s,s,w,w,p,w,o,p,n,v,d -e,f,f,e,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,n,y,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,k,v,d -e,f,y,g,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,k,y,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,v,p -e,x,y,n,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,k,v,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,k,y,d -e,f,f,g,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,n,y,d -p,x,f,g,f,c,f,c,n,p,e,b,s,s,w,w,p,w,o,p,k,s,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,v,p -e,f,y,g,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,k,v,d -e,f,f,e,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,k,v,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,k,y,d -p,x,s,w,f,c,f,c,n,u,e,b,s,s,w,w,p,w,o,p,n,v,d -p,x,f,w,f,c,f,w,n,p,e,b,s,s,w,w,p,w,o,p,n,s,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,k,v,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,k,v,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,k,y,d 
-p,x,f,g,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,v,g -e,f,y,g,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,k,y,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,y,p -e,f,f,e,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,n,y,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,v,g -e,f,y,g,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,n,v,d -e,f,f,e,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,k,y,d -e,f,y,n,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,y,d -e,f,f,e,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,n,v,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,n,v,d -p,x,f,w,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,k,s,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,k,y,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,k,y,d -e,f,y,e,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,n,y,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,k,v,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,v,g -e,x,f,g,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,y,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,k,v,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,n,y,d -p,x,y,g,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,v,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,n,y,d -p,f,f,g,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,v,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,k,y,d -p,x,s,g,f,c,f,c,n,g,e,b,s,s,w,w,p,w,o,p,n,v,d -e,f,y,e,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,k,v,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,k,v,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,k,v,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,n,v,d -p,x,s,p,f,c,f,w,n,n,e,b,s,s,w,w,p,w,o,p,k,s,d -p,f,f,g,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,v,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,n,v,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,n,y,d -p,x,f,p,f,c,f,c,n,u,e,b,s,s,w,w,p,w,o,p,n,v,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,n,y,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,k,v,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,k,y,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,n,y,d -p,x,s,w,f,c,f,w,n,p,e,b,s,s,w,w,p,w,o,p,n,v,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,k,y,d -p,x,y,g,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,v,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,k,v,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,k,v,d -p,x,s,g,f,c,f,w,n,n,e,b,s,s,w,w,p,w,o,p,k,s,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,n,y,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,y,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,n,y,d -e,f,y,e,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,n,v,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,k,v,d -p,x,s,p,f,c,f,c,n,n,e,b,s,s,w,w,p,w,o,p,n,s,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,v,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,k,v,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,k,v,d -p,x,s,p,f,c,f,c,n,p,e,b,s,s,w,w,p,w,o,p,n,v,d -e,f,f,g,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,n,y,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,k,y,d -e,f,f,n,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,k,v,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,n,y,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,n,y,d -p,x,f,p,f,c,f,c,n,p,e,b,s,s,w,w,p,w,o,p,k,v,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,k,v,d -p,x,y,y,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,y,d -p,x,s,g,f,c,f,w,n,p,e,b,s,s,w,w,p,w,o,p,n,v,d -e,f,y,n,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,k,v,d -p,f,f,g,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,v,d -p,x,f,g,f,c,f,w,n,n,e,b,s,s,w,w,p,w,o,p,k,s,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,k,y,d -e,f,y,g,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,k,v,d 
-e,f,y,g,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,n,y,d -p,x,f,y,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,v,g -e,f,f,g,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,n,y,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,v,p -e,f,y,e,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,n,v,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,n,y,d -p,f,f,g,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,y,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,k,v,d -p,x,f,y,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,y,p -e,f,f,e,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,n,v,d -e,f,f,g,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,n,v,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,k,v,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,n,v,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,k,y,d -e,f,y,n,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,n,v,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,n,v,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,v,g -e,f,y,n,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,k,y,d -p,x,f,g,f,c,f,c,n,n,e,b,s,s,w,w,p,w,o,p,k,v,d -e,f,y,n,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,n,v,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,k,v,d -e,x,f,e,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,k,y,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,k,y,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,n,v,d -e,x,y,g,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,n,v,d -p,x,f,p,f,c,f,c,n,g,e,b,s,s,w,w,p,w,o,p,k,s,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,n,y,d -e,f,y,n,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,k,v,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,k,y,d -p,f,f,g,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,y,g -e,f,y,g,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,n,v,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,k,y,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,y,g -e,f,y,n,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,k,y,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,v,g -e,f,f,e,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,k,y,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,k,y,d -e,x,y,g,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,k,y,d -e,f,y,e,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,k,v,d -p,x,y,g,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,v,d -p,x,s,g,f,c,f,c,n,g,e,b,s,s,w,w,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,v,g -p,x,f,g,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,v,p -p,f,f,g,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,y,p -e,f,f,g,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,v,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,k,v,d -e,f,y,n,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,n,v,d -e,f,y,n,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,k,y,d -p,x,y,y,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,y,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,k,y,d -p,x,f,p,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,n,s,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,n,y,d -e,f,y,n,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,k,y,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,k,v,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,n,y,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,y,g -p,x,f,g,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,v,p -e,f,f,g,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,k,v,d -e,x,f,n,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,v,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,n,y,d -p,x,f,p,f,c,f,c,n,g,e,b,s,s,w,w,p,w,o,p,n,s,d -e,f,y,n,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,n,v,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,k,y,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,y,p -e,x,f,e,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,k,y,d -p,x,s,g,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,k,s,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,v,g -e,f,y,n,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,v,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,n,y,d 
-e,f,f,g,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,n,y,d -e,f,y,n,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,n,y,d -e,f,y,e,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,n,y,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,k,y,d -e,f,y,e,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,k,y,d -e,f,y,n,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,k,y,d -p,x,y,y,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,y,g -e,f,f,g,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,k,y,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,y,g -p,x,f,g,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,v,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,y,p -e,f,y,e,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,n,y,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,k,v,d -p,x,y,g,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,y,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,v,p -p,f,y,g,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,v,p -e,f,y,n,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,n,v,d -p,x,f,g,f,c,f,w,n,n,e,b,s,s,w,w,p,w,o,p,n,s,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,n,v,d -p,f,f,y,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,y,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,n,v,d -e,x,y,e,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,k,y,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,y,p -e,x,y,n,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,k,y,d -p,x,s,w,f,c,f,c,n,g,e,b,s,s,w,w,p,w,o,p,n,v,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,k,v,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,n,v,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,n,y,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,v,d -e,f,y,e,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,n,v,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,n,y,d -p,x,s,p,f,c,f,c,n,g,e,b,s,s,w,w,p,w,o,p,k,s,d -e,f,f,e,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,k,y,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,n,y,d -e,f,f,g,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,v,p -e,f,y,n,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,n,y,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,k,v,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,k,y,d -p,x,s,p,f,c,f,c,n,n,e,b,s,s,w,w,p,w,o,p,k,s,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,k,y,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,k,v,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,k,y,d -p,x,s,w,f,c,f,c,n,u,e,b,s,s,w,w,p,w,o,p,k,s,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,v,p -e,f,y,n,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,v,g -p,x,f,w,f,c,f,c,n,n,e,b,s,s,w,w,p,w,o,p,n,s,d -e,f,y,n,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,k,y,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,n,v,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,n,v,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,n,v,d -p,x,f,w,f,c,f,w,n,p,e,b,s,s,w,w,p,w,o,p,n,v,d -e,f,f,e,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,v,g -e,f,y,n,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,n,y,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,n,v,d -p,x,s,p,f,c,f,c,n,u,e,b,s,s,w,w,p,w,o,p,k,v,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,n,y,d -p,x,s,p,f,c,f,c,n,g,e,b,s,s,w,w,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,n,y,d -p,x,y,g,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,y,g -e,f,y,e,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,k,y,d -p,x,f,p,f,c,f,c,n,p,e,b,s,s,w,w,p,w,o,p,n,s,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,v,p -e,f,f,e,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,n,y,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,k,y,d -p,x,f,g,f,c,f,c,n,u,e,b,s,s,w,w,p,w,o,p,n,s,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,v,g -e,f,y,g,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,n,y,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,v,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,k,v,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,k,v,d 
-p,f,y,y,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,y,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,k,y,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,k,v,d -e,f,y,n,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,n,y,d -e,x,f,n,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,k,v,d -p,f,y,g,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,v,d -e,f,y,g,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,n,v,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,k,y,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,n,v,d -e,x,y,g,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,n,y,d -e,x,y,n,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,v,d -p,x,s,w,f,c,f,w,n,g,e,b,s,s,w,w,p,w,o,p,n,v,d -p,f,f,y,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,v,p -e,f,y,n,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,n,v,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,p,g,p,w,o,p,k,y,d -e,f,f,e,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,n,y,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,n,y,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,n,v,d -e,f,y,n,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,k,y,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,k,v,d -p,x,y,g,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,y,g -e,f,y,e,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,n,y,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,n,y,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,v,d -p,x,f,w,f,c,f,w,n,p,e,b,s,s,w,w,p,w,o,p,k,s,d -p,x,f,g,f,c,f,w,n,p,e,b,s,s,w,w,p,w,o,p,k,v,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,n,y,d -p,f,f,y,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,v,p -p,x,s,g,f,c,f,w,n,n,e,b,s,s,w,w,p,w,o,p,n,v,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,n,v,d -e,f,y,e,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,n,y,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,k,v,d -p,x,f,y,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,v,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,n,v,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,n,v,d -e,f,y,e,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,n,v,d -p,x,f,y,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,v,p -p,x,f,g,f,c,f,c,n,n,e,b,s,s,w,w,p,w,o,p,n,v,d -e,f,f,e,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,k,v,d -p,x,s,g,f,c,f,c,n,g,e,b,s,s,w,w,p,w,o,p,n,s,d -p,x,f,w,f,c,f,c,n,u,e,b,s,s,w,w,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,y,g -e,f,y,g,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,n,v,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,k,v,d -p,f,f,y,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,y,p -e,f,f,e,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,k,y,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,n,v,d -e,f,f,g,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,n,v,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,n,y,d -e,f,f,g,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,k,v,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,k,y,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,k,v,d -e,f,y,n,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,n,v,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,n,y,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,n,y,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,k,y,d -e,x,f,g,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,n,y,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,n,y,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,n,v,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,k,y,d -e,f,f,n,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,k,y,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,y,p -e,f,y,e,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,n,y,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,k,y,d -e,f,y,n,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,n,v,d -e,f,y,n,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,k,y,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,n,y,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,v,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,n,v,d -p,f,f,g,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,v,d -e,f,y,g,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,n,y,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,n,v,d -e,f,f,g,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,k,y,d -p,f,f,y,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,v,p 
-e,x,y,n,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,n,v,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,n,y,d -p,f,f,g,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,v,d -e,f,y,n,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,k,v,d -p,x,f,g,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,k,s,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,n,y,d -p,x,s,w,f,c,f,w,n,g,e,b,s,s,w,w,p,w,o,p,k,s,d -e,f,y,n,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,k,v,d -p,x,s,w,f,c,f,c,n,n,e,b,s,s,w,w,p,w,o,p,n,v,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,n,y,d -e,f,f,g,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,n,v,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,k,y,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,n,y,d -p,x,s,g,f,c,f,c,n,p,e,b,s,s,w,w,p,w,o,p,k,s,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,k,y,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,k,v,d -p,x,f,p,f,c,f,w,n,n,e,b,s,s,w,w,p,w,o,p,k,v,d -e,f,f,e,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,k,y,d -p,x,s,p,f,c,f,c,n,u,e,b,s,s,w,w,p,w,o,p,n,v,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,k,y,d -p,f,f,g,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,y,g -e,f,y,g,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,n,y,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,y,g -e,f,y,n,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,k,v,d -e,f,y,e,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,n,v,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,k,v,d -p,x,s,p,f,c,f,w,n,g,e,b,s,s,w,w,p,w,o,p,k,s,d -p,x,y,g,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,y,p -e,f,f,e,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,n,v,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,k,v,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,k,v,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,n,y,d -e,f,y,e,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,k,v,d -e,x,f,g,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,n,y,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,y,d -e,x,y,n,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,n,y,d -e,f,y,e,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,k,y,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,k,y,d -p,x,f,w,f,c,f,c,n,n,e,b,s,s,w,w,p,w,o,p,k,v,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,k,y,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,n,y,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,y,g -e,f,y,g,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,n,y,d -p,x,s,w,f,c,f,w,n,n,e,b,s,s,w,w,p,w,o,p,k,v,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,k,v,d -p,x,s,w,f,c,f,c,n,n,e,b,s,s,w,w,p,w,o,p,k,s,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,v,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,y,d -e,f,y,g,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,n,v,d -e,f,y,g,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,n,v,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,n,y,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,k,y,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,v,p -e,f,f,e,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,n,y,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,k,y,d -p,x,y,g,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,v,g -e,f,f,e,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,k,y,d -p,x,f,g,f,c,f,w,n,p,e,b,s,s,w,w,p,w,o,p,n,s,d -e,f,y,g,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,k,y,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,n,y,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,n,y,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,k,y,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,n,y,d -e,f,y,g,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,k,y,d -p,x,s,p,f,c,f,c,n,p,e,b,s,s,w,w,p,w,o,p,k,s,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,y,g -e,f,y,e,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,k,v,d -e,f,y,g,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,n,v,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,n,v,d -p,x,s,w,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,n,v,d -p,x,f,w,f,c,f,w,n,n,e,b,s,s,w,w,p,w,o,p,n,v,d -e,x,y,n,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,n,v,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,k,v,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,k,v,d 
-e,x,y,g,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,n,y,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,v,d -p,x,f,w,f,c,f,w,n,n,e,b,s,s,w,w,p,w,o,p,k,v,d -e,f,y,n,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,k,y,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,k,v,d -p,x,f,w,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,k,v,d -p,x,s,g,f,c,f,w,n,p,e,b,s,s,w,w,p,w,o,p,k,s,d -e,x,y,e,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,v,d -p,x,f,g,f,c,f,c,n,p,e,b,s,s,w,w,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,v,d -p,x,s,g,f,c,f,c,n,n,e,b,s,s,w,w,p,w,o,p,n,v,d -e,x,f,g,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,k,v,d -p,x,f,p,f,c,f,c,n,u,e,b,s,s,w,w,p,w,o,p,k,s,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,n,v,d -p,x,s,p,f,c,f,c,n,n,e,b,s,s,w,w,p,w,o,p,k,v,d -p,x,f,g,f,c,f,w,n,p,e,b,s,s,w,w,p,w,o,p,k,s,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,k,y,d -p,x,f,g,f,c,f,c,n,n,e,b,s,s,w,w,p,w,o,p,k,s,d -p,x,s,g,f,c,f,w,n,n,e,b,s,s,w,w,p,w,o,p,k,v,d -e,f,y,g,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,n,y,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,n,y,d -p,x,f,g,f,c,f,w,n,g,e,b,s,s,w,w,p,w,o,p,n,s,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,k,v,d -p,x,s,w,f,c,f,c,n,n,e,b,s,s,w,w,p,w,o,p,n,s,d -e,f,y,n,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,v,d -p,f,y,g,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,y,g -e,f,y,g,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,v,g -e,f,y,g,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,y,g -p,f,f,g,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,y,p -e,f,f,e,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,k,v,d -p,x,f,w,f,c,f,c,n,p,e,b,s,s,w,w,p,w,o,p,n,v,d -e,f,f,g,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,n,y,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,y,p -p,x,y,y,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,v,d -p,f,y,g,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,y,p -p,x,f,y,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,v,g -p,x,f,w,f,c,f,w,n,g,e,b,s,s,w,w,p,w,o,p,n,v,d -p,x,y,y,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,y,d -p,x,y,y,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,v,d -p,x,f,y,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,v,p -p,f,f,y,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,v,d -p,f,f,y,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,y,d -e,f,y,g,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,k,y,d -p,x,y,g,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,y,g -p,f,f,y,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,y,p -p,f,f,y,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,y,g -e,x,f,g,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,k,y,d -e,f,y,n,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,n,v,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,n,v,d -p,x,y,y,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,v,g -p,f,y,g,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,v,g -p,f,y,g,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,v,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,k,y,d -p,f,y,g,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,v,g -e,f,f,e,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,k,v,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,y,d -p,x,y,y,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,y,g -p,f,f,y,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,y,g -e,f,y,e,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,n,v,d -p,x,f,y,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,y,d -p,f,y,g,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,y,p -p,f,f,y,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,v,g -p,f,y,y,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,v,d -p,x,y,g,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,y,p -p,f,f,y,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,v,p -p,x,s,p,f,c,f,c,n,g,e,b,s,s,w,w,p,w,o,p,n,s,d -p,x,y,y,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,v,g -e,f,f,e,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,k,v,d 
-e,f,y,e,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,k,y,d -p,x,f,p,f,c,f,c,n,n,e,b,s,s,w,w,p,w,o,p,n,v,d -p,x,f,y,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,v,g -p,f,y,g,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,v,g -e,x,y,g,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,k,y,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,w,g,p,w,o,p,n,y,d -p,f,y,g,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,v,d -e,f,y,g,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,n,v,d -p,x,f,y,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,v,d -p,f,y,g,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,y,g -e,f,f,e,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,n,v,d -p,x,y,y,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,v,g -p,f,f,y,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,y,g -p,x,y,y,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,v,g -p,f,f,g,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,y,g -p,x,s,p,f,c,f,w,n,n,e,b,s,s,w,w,p,w,o,p,n,s,d -p,x,f,g,f,c,f,c,n,g,e,b,s,s,w,w,p,w,o,p,k,s,d -p,x,y,g,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,v,p -p,x,y,y,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,y,p -p,f,f,g,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,v,d -e,f,y,g,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,n,v,d -p,x,s,g,t,f,f,c,b,w,t,b,f,s,w,w,p,w,o,p,h,v,u -p,x,y,y,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,y,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,k,v,d -p,f,f,y,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,y,d -p,x,y,g,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,y,p -p,f,f,y,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,v,p -p,x,y,g,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,y,g -p,f,y,g,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,y,g -p,f,y,y,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,y,p -p,x,f,y,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,y,p -p,x,s,w,f,c,f,c,n,g,e,b,s,s,w,w,p,w,o,p,k,v,d -p,x,y,g,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,v,p -p,x,f,g,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,y,p -e,f,y,n,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,n,v,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,k,v,d -p,x,f,y,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,y,p -p,x,s,p,f,c,f,c,n,u,e,b,s,s,w,w,p,w,o,p,n,s,d -p,f,f,g,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,v,g -p,x,f,g,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,v,g -p,x,s,w,f,c,f,c,n,u,e,b,s,s,w,w,p,w,o,p,k,v,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,n,v,d -p,f,y,g,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,v,g -p,x,f,p,f,c,f,w,n,p,e,b,s,s,w,w,p,w,o,p,k,v,d -p,f,s,w,t,f,f,c,b,p,t,b,f,s,w,w,p,w,o,p,h,v,g -p,x,y,y,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,v,p -e,x,y,b,t,n,f,c,b,e,e,?,s,s,e,w,p,w,t,e,w,c,w -p,f,f,g,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,y,g -p,x,f,g,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,y,g -p,x,f,y,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,y,g -p,f,f,y,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,y,g -e,f,f,g,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,n,y,d -p,x,f,y,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,v,d -p,x,y,g,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,v,d -p,f,f,g,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,y,g -p,x,f,g,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,n,s,d -p,x,f,g,f,c,f,c,n,u,e,b,s,s,w,w,p,w,o,p,n,v,d -p,x,y,g,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,v,g -p,x,s,g,t,f,f,c,b,h,t,b,s,s,w,w,p,w,o,p,h,s,g -p,x,y,g,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,v,g -e,f,y,g,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,k,y,d -p,f,f,y,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,v,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,k,v,d -p,f,y,g,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,y,p -e,f,y,n,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,k,v,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,k,y,d -p,x,y,g,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,y,p -p,x,y,g,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,y,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,n,v,d -p,f,f,g,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,v,p -e,f,f,g,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,k,y,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,k,y,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,n,v,d -p,f,f,g,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,y,p -e,x,f,e,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,k,v,d 
-p,f,y,g,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,v,d -p,x,y,y,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,v,p -p,f,y,y,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,v,d -p,x,f,y,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,v,p -p,x,y,y,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,v,d -e,x,f,g,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,n,v,d -e,x,y,g,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,n,v,d -p,f,y,g,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,v,g -p,f,f,y,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,y,d -p,x,y,g,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,y,p -p,x,y,e,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -p,x,y,g,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,y,p -p,x,y,g,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,v,g -p,x,s,g,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,n,v,d -p,f,f,g,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,v,g -p,f,s,b,t,f,f,c,b,p,t,b,s,f,w,w,p,w,o,p,h,s,g -p,x,y,g,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,v,d -p,x,y,y,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,y,d -p,x,s,w,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,k,s,d -p,x,y,y,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,v,d -e,f,f,e,t,n,f,c,b,w,t,b,s,s,w,p,p,w,o,p,k,v,d -p,x,y,g,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,y,g -p,x,f,y,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,v,d -p,x,f,y,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,v,p -p,f,f,g,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,v,g -p,x,f,y,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,y,d -e,f,f,g,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,k,v,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,y,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,n,y,d -p,f,y,g,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,v,d -p,f,f,g,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,y,p -p,f,f,g,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,v,d -p,x,s,w,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,k,v,d -p,x,y,g,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,y,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,g,w,p,w,o,p,k,v,d -p,x,f,w,f,c,f,c,n,u,e,b,s,s,w,w,p,w,o,p,k,s,d -p,x,f,y,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,v,d -p,x,f,y,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,y,d -p,x,f,p,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,n,v,d -p,f,f,y,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,y,d -p,f,f,g,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,v,p -e,f,y,e,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,k,v,d -p,x,y,y,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,v,p -p,f,f,y,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,v,p -p,x,f,g,f,c,f,c,n,g,e,b,s,s,w,w,p,w,o,p,n,s,d -p,x,s,p,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,k,v,d -e,x,f,e,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,n,v,d -p,x,y,y,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,v,g -e,f,f,n,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,n,y,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,k,y,d -p,f,y,y,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,y,d -p,x,f,y,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,y,g -p,x,y,g,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,v,d -p,f,f,g,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,y,d -p,f,f,y,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,y,p -e,f,y,g,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,n,y,d -p,x,y,g,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,v,p -p,x,y,y,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,v,p -p,f,f,g,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,v,p -p,f,f,y,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,y,p -e,f,f,e,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,n,v,d -e,f,y,n,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,n,v,d -p,x,f,y,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,y,g -e,f,y,u,f,n,f,c,n,h,e,?,s,f,w,w,p,w,o,f,h,y,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,n,y,d -p,f,y,y,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,v,g -p,f,f,y,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,v,g -p,x,y,g,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,y,d -p,x,y,g,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,y,p -p,f,f,y,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,v,p -p,x,y,y,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,y,d -p,x,f,y,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,v,p -p,x,y,g,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,v,p -p,x,f,y,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,y,p -e,x,f,g,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,n,v,d 
-p,f,y,g,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,y,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,v,d -p,f,f,y,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,v,d -e,f,y,n,t,n,f,c,b,n,t,b,s,s,w,w,p,w,o,p,n,y,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,k,y,d -p,f,y,g,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,v,g -e,x,f,e,t,n,f,c,b,w,t,b,s,s,p,p,p,w,o,p,k,v,d -e,f,f,e,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,n,y,d -p,f,y,y,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,v,p -p,x,f,w,f,c,f,c,n,u,e,b,s,s,w,w,p,w,o,p,k,v,d -p,x,y,g,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,y,d -p,x,y,e,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,x,f,y,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,v,p -p,f,y,g,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,y,d -e,f,y,g,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,n,y,d -p,x,y,n,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -e,f,y,e,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,n,v,d -p,b,s,b,t,n,f,c,b,g,e,b,s,s,w,w,p,w,t,p,r,v,m -e,f,f,g,t,n,f,c,b,w,t,b,s,s,g,g,p,w,o,p,k,y,d -p,f,f,g,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,v,g -e,f,y,g,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,k,y,d -p,f,f,y,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,y,p -p,f,f,y,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,y,g -p,f,f,g,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,v,g -e,f,f,e,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,n,v,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,y,p -p,f,f,y,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,v,g -p,x,f,y,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,v,d -p,x,f,y,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,y,p -p,f,f,y,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,v,g -p,f,f,y,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,v,g -p,x,y,g,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,v,p -e,f,y,e,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,k,v,d -p,f,f,g,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,v,d -p,f,f,y,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,v,g -e,f,f,n,t,n,f,c,b,u,t,b,s,s,w,p,p,w,o,p,k,v,d -p,x,y,y,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,v,p -p,x,y,y,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,v,p -e,f,y,n,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,k,y,d -p,f,y,g,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,y,p -e,f,f,e,t,n,f,c,b,p,t,b,s,s,g,g,p,w,o,p,k,y,d -p,x,y,y,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,v,g -p,f,y,g,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,v,d -p,x,y,y,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,v,d -p,x,s,g,f,c,f,c,n,g,e,b,s,s,w,w,p,w,o,p,k,s,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,k,v,d -p,f,y,g,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,y,g -p,x,f,g,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,y,d -p,f,y,g,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,v,p -e,f,y,n,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,k,v,d -p,x,y,y,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,y,d -p,f,f,g,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,y,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,n,v,d -e,f,y,e,t,n,f,c,b,w,t,b,s,s,p,g,p,w,o,p,k,v,d -p,f,s,g,t,f,f,c,b,p,t,b,f,f,w,w,p,w,o,p,h,v,u -p,x,f,g,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,y,p -p,f,y,g,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,v,p -p,x,y,g,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,v,p -p,x,f,y,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,v,d -p,f,y,g,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,v,d -p,f,y,g,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,v,p -e,f,y,e,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,n,y,d -p,x,y,g,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,v,g -p,f,y,g,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,v,g -p,x,y,g,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,y,p -e,f,f,e,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,n,v,d -e,f,y,e,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,n,v,d -e,f,y,e,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,n,y,d -p,f,f,y,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,v,g -p,f,f,g,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,y,p -e,f,y,n,t,n,f,c,b,u,t,b,s,s,g,p,p,w,o,p,k,y,d -p,x,y,g,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,y,g -e,f,f,g,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,n,v,d -p,x,f,y,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,v,p 
-p,x,y,y,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,v,d -p,x,y,y,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,v,d -e,f,f,c,f,n,f,w,n,w,e,b,f,f,w,n,p,w,o,e,w,v,l -p,x,f,g,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,v,d -p,f,f,g,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,y,g -p,x,y,g,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,v,g -p,f,y,g,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,y,g -p,x,s,b,t,f,f,c,b,w,t,b,f,s,w,w,p,w,o,p,h,s,u -p,f,f,g,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,v,d -p,x,y,g,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,v,g -p,f,f,y,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,y,p -p,f,f,g,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,v,p -p,f,f,g,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,y,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,k,y,d -p,x,f,y,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,y,p -p,x,f,y,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,y,p -e,f,y,e,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,k,y,d -p,f,y,g,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,v,p -p,f,f,g,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,y,d -p,x,y,y,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,y,d -e,f,f,g,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,k,y,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,v,g -p,x,f,g,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,y,p -e,f,y,n,t,n,f,c,b,p,t,b,s,s,p,w,p,w,o,p,n,y,d -e,f,f,g,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,n,v,d -p,x,y,g,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,v,d -p,f,y,y,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,v,p -p,f,f,y,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,v,d -p,f,f,g,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,v,p -p,f,f,g,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,y,p -p,f,y,g,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,y,g -p,f,s,b,t,f,f,c,b,w,t,b,f,f,w,w,p,w,o,p,h,s,u -e,f,f,e,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,n,y,d -p,x,y,n,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -e,x,y,g,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,n,v,d -p,x,f,g,f,c,f,w,n,n,e,b,s,s,w,w,p,w,o,p,k,v,d -p,x,f,p,f,c,f,w,n,n,e,b,s,s,w,w,p,w,o,p,n,v,d -p,x,y,n,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -p,f,f,g,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,y,p -p,x,f,y,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,v,g -p,x,s,g,t,f,f,c,b,w,t,b,s,f,w,w,p,w,o,p,h,s,u -p,f,y,g,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,y,p -p,x,f,y,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,y,d -p,f,y,g,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,y,p -p,f,f,g,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,y,d -p,f,y,g,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,y,d -p,x,y,g,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,y,g -p,f,f,g,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,y,g -p,x,f,y,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,y,g -p,f,f,y,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,v,g -p,x,f,y,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,v,g -p,x,f,y,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,y,g -p,x,y,y,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,v,g -p,f,s,g,t,f,f,c,b,h,t,b,s,s,w,w,p,w,o,p,h,v,u -p,x,f,y,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,y,g -p,f,f,y,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,y,g -p,f,y,g,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,y,d -p,f,f,y,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,y,d -p,f,f,y,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,v,d -p,x,y,g,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,v,p -p,x,y,g,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,y,p -e,f,y,e,t,n,f,c,b,p,t,b,s,s,w,w,p,w,o,p,n,v,d -p,f,f,y,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,v,d -p,f,f,g,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,y,d -p,f,y,g,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,v,p -p,x,f,y,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,v,g -p,x,f,y,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,v,p -p,x,f,g,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,y,d -p,x,y,y,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,y,p -p,f,s,w,t,f,f,c,b,p,t,b,f,f,w,w,p,w,o,p,h,s,u -p,f,f,y,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,v,g -p,f,f,y,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,v,d -p,f,y,g,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,y,p -p,f,f,y,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,y,d -p,f,f,y,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,v,d 
-p,f,f,g,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,v,p -p,x,y,g,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,y,g -p,x,f,y,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,y,p -p,f,f,g,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,v,g -p,x,f,y,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,y,d -p,f,y,g,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,v,d -p,x,y,y,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,v,d -p,x,s,w,t,f,f,c,b,h,t,b,f,s,w,w,p,w,o,p,h,s,g -p,f,f,g,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,y,g -p,x,s,b,t,f,f,c,b,p,t,b,f,s,w,w,p,w,o,p,h,s,g -p,f,y,g,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,v,d -p,x,y,g,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,v,d -p,x,f,y,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,y,g -p,x,f,y,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,y,g -p,x,y,y,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,v,g -e,f,y,g,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,n,y,d -p,f,y,y,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,y,p -p,x,y,g,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,y,p -p,f,f,y,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,v,g -p,f,y,y,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,v,p -p,f,f,y,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,y,d -p,x,y,g,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,v,p -p,f,f,g,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,y,g -p,x,y,g,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,y,p -p,x,f,p,f,c,f,c,n,p,e,b,s,s,w,w,p,w,o,p,k,s,d -p,x,y,y,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,y,d -p,x,f,y,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,y,g -p,f,f,y,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,y,g -p,x,y,g,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,v,g -p,f,f,g,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,v,g -p,x,y,g,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,v,d -p,x,f,y,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,y,g -p,x,f,y,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,v,p -p,x,y,g,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,y,d -p,x,y,y,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,v,d -p,f,y,g,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,y,p -p,f,y,g,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,v,d -p,x,f,y,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,y,p -e,k,y,n,f,n,f,w,n,w,e,b,f,f,w,n,p,w,o,e,w,v,l -p,f,f,g,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,y,p -p,x,y,y,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,y,g -p,f,f,y,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,y,g -p,x,y,g,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,v,d -p,x,f,g,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,y,p -p,f,f,y,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,v,g -e,f,s,p,t,n,f,c,b,e,e,?,s,s,w,w,p,w,t,e,w,c,w -p,f,f,y,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,v,p -p,x,f,y,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,y,d -p,x,y,g,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,y,d -p,f,y,g,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,y,p -p,x,f,y,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,v,d -p,f,y,g,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,y,g -p,f,y,g,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,v,g -e,k,s,p,t,n,f,c,b,e,e,?,s,s,w,e,p,w,t,e,w,c,w -p,x,f,y,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,v,d -p,f,f,y,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,v,p -p,f,f,g,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,y,p -p,f,f,g,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,v,g -p,x,f,y,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,y,p -p,f,f,g,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,y,p -p,x,y,g,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,y,g -p,f,y,y,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,y,g -p,f,f,g,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,y,d -p,x,f,y,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,v,d -p,f,f,g,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,v,g -p,f,f,g,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,y,d -p,f,f,y,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,y,g -e,f,y,g,t,n,f,c,b,w,t,b,s,s,p,w,p,w,o,p,n,y,d -p,f,y,g,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,y,d -p,f,y,g,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,v,d -p,f,y,g,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,v,g -p,x,y,y,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,v,g -p,x,y,g,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,y,g -p,x,y,y,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,y,p -p,f,f,y,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,y,d 
-p,x,f,y,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,v,g -p,x,y,y,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,y,p -p,f,f,g,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,v,g -p,x,f,y,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,v,d -p,f,f,g,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,v,g -p,f,y,g,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,y,d -p,f,f,g,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,y,g -p,f,y,y,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,v,g -p,f,f,g,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,y,g -p,x,f,y,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,y,p -p,f,y,g,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,y,g -p,f,y,g,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,y,g -p,f,y,g,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,v,p -e,x,y,r,f,n,f,c,n,p,e,?,s,f,w,w,p,w,o,f,h,v,d -p,f,y,y,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,y,g -p,x,f,y,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,y,d -p,x,y,n,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -p,f,y,y,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,v,d -p,k,y,n,f,n,f,c,n,w,e,?,k,y,w,n,p,w,o,e,w,v,d -p,f,f,y,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,y,d -p,x,y,g,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,v,d -p,x,y,g,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,y,g -p,f,y,g,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,y,g -p,x,f,y,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,v,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,v,d -p,x,y,g,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,v,d -p,x,y,g,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,y,g -p,x,y,g,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,y,g -e,f,y,e,t,n,f,c,b,w,t,b,s,s,g,p,p,w,o,p,k,y,d -p,f,f,y,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,y,p -p,x,y,y,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,v,g -p,f,f,y,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,v,g -p,x,f,y,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,v,p -p,f,f,g,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,y,g -p,f,f,g,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,v,g -p,x,y,y,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,y,g -p,f,s,b,t,f,f,c,b,w,t,b,s,s,w,w,p,w,o,p,h,s,u -p,x,y,g,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,v,g -p,x,y,y,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,v,p -p,f,f,g,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,v,p -p,f,y,g,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,y,d -p,f,f,y,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,v,d -p,f,y,g,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,y,g -p,x,y,g,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,v,g -e,x,s,e,t,n,f,c,b,e,e,?,s,s,w,e,p,w,t,e,w,c,w -p,f,f,g,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,v,d -p,f,f,y,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,y,d -p,f,y,g,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,y,d -p,x,y,y,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,y,p -p,f,y,g,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,v,d -p,x,f,y,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,y,g -p,b,y,w,t,n,f,w,n,w,e,b,s,s,w,w,p,w,o,p,w,c,l -p,f,s,b,t,f,f,c,b,h,t,b,s,s,w,w,p,w,o,p,h,v,u -p,x,y,y,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,y,p -p,f,y,g,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,y,d -p,f,y,g,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,v,p -p,f,y,y,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,v,p -p,f,f,g,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,y,d -p,x,y,y,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,v,d -p,x,f,y,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,v,g -p,f,f,g,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,y,g -p,x,s,w,t,f,f,c,b,w,t,b,f,f,w,w,p,w,o,p,h,s,u -e,f,y,g,t,n,f,c,b,n,t,b,s,s,g,p,p,w,o,p,n,v,d -p,x,y,n,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,f,y,g,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,y,d -p,f,f,g,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,y,g -e,f,y,e,t,n,f,c,b,n,t,b,s,s,p,g,p,w,o,p,k,v,d -e,x,y,b,t,n,f,c,b,e,e,?,s,s,w,e,p,w,t,e,w,c,w -p,x,y,y,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,y,g -p,x,y,y,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,y,d -p,x,y,y,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,v,p -p,x,y,y,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,y,g -p,f,f,y,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,y,g -p,f,y,g,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,y,g -p,x,f,g,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,y,g 
-p,f,y,y,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,v,g -p,x,y,g,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,v,g -p,x,y,y,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,y,g -p,f,y,g,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,y,p -p,f,y,y,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,y,g -p,x,y,g,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,y,d -p,f,y,g,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,v,d -p,f,y,g,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,y,p -e,x,y,u,f,n,f,c,n,h,e,?,s,f,w,w,p,w,o,f,h,y,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,v,g -p,f,f,y,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,v,g -p,f,f,y,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,y,g -p,x,f,y,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,v,g -p,f,f,y,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,v,g -p,f,f,g,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,v,g -p,f,y,g,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,y,d -p,f,f,g,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,v,g -p,f,f,y,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,y,d -p,f,y,y,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,v,p -p,f,f,g,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,v,p -p,f,y,g,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,y,g -p,f,f,y,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,y,d -e,f,f,e,t,n,f,c,b,u,t,b,s,s,g,w,p,w,o,p,n,y,d -p,f,s,b,t,f,f,c,b,h,t,b,s,f,w,w,p,w,o,p,h,v,u -p,f,y,g,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,v,d -p,f,f,g,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,v,p -p,x,y,y,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,y,g -p,f,f,y,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,y,p -p,f,y,y,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,v,d -e,f,y,n,t,n,f,c,b,u,t,b,s,s,w,g,p,w,o,p,k,v,d -p,f,y,y,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,y,g -e,x,s,b,t,n,f,c,b,e,e,?,s,s,w,w,p,w,t,e,w,c,w -p,x,y,y,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,y,g -p,x,f,y,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,y,d -p,x,y,y,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,y,g -p,f,y,g,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,v,p -e,f,f,e,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,k,y,d -p,f,f,y,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,v,d -p,f,f,y,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,v,g -p,x,y,y,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,v,d -p,x,f,y,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,v,g -p,x,y,n,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,f,f,y,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,v,p -p,x,y,y,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,v,g -p,f,f,y,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,v,p -p,f,f,y,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,v,d -p,f,y,g,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,v,p -p,f,y,g,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,v,g -p,f,y,g,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,v,d -p,f,f,g,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,v,g -p,x,s,g,f,c,f,c,n,p,e,b,s,s,w,w,p,w,o,p,k,v,d -p,x,y,y,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,y,g -p,f,s,g,t,f,f,c,b,h,t,b,f,f,w,w,p,w,o,p,h,s,u -p,x,y,g,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,v,g -p,x,y,y,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,y,p -p,x,y,y,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,y,d -e,f,y,g,t,n,f,c,b,u,t,b,s,s,p,g,p,w,o,p,k,y,d -p,f,f,g,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,y,d -p,f,y,y,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,v,g -p,f,y,g,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,y,p -p,x,y,y,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,y,g -p,x,y,g,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,y,d -p,x,f,y,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,y,p -e,f,y,n,t,n,f,c,b,n,t,b,s,s,g,g,p,w,o,p,n,v,d -p,x,y,g,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,y,g -p,f,f,y,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,v,g -p,x,y,g,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,v,p -p,x,y,y,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,y,g -p,f,f,g,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,y,p -p,f,f,g,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,y,g -p,x,y,y,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,y,g -e,k,y,b,t,n,f,c,b,e,e,?,s,s,e,e,p,w,t,e,w,c,w -p,x,y,y,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,y,p -p,x,y,n,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,f,y,g,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,y,p 
-p,x,y,g,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,y,p -p,f,y,y,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,v,g -p,f,y,g,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,v,d -p,f,f,g,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,y,g -p,f,y,y,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,y,g -p,x,f,y,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,y,d -p,x,s,w,t,f,f,c,b,p,t,b,f,f,w,w,p,w,o,p,h,v,u -p,x,y,y,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,v,g -p,f,f,y,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,y,p -p,x,f,y,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,y,g -p,x,y,y,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,v,p -p,f,y,y,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,v,g -p,x,y,y,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,y,g -p,x,y,y,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,v,p -p,f,y,g,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,y,p -p,f,f,y,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,y,p -p,f,f,g,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,v,p -e,f,y,n,t,n,f,c,b,p,t,b,s,s,w,p,p,w,o,p,k,y,d -p,x,y,y,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,v,d -p,x,y,g,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,v,p -p,f,y,g,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,v,g -p,f,y,g,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,v,g -p,x,y,y,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,y,d -p,f,f,g,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,v,p -p,x,y,g,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,v,d -p,x,y,y,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,v,d -p,x,f,y,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,y,d -p,f,f,y,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,y,d -e,f,f,c,f,n,f,w,n,w,e,b,s,f,w,n,p,w,o,e,w,v,l -p,f,y,g,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,v,g -p,x,y,g,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,y,p -p,f,f,y,f,n,f,c,n,w,e,?,k,y,w,y,p,w,o,e,w,v,d -p,f,f,y,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,y,p -p,x,s,g,t,f,f,c,b,p,t,b,f,s,w,w,p,w,o,p,h,v,g -e,f,s,n,t,n,f,c,b,e,e,?,s,s,w,w,p,w,t,e,w,c,w -p,x,y,n,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,x,s,g,f,c,f,w,n,u,e,b,s,s,w,w,p,w,o,p,k,v,d -p,x,f,y,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,y,d -p,f,y,g,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,v,g -p,f,y,g,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,y,d -p,x,y,y,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,v,g -p,x,y,y,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,v,g -p,x,y,g,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,y,d -p,f,y,g,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,y,d -p,f,y,g,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,y,g -p,f,f,y,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,y,g -p,x,f,y,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,v,d -p,f,f,y,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,v,p -p,f,f,g,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,y,p -p,f,f,y,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,y,p -p,f,y,y,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,y,d -p,f,f,y,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,v,p -p,f,f,y,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,v,d -p,x,y,g,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,v,g -p,x,y,g,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,v,g -p,x,y,g,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,v,d -e,x,s,b,t,n,f,c,b,w,e,?,s,s,e,w,p,w,t,e,w,c,w -p,f,f,g,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,v,d -p,x,f,y,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,v,d -p,x,y,n,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -p,x,y,g,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,y,d -p,f,y,g,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,v,p -p,f,y,g,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,y,d -p,f,f,y,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,y,g -p,x,y,y,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,v,d -p,x,f,y,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,v,g -p,f,f,g,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,y,p -p,x,y,g,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,v,p -p,f,f,y,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,y,g -p,x,f,y,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,y,g -e,x,y,u,f,n,f,c,n,h,e,?,s,f,w,w,p,w,o,f,h,v,d -e,f,y,b,t,n,f,c,b,e,e,?,s,s,e,e,p,w,t,e,w,c,w -p,f,y,y,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,y,d -p,x,y,y,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,y,p -p,x,y,g,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,v,g 
-p,f,f,g,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,y,d -p,f,y,g,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,v,g -p,f,f,g,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,y,g -p,f,y,g,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,v,g -p,x,f,y,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,y,g -p,f,f,y,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,v,d -p,f,f,g,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,v,d -p,f,y,y,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,v,g -p,f,y,y,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,y,g -p,f,f,g,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,v,g -p,f,s,b,t,f,f,c,b,w,t,b,s,f,w,w,p,w,o,p,h,v,g -p,f,s,w,t,n,f,c,b,g,e,b,s,s,w,w,p,w,t,p,r,v,m -p,x,y,y,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,y,p -p,f,f,y,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,v,p -p,f,y,g,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,v,g -p,x,y,y,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,v,d -p,x,f,y,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,v,p -p,x,f,y,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,y,d -e,f,f,g,t,n,f,c,b,u,t,b,s,s,g,g,p,w,o,p,n,v,d -e,x,y,u,f,n,f,c,n,u,e,?,s,f,w,w,p,w,o,f,h,v,d -p,x,y,g,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,y,d -p,f,f,y,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,v,g -p,x,y,y,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,v,d -p,f,y,g,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,v,d -p,x,y,y,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,y,p -p,x,f,y,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,v,d -p,x,y,y,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,y,p -p,f,f,y,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,v,g -p,x,y,g,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,y,p -p,f,y,g,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,y,g -p,f,y,g,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,v,p -p,x,f,y,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,y,p -e,f,f,e,t,n,f,c,b,w,t,b,s,s,w,g,p,w,o,p,n,v,d -p,x,y,y,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,y,d -p,f,f,g,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,v,d -p,x,y,g,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,y,p -p,f,y,g,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,v,p -p,f,f,g,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,y,p -p,f,f,g,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,y,d -p,f,f,g,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,v,g -p,f,y,g,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,y,d -p,f,f,y,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,v,d -p,x,f,y,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,y,d -p,x,y,g,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,v,p -p,x,y,g,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,y,p -p,x,f,w,f,c,f,c,n,g,e,b,s,s,w,w,p,w,o,p,n,v,d -p,f,f,g,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,y,d -p,x,f,y,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,v,p -p,f,y,g,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,v,d -p,x,f,y,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,v,g -p,f,y,g,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,v,d -p,f,y,g,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,v,d -p,f,f,y,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,y,p -p,x,y,g,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,v,g -p,f,y,y,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,v,g -p,f,f,y,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,y,p -p,x,f,y,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,v,d -p,f,f,y,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,v,g -p,x,y,y,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,y,p -p,f,f,y,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,y,d -p,x,f,y,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,v,g -p,f,y,g,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,y,d -p,f,f,g,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,y,d -p,f,f,y,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,v,g -p,f,f,g,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,v,d -p,f,f,g,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,y,g -p,f,f,y,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,v,d -p,x,y,y,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,y,d -p,x,y,y,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,v,d -p,f,f,y,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,y,d -p,x,y,y,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,v,d -p,x,y,y,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,v,g -p,x,y,n,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -e,f,y,b,t,n,f,c,b,w,e,?,s,s,w,w,p,w,t,e,w,c,w -p,x,f,y,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,v,g 
-p,x,y,y,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,y,d -p,f,y,g,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,y,d -p,x,y,y,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,y,d -p,f,y,g,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,v,p -p,f,y,g,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,y,d -p,f,f,g,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,v,p -p,x,f,y,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,v,p -p,f,f,y,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,y,g -p,f,f,y,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,y,d -p,f,y,g,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,v,p -p,f,y,g,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,y,g -p,f,f,g,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,y,g -p,f,y,g,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,v,d -p,f,f,g,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,v,d -p,f,y,g,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,v,p -p,x,f,g,f,c,f,c,n,u,e,b,s,s,w,w,p,w,o,p,k,s,d -p,f,s,b,t,f,f,c,b,w,t,b,f,f,w,w,p,w,o,p,h,v,g -e,f,f,e,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,k,v,d -p,x,y,y,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,v,p -p,x,y,g,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,y,d -p,f,y,g,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,v,g -e,f,f,g,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,k,v,d -p,x,y,y,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,y,d -p,x,y,y,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,v,p -p,f,f,y,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,y,p -p,x,f,y,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,v,p -p,x,f,y,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,y,d -p,x,y,g,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,v,g -p,f,s,w,t,f,f,c,b,w,t,b,s,f,w,w,p,w,o,p,h,s,u -p,x,y,g,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,v,d -p,x,f,y,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,v,g -p,f,y,g,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,y,p -p,x,y,g,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,v,p -p,x,f,y,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,v,d -e,f,y,u,f,n,f,c,n,h,e,?,s,f,w,w,p,w,o,f,h,v,d -p,f,f,y,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,y,d -p,f,y,g,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,v,g -p,x,y,y,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,y,g -p,f,f,g,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,y,g -e,f,y,n,t,n,f,c,b,n,t,b,s,s,p,p,p,w,o,p,n,y,d -p,x,f,y,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,v,p -p,f,f,g,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,y,d -p,x,y,g,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,y,p -p,f,f,y,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,y,g -p,x,f,y,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,v,d -p,x,y,y,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,v,p -p,x,f,y,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,v,p -p,f,y,g,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,v,g -p,x,s,w,t,f,f,c,b,h,t,b,f,f,w,w,p,w,o,p,h,s,u -e,f,y,e,t,n,f,c,b,w,e,?,s,s,w,w,p,w,t,e,w,c,w -e,x,y,u,f,n,f,c,n,w,e,?,s,f,w,w,p,w,o,f,h,y,d -p,x,y,y,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,v,g -p,x,y,g,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,y,d -p,x,f,y,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,y,p -p,f,f,g,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,y,d -p,x,y,g,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,y,p -p,f,f,g,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,y,g -p,f,f,y,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,y,g -p,x,y,g,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,y,p -p,f,y,g,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,y,p -e,f,s,p,t,n,f,c,b,w,e,?,s,s,w,e,p,w,t,e,w,c,w -p,f,f,g,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,y,p -p,x,y,y,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,y,p -p,x,y,g,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,y,p -p,f,f,y,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,v,g -p,x,f,y,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,y,d -p,f,y,g,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,y,d -p,f,y,y,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,y,p -p,x,y,g,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,y,g -p,x,y,g,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,y,d -p,f,f,y,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,y,p -p,f,f,g,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,y,d -p,x,y,g,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,y,p -p,f,f,y,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,v,p -p,f,y,g,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,v,d 
-p,x,y,g,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,y,g -p,f,f,g,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,v,d -p,x,y,g,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,y,g -p,f,f,g,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,v,d -p,x,y,y,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,y,p -p,x,f,y,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,y,g -p,x,y,g,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,v,p -p,f,f,y,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,v,p -e,f,y,n,t,n,f,c,b,n,t,b,s,s,w,g,p,w,o,p,n,y,d -p,f,f,g,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,y,d -p,x,y,y,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,y,p -p,x,s,g,t,f,f,c,b,h,t,b,f,f,w,w,p,w,o,p,h,s,g -p,f,y,y,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,v,p -p,x,y,y,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,v,p -p,x,f,w,f,c,f,c,n,g,e,b,s,s,w,w,p,w,o,p,k,s,d -p,x,f,y,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,y,d -p,f,s,b,t,f,f,c,b,h,t,b,f,s,w,w,p,w,o,p,h,s,u -p,x,y,y,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,y,g -p,x,y,g,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,y,d -p,f,f,y,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,y,g -p,f,s,p,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,r,v,g -p,f,s,b,t,f,f,c,b,p,t,b,f,f,w,w,p,w,o,p,h,v,u -p,x,y,g,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,v,g -p,x,y,g,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,y,g -p,x,y,g,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,v,g -p,f,f,g,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,y,p -p,f,f,g,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,v,p -p,x,f,y,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,y,p -p,f,f,y,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,v,p -e,f,y,u,f,n,f,c,n,u,e,?,s,f,w,w,p,w,o,f,h,v,d -p,x,f,y,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,y,g -e,f,f,e,t,n,f,c,b,w,t,b,s,s,g,w,p,w,o,p,n,v,d -p,f,f,y,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,y,g -p,x,f,y,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,v,g -p,x,y,n,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,x,y,g,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,y,g -p,x,f,y,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,y,d -p,f,f,g,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,y,g -p,x,y,g,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,v,p -p,x,y,g,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,y,g -p,x,f,y,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,y,d -p,f,f,y,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,y,g -p,f,y,g,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,v,p -p,f,y,g,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,y,g -e,f,y,n,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,n,y,d -p,x,y,y,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,v,g -e,f,f,e,t,n,f,c,b,w,t,b,s,s,w,w,p,w,o,p,k,y,d -p,x,f,y,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,y,d -p,x,y,g,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,y,p -p,f,y,g,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,y,p -p,x,f,y,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,y,p -p,f,f,g,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,v,d -p,f,f,g,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,v,d -p,f,y,g,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,y,p -p,f,y,g,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,y,g -p,x,f,y,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,v,d -p,x,y,y,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,y,p -p,f,f,y,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,v,d -p,x,y,g,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,y,d -p,f,f,y,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,y,p -p,f,y,g,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,v,d -p,x,f,p,f,c,f,w,n,p,e,b,s,s,w,w,p,w,o,p,k,s,d -p,f,f,y,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,v,d -p,x,y,g,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,v,d -p,x,y,y,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,y,g -p,x,y,g,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,v,d -p,f,y,y,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,v,p -p,x,y,g,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,y,p -p,x,f,y,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,v,d -p,f,y,g,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,v,d -p,f,y,g,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,v,g -p,x,y,y,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,y,d -p,f,f,y,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,y,d -p,x,y,g,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,v,d -p,x,y,y,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,y,d 
-p,x,y,g,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,v,d -p,x,y,g,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,y,g -p,x,f,y,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,v,g -p,x,y,g,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,y,d -p,f,f,g,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,y,p -p,f,f,g,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,v,g -p,x,y,g,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,v,p -p,x,f,y,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,v,d -p,x,y,g,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,y,d -p,f,f,g,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,v,p -p,x,y,y,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,v,p -p,x,f,y,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,v,d -p,f,y,g,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,y,d -p,f,f,g,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,v,p -e,x,s,p,t,n,f,c,b,e,e,?,s,s,w,w,p,w,t,e,w,c,w -p,f,f,y,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,v,d -p,x,s,p,f,c,f,w,n,g,e,b,s,s,w,w,p,w,o,p,k,v,d -p,x,f,y,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,y,g -p,f,f,g,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,y,p -p,x,f,y,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,y,g -e,f,f,e,t,n,f,c,b,p,t,b,s,s,p,p,p,w,o,p,n,y,d -p,x,y,y,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,v,d -p,x,f,y,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,y,g -p,f,y,g,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,v,g -p,f,f,y,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,y,g -e,f,y,n,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,n,v,d -p,x,y,y,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,y,p -p,f,f,g,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,v,p -p,x,y,y,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,v,p -p,f,f,y,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,y,d -p,f,y,y,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,y,d -p,f,y,g,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,v,g -p,f,f,g,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,v,g -p,x,y,y,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,y,g -p,f,y,g,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,y,p -p,x,y,y,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,y,g -e,f,f,g,t,n,f,c,b,u,t,b,s,s,w,w,p,w,o,p,n,y,d -p,x,s,g,t,f,f,c,b,h,t,b,f,s,w,w,p,w,o,p,h,s,g -p,x,y,n,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -p,f,s,g,t,f,f,c,b,p,t,b,s,f,w,w,p,w,o,p,h,v,g -p,f,y,g,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,y,g -p,f,y,y,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,v,g -p,f,y,g,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,v,d -e,f,y,e,t,n,f,c,b,e,e,?,s,s,w,e,p,w,t,e,w,c,w -p,f,y,g,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,v,p -p,x,f,y,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,v,g -p,f,f,g,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,y,d -p,x,y,y,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,y,d -p,f,f,y,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,y,g -e,f,y,n,t,n,f,c,b,e,e,?,s,s,e,e,p,w,t,e,w,c,w -p,f,y,g,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,v,g -p,k,f,n,f,n,f,c,n,w,e,?,k,y,w,n,p,w,o,e,w,v,d -p,f,f,g,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,v,d -p,f,f,y,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,v,p -p,x,y,n,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,f,f,g,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,v,d -p,f,f,g,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,y,p -p,f,s,w,t,f,f,c,b,w,t,b,f,f,w,w,p,w,o,p,h,v,g -p,f,f,y,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,v,d -p,x,y,g,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,y,g -p,x,f,y,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,y,g -e,x,y,e,t,n,f,c,b,w,e,?,s,s,e,e,p,w,t,e,w,c,w -p,f,y,y,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,v,d -p,b,f,y,f,n,f,c,n,w,e,?,k,y,w,n,p,w,o,e,w,v,d -p,f,y,y,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,v,d -p,x,s,g,t,f,f,c,b,p,t,b,f,f,w,w,p,w,o,p,h,s,u -p,f,y,y,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,v,d -p,f,f,y,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,v,g -e,k,y,e,t,n,f,c,b,e,e,?,s,s,w,w,p,w,t,e,w,c,w -p,f,f,y,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,v,p -p,f,f,y,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,v,p -e,k,y,n,f,n,f,w,n,w,e,b,f,s,w,n,p,w,o,e,w,v,l -p,x,s,w,f,c,f,c,n,p,e,b,s,s,w,w,p,w,o,p,n,v,d -p,x,f,y,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,y,g -p,x,f,y,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,v,p 
-p,x,f,y,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,y,d -p,f,y,y,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,v,p -p,b,s,b,t,n,f,c,b,g,e,b,s,s,w,w,p,w,t,p,r,v,g -p,x,s,w,t,f,f,c,b,h,t,b,f,f,w,w,p,w,o,p,h,v,g -e,f,y,e,t,n,f,c,b,n,t,b,s,s,w,p,p,w,o,p,n,y,d -p,b,s,w,t,n,f,c,b,g,e,b,s,s,w,w,p,w,t,p,r,v,m -e,f,y,p,t,n,f,c,b,w,e,?,s,s,e,e,p,w,t,e,w,c,w -p,x,s,w,t,f,f,c,b,p,t,b,f,f,w,w,p,w,o,p,h,v,g -e,k,y,n,t,n,f,c,b,w,e,?,s,s,w,w,p,w,t,e,w,c,w -p,x,y,y,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,y,p -p,f,s,b,t,f,f,c,b,h,t,b,f,s,w,w,p,w,o,p,h,v,u -p,f,y,g,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,v,d -p,x,y,y,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,v,d -p,x,y,g,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,v,d -p,x,f,y,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,y,p -p,x,y,y,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,v,d -p,x,y,y,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,y,d -p,x,f,y,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,y,p -p,x,y,g,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,y,p -p,f,f,g,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,v,p -e,k,s,e,t,n,f,c,b,w,e,?,s,s,e,w,p,w,t,e,w,c,w -e,f,y,n,t,n,f,c,b,e,e,?,s,s,e,w,p,w,t,e,w,c,w -e,f,s,b,t,n,f,c,b,w,e,?,s,s,e,w,p,w,t,e,w,c,w -p,x,y,y,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,v,d -e,f,f,g,t,n,f,c,b,u,t,b,s,s,p,p,p,w,o,p,n,v,d -p,f,f,y,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,y,p -p,f,y,g,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,y,g -p,x,s,g,t,f,f,c,b,p,t,b,f,f,w,w,p,w,o,p,h,v,g -p,f,f,g,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,v,p -p,x,f,y,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,y,d -p,f,f,y,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,v,g -p,x,y,n,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,f,f,g,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,v,p -e,f,y,n,t,n,f,c,b,e,e,?,s,s,w,w,p,w,t,e,w,c,w -e,f,y,e,t,n,f,c,b,p,t,b,s,s,g,p,p,w,o,p,k,v,d -p,f,y,g,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,y,g -p,x,y,y,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,v,p -p,f,f,g,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,y,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,p,w,p,w,o,p,n,v,d -p,x,y,y,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,v,p -p,f,f,y,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,y,p -p,f,f,g,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,v,p -p,f,y,y,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,y,d -p,x,y,g,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,v,d -p,b,y,w,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,r,v,m -p,f,f,y,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,y,p -p,x,y,n,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,f,f,g,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,y,g -p,x,y,y,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,v,g -p,f,y,y,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,y,p -e,f,y,p,t,n,f,c,b,e,e,?,s,s,e,e,p,w,t,e,w,c,w -p,x,s,w,t,f,f,c,b,h,t,b,s,s,w,w,p,w,o,p,h,s,g -p,f,y,g,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,y,g -p,x,y,g,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,v,p -p,x,f,y,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,v,g -p,f,s,b,t,f,f,c,b,w,t,b,f,s,w,w,p,w,o,p,h,s,u -p,x,f,g,f,c,f,c,n,g,e,b,s,s,w,w,p,w,o,p,k,v,d -p,x,f,y,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,y,p -p,f,y,g,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,y,g -p,f,y,y,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,y,p -e,x,y,r,f,n,f,c,n,p,e,?,s,f,w,w,p,w,o,f,h,y,d -e,f,y,w,f,n,f,c,n,h,e,?,s,f,w,w,p,w,o,f,h,v,d -p,x,f,y,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,v,d -p,f,y,y,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,y,p -p,f,y,y,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,y,g -p,x,y,e,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,x,y,e,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -p,x,y,g,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,y,d -p,x,y,y,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,v,g -p,f,f,g,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,y,d -p,x,y,g,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,y,g -p,x,y,y,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,v,d -p,f,f,y,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,y,p -e,f,s,e,t,n,f,c,b,w,e,?,s,s,w,e,p,w,t,e,w,c,w -p,f,f,y,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,y,p 
-p,x,y,y,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,y,p -p,f,s,g,t,f,f,c,b,p,t,b,f,s,w,w,p,w,o,p,h,s,g -p,f,f,y,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,v,g -p,x,y,y,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,y,d -p,x,f,y,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,y,p -p,x,s,g,t,f,f,c,b,h,t,b,f,f,w,w,p,w,o,p,h,v,u -p,f,y,g,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,y,g -p,f,f,g,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,v,g -p,x,s,w,f,c,f,c,n,n,e,b,s,s,w,w,p,w,o,p,k,v,d -p,f,y,g,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,y,p -e,f,y,e,t,n,f,c,b,u,t,b,s,s,p,w,p,w,o,p,n,y,d -p,x,f,y,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,y,g -p,x,y,y,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,y,p -e,k,s,b,t,n,f,c,b,e,e,?,s,s,w,w,p,w,t,e,w,c,w -p,f,f,y,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,v,g -p,f,f,y,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,v,d -p,x,y,y,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,y,g -e,x,f,n,f,n,f,w,n,w,e,b,s,f,w,n,p,w,o,e,w,v,l -e,x,y,e,t,n,f,c,b,e,e,?,s,s,e,e,p,w,t,e,w,c,w -p,x,y,g,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,v,p -p,x,f,y,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,v,g -p,f,s,g,t,f,f,c,b,h,t,b,f,f,w,w,p,w,o,p,h,v,g -p,x,y,y,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,y,g -p,f,y,g,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,v,p -p,x,y,g,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,v,g -p,x,y,n,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -e,k,s,b,t,n,f,c,b,w,e,?,s,s,e,w,p,w,t,e,w,c,w -p,f,f,y,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,y,d -p,x,f,g,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,v,p -p,f,y,g,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,y,p -p,x,f,y,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,v,d -p,f,y,y,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,v,g -e,k,s,p,t,n,f,c,b,w,e,?,s,s,e,e,p,w,t,e,w,c,w -p,f,y,n,f,n,f,c,n,w,e,?,k,y,w,n,p,w,o,e,w,v,d -p,x,y,g,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,y,d -p,x,f,y,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,y,d -p,x,y,g,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,v,p -p,f,s,b,t,f,f,c,b,w,t,b,f,f,w,w,p,w,o,p,h,v,u -e,f,s,b,t,n,f,c,b,w,e,?,s,s,w,w,p,w,t,e,w,c,w -p,f,s,w,t,f,f,c,b,w,t,b,f,f,w,w,p,w,o,p,h,s,u -p,f,y,g,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,y,p -p,x,f,y,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,v,p -p,f,y,g,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,v,p -p,f,y,y,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,y,g -p,f,f,y,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,v,d -p,x,s,b,t,f,f,c,b,h,t,b,s,s,w,w,p,w,o,p,h,v,u -p,f,f,g,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,y,p -p,x,y,g,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,y,d -p,f,f,y,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,y,g -p,f,y,y,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,y,d -p,f,s,g,t,f,f,c,b,p,t,b,s,f,w,w,p,w,o,p,h,s,u -e,f,s,b,t,n,f,c,b,w,e,?,s,s,w,e,p,w,t,e,w,c,w -e,x,s,p,t,n,f,c,b,w,e,?,s,s,e,e,p,w,t,e,w,c,w -e,k,s,n,t,n,f,c,b,e,e,?,s,s,w,w,p,w,t,e,w,c,w -p,f,y,p,t,n,f,c,b,g,e,b,s,s,w,w,p,w,t,p,r,v,g -p,f,y,g,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,v,p -p,x,y,y,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,v,p -p,f,f,g,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,v,g -p,f,f,g,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,y,d -p,x,f,y,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,y,p -p,f,f,g,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,v,d -p,f,y,g,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,y,d -p,f,y,y,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,v,d -p,f,f,y,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,v,g -p,f,y,y,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,y,g -p,x,y,y,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,v,d -p,x,y,y,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,y,p -p,f,y,g,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,v,g -p,f,f,g,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,y,d -p,x,y,y,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,v,p -p,x,y,y,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,y,p -e,f,y,u,f,n,f,c,n,w,e,?,s,f,w,w,p,w,o,f,h,y,d -p,f,f,g,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,v,g -p,x,y,n,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -p,x,y,y,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,v,g 
-p,x,y,y,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,y,p -p,x,y,g,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,v,g -p,f,f,y,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,v,d -p,x,f,y,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,y,d -p,x,y,g,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,v,p -e,f,s,b,t,n,f,c,b,e,e,?,s,s,w,w,p,w,t,e,w,c,w -p,f,f,g,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,y,p -p,f,y,y,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,v,d -e,x,s,n,t,n,f,c,b,w,e,?,s,s,w,e,p,w,t,e,w,c,w -p,f,f,g,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,y,p -p,x,y,g,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,v,d -p,x,y,n,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,f,f,y,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,v,d -p,x,y,n,f,n,f,c,n,w,e,?,k,y,w,n,p,w,o,e,w,v,d -p,b,s,b,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,r,v,g -p,f,f,y,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,v,d -p,f,y,g,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,y,d -e,f,y,u,f,n,f,c,n,u,e,?,s,f,w,w,p,w,o,f,h,y,d -p,x,f,y,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,v,g -p,f,y,y,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,y,g -p,f,f,y,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,y,g -p,x,y,y,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,y,d -e,x,y,e,t,n,f,c,b,w,e,?,s,s,e,w,p,w,t,e,w,c,w -p,f,f,g,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,v,g -p,x,y,y,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,v,g -p,x,y,y,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,v,p -p,f,y,g,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,v,p -e,k,s,b,t,n,f,c,b,w,e,?,s,s,w,e,p,w,t,e,w,c,w -p,f,y,y,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,y,d -p,x,y,g,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,v,p -p,x,f,y,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,v,d -p,f,f,y,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,v,p -p,f,s,w,t,f,f,c,b,h,t,b,s,f,w,w,p,w,o,p,h,v,u -p,x,s,g,f,c,f,c,n,p,e,b,s,s,w,w,p,w,o,p,n,v,d -p,f,y,y,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,y,d -e,f,y,u,f,n,f,c,n,p,e,?,s,f,w,w,p,w,o,f,h,v,d -p,x,f,y,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,y,d -p,f,y,g,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,y,d -p,x,f,y,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,v,g -p,x,f,y,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,v,p -p,x,s,g,t,f,f,c,b,p,t,b,f,s,w,w,p,w,o,p,h,s,u -p,f,y,g,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,y,d -p,f,f,y,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,y,p -p,f,f,y,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,y,d -p,x,y,g,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,v,d -p,x,y,g,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,v,g -p,x,y,g,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,y,d -p,x,f,g,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,y,g -p,x,y,y,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,y,p -p,f,f,g,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,v,g -p,f,f,y,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,y,d -p,x,y,g,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,y,p -p,f,f,y,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,v,p -p,x,y,n,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,x,f,y,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,y,p -p,x,y,y,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,v,p -p,f,s,b,t,f,f,c,b,w,t,b,f,s,w,w,p,w,o,p,h,v,g -p,f,f,g,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,v,d -p,f,y,y,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,y,p -p,x,f,y,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,y,p -p,f,f,g,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,v,d -p,x,y,y,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,y,d -p,x,y,g,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,v,d -p,x,y,e,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -e,k,y,b,t,n,f,c,b,e,e,?,s,s,e,w,p,w,t,e,w,c,w -e,f,y,b,t,n,f,c,b,w,e,?,s,s,e,w,p,w,t,e,w,c,w -p,x,y,g,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,y,d -p,f,f,y,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,y,p -p,x,y,g,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,y,g -p,f,y,g,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,y,g -p,x,s,b,t,f,f,c,b,w,t,b,s,f,w,w,p,w,o,p,h,s,u -p,f,y,y,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,v,p -p,x,s,w,t,f,f,c,b,p,t,b,f,f,w,w,p,w,o,p,h,s,u -p,x,f,y,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,v,p -p,x,y,g,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,y,p 
-p,x,s,w,t,f,f,c,b,p,t,b,s,f,w,w,p,w,o,p,h,s,u -p,f,s,b,t,f,f,c,b,p,t,b,f,f,w,w,p,w,o,p,h,s,g -p,x,s,w,t,f,f,c,b,h,t,b,f,s,w,w,p,w,o,p,h,s,u -p,f,f,y,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,y,d -p,f,y,g,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,y,p -p,x,f,y,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,v,g -p,f,f,g,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,v,p -e,x,y,w,f,n,f,c,n,u,e,?,s,f,w,w,p,w,o,f,h,y,d -e,x,s,e,t,n,f,c,b,e,e,?,s,s,w,w,p,w,t,e,w,c,w -p,x,y,g,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,v,g -p,f,f,g,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,v,g -p,x,y,n,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,x,y,g,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,v,p -p,f,f,g,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,y,p -p,f,f,y,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,v,p -p,f,f,y,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,y,d -e,f,f,e,t,n,f,c,b,n,t,b,s,s,g,w,p,w,o,p,n,y,d -p,x,y,y,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,y,g -p,f,y,y,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,v,d -p,f,f,g,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,v,p -p,x,y,y,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,y,g -p,x,f,y,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,y,p -p,k,f,y,f,n,f,c,n,w,e,?,k,y,w,n,p,w,o,e,w,v,d -p,f,f,y,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,y,g -e,k,f,c,f,n,f,w,n,w,e,b,f,s,w,n,p,w,o,e,w,v,l -p,f,f,g,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,v,p -p,f,y,y,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,y,p -p,x,y,g,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,v,d -p,x,y,g,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,v,p -p,f,f,y,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,v,g -p,f,y,y,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,v,d -p,x,f,y,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,y,d -p,f,f,y,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,v,p -p,x,y,y,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,y,d -p,b,y,p,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,r,v,g -e,k,y,p,t,n,f,c,b,e,e,?,s,s,w,w,p,w,t,e,w,c,w -p,x,y,y,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,v,p -p,x,f,y,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,v,g -e,k,y,b,t,n,f,c,b,e,e,?,s,s,w,e,p,w,t,e,w,c,w -p,f,y,y,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,y,d -p,x,s,w,t,f,f,c,b,p,t,b,s,s,w,w,p,w,o,p,h,s,u -p,x,y,n,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,x,y,n,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,x,y,e,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,f,g,w,t,n,f,w,n,w,e,b,s,s,w,w,p,w,o,p,w,c,l -p,x,s,g,t,f,f,c,b,p,t,b,f,s,w,w,p,w,o,p,h,v,u -e,f,y,c,f,n,f,w,n,w,e,b,f,s,w,n,p,w,o,e,w,v,l -p,x,y,g,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,y,g -p,f,y,y,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,v,d -p,f,y,y,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,y,p -e,f,y,p,t,n,f,c,b,w,e,?,s,s,w,e,p,w,t,e,w,c,w -p,x,s,b,t,f,f,c,b,w,t,b,f,s,w,w,p,w,o,p,h,v,g -p,x,s,g,t,f,f,c,b,h,t,b,s,f,w,w,p,w,o,p,h,s,g -p,x,y,e,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -e,k,y,c,f,n,f,w,n,w,e,b,s,f,w,n,p,w,o,e,w,v,l -p,x,s,w,t,f,f,c,b,w,t,b,s,f,w,w,p,w,o,p,h,s,u -p,x,s,w,t,f,f,c,b,h,t,b,s,f,w,w,p,w,o,p,h,v,g -p,f,s,b,t,f,f,c,b,w,t,b,s,f,w,w,p,w,o,p,h,v,u -p,f,s,b,t,f,f,c,b,p,t,b,s,s,w,w,p,w,o,p,h,v,g -e,k,y,c,f,n,f,w,n,w,e,b,f,s,w,n,p,w,o,e,w,v,l -p,x,y,g,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,v,d -p,f,s,w,t,f,f,c,b,h,t,b,f,f,w,w,p,w,o,p,h,v,g -p,f,s,g,t,f,f,c,b,w,t,b,s,s,w,w,p,w,o,p,h,s,u -p,c,g,w,t,n,f,w,n,w,e,b,s,s,w,w,p,w,o,p,w,c,l -p,x,s,g,t,f,f,c,b,p,t,b,s,f,w,w,p,w,o,p,h,s,u -p,b,g,w,t,n,f,w,n,w,e,b,s,s,w,w,p,w,o,p,w,c,l -p,x,s,b,t,f,f,c,b,p,t,b,s,s,w,w,p,w,o,p,h,v,g -p,f,s,w,t,f,f,c,b,h,t,b,f,f,w,w,p,w,o,p,h,s,u -p,b,f,y,f,n,f,c,n,w,e,?,k,y,w,y,p,w,o,e,w,v,d -p,x,y,n,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,x,y,n,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -e,k,s,n,t,n,f,c,b,w,e,?,s,s,e,w,p,w,t,e,w,c,w -e,k,y,b,t,n,f,c,b,w,e,?,s,s,e,e,p,w,t,e,w,c,w -p,f,s,w,t,f,f,c,b,w,t,b,s,s,w,w,p,w,o,p,h,v,g -p,f,s,b,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,r,v,m 
-p,f,y,y,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,y,p -p,x,y,g,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,y,g -p,f,s,b,t,n,f,c,b,r,e,b,s,s,w,w,p,w,t,p,r,v,g -p,x,y,n,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,x,y,e,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,f,f,g,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,y,d -e,x,s,e,t,n,f,c,b,e,e,?,s,s,e,w,p,w,t,e,w,c,w -p,x,y,n,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,x,f,y,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,y,g -p,x,y,n,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -e,f,f,n,f,n,f,w,n,w,e,b,s,f,w,n,p,w,o,e,w,v,l -p,x,y,n,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -e,x,s,b,t,n,f,c,b,e,e,?,s,s,e,e,p,w,t,e,w,c,w -e,f,y,n,t,n,f,c,b,w,e,?,s,s,w,e,p,w,t,e,w,c,w -p,f,y,y,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,y,p -p,f,y,b,t,n,f,c,b,r,e,b,s,s,w,w,p,w,t,p,r,v,m -p,x,y,n,f,n,f,c,n,w,e,?,k,y,w,y,p,w,o,e,w,v,d -p,x,y,n,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,x,s,b,t,f,f,c,b,w,t,b,s,s,w,w,p,w,o,p,h,s,g -p,x,s,b,t,f,f,c,b,h,t,b,f,s,w,w,p,w,o,p,h,v,g -p,f,y,y,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,y,p -p,f,s,g,t,f,f,c,b,w,t,b,s,f,w,w,p,w,o,p,h,v,g -p,x,y,n,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,x,s,b,t,f,f,c,b,p,t,b,f,s,w,w,p,w,o,p,h,v,g -p,f,f,n,f,n,f,c,n,w,e,?,k,y,w,y,p,w,o,e,w,v,d -e,x,y,e,t,n,f,c,b,w,e,?,s,s,w,w,p,w,t,e,w,c,w -p,f,s,b,t,f,f,c,b,w,t,b,f,f,w,w,p,w,o,p,h,s,g -p,f,s,w,t,f,f,c,b,p,t,b,f,s,w,w,p,w,o,p,h,s,g -e,k,y,e,t,n,f,c,b,w,e,?,s,s,e,e,p,w,t,e,w,c,w -p,x,f,y,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,y,p -p,x,f,n,f,n,f,c,n,w,e,?,k,y,w,n,p,w,o,e,w,v,d -e,x,y,n,t,n,f,c,b,e,e,?,s,s,e,w,p,w,t,e,w,c,w -p,x,y,g,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,y,g -p,f,s,b,t,f,f,c,b,h,t,b,f,s,w,w,p,w,o,p,h,v,g -p,f,s,b,t,f,f,c,b,h,t,b,f,s,w,w,p,w,o,p,h,s,g -p,f,y,y,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,v,g -p,f,y,y,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,v,d -e,x,s,p,t,n,f,c,b,w,e,?,s,s,w,w,p,w,t,e,w,c,w -e,f,s,p,t,n,f,c,b,e,e,?,s,s,e,e,p,w,t,e,w,c,w -p,b,y,w,t,n,f,c,b,r,e,b,s,s,w,w,p,w,t,p,r,v,g -e,x,y,c,f,n,f,w,n,w,e,b,f,f,w,n,p,w,o,e,w,v,l -e,f,s,n,t,n,f,c,b,w,e,?,s,s,e,w,p,w,t,e,w,c,w -p,x,s,b,t,f,f,c,b,h,t,b,f,s,w,w,p,w,o,p,h,v,u -p,x,s,g,t,f,f,c,b,w,t,b,f,s,w,w,p,w,o,p,h,v,g -p,f,y,y,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,y,d -p,x,s,b,t,f,f,c,b,w,t,b,f,s,w,w,p,w,o,p,h,v,u -p,x,s,w,t,f,f,c,b,p,t,b,s,s,w,w,p,w,o,p,h,s,g -e,f,y,c,f,n,f,w,n,w,e,b,s,f,w,n,p,w,o,e,w,v,l -p,x,y,e,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,f,f,y,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,y,d -e,x,y,w,f,n,f,c,n,h,e,?,s,f,w,w,p,w,o,f,h,v,d -p,x,y,y,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,v,p -p,f,y,p,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,r,v,g -p,f,s,g,t,f,f,c,b,h,t,b,f,s,w,w,p,w,o,p,h,v,u -p,x,y,n,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -e,k,s,p,t,n,f,c,b,w,e,?,s,s,e,w,p,w,t,e,w,c,w -p,x,y,n,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,x,y,g,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,v,p -p,f,s,b,t,f,f,c,b,h,t,b,f,f,w,w,p,w,o,p,h,s,u -e,k,y,p,t,n,f,c,b,e,e,?,s,s,w,e,p,w,t,e,w,c,w -p,x,f,y,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,v,p -p,f,y,g,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,v,g -p,f,f,g,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,y,d -p,x,s,b,t,f,f,c,b,p,t,b,s,f,w,w,p,w,o,p,h,s,g -p,x,y,n,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,x,y,y,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,v,d -p,f,f,y,f,f,f,c,b,h,e,b,k,k,n,n,p,w,o,l,h,v,d -p,f,f,n,f,n,f,c,n,w,e,?,k,y,w,n,p,w,o,e,w,v,d -e,k,s,b,t,n,f,c,b,w,e,?,s,s,w,w,p,w,t,e,w,c,w -p,f,y,y,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,v,g -p,x,y,e,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,x,s,g,t,f,f,c,b,h,t,b,f,f,w,w,p,w,o,p,h,s,u -e,f,s,n,t,n,f,c,b,e,e,?,s,s,w,e,p,w,t,e,w,c,w -p,x,y,g,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,v,p -p,x,y,n,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d 
-p,x,s,g,t,f,f,c,b,p,t,b,s,s,w,w,p,w,o,p,h,v,u -p,x,s,g,t,f,f,c,b,h,t,b,f,s,w,w,p,w,o,p,h,v,g -p,f,y,p,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,r,v,m -e,k,y,b,t,n,f,c,b,w,e,?,s,s,e,w,p,w,t,e,w,c,w -p,x,s,w,t,f,f,c,b,w,t,b,s,s,w,w,p,w,o,p,h,v,u -p,x,y,n,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -e,k,y,n,t,n,f,c,b,w,e,?,s,s,e,w,p,w,t,e,w,c,w -p,f,y,y,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,y,d -e,x,y,p,t,n,f,c,b,w,e,?,s,s,e,e,p,w,t,e,w,c,w -p,f,s,g,t,f,f,c,b,p,t,b,f,f,w,w,p,w,o,p,h,s,u -e,x,s,p,t,n,f,c,b,e,e,?,s,s,w,e,p,w,t,e,w,c,w -p,b,y,n,f,n,f,c,n,w,e,?,k,y,w,n,p,w,o,e,w,v,d -p,f,s,b,t,f,f,c,b,h,t,b,f,f,w,w,p,w,o,p,h,v,g -p,x,y,n,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -p,x,y,n,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,x,s,b,t,f,f,c,b,w,t,b,s,s,w,w,p,w,o,p,h,s,u -p,x,y,n,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -e,k,y,p,t,n,f,c,b,w,e,?,s,s,w,e,p,w,t,e,w,c,w -e,k,f,c,f,n,f,w,n,w,e,b,f,f,w,n,p,w,o,e,w,v,l -p,x,y,y,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,v,g -e,x,s,n,t,n,f,c,b,w,e,?,s,s,e,w,p,w,t,e,w,c,w -p,f,f,g,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,y,p -p,f,y,y,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,v,d -p,f,s,g,t,f,f,c,b,w,t,b,s,f,w,w,p,w,o,p,h,v,u -p,k,y,w,t,n,f,w,n,w,e,b,s,s,w,w,p,w,o,p,w,c,l -e,k,f,n,f,n,f,w,n,w,e,b,f,f,w,n,p,w,o,e,w,v,l -p,k,f,y,f,n,f,c,n,w,e,?,k,y,w,y,p,w,o,e,w,v,d -p,f,s,w,t,f,f,c,b,w,t,b,f,f,w,w,p,w,o,p,h,s,g -p,f,y,y,f,n,f,c,n,w,e,?,k,y,w,y,p,w,o,e,w,v,d -p,f,y,y,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,v,d -e,k,s,b,t,n,f,c,b,e,e,?,s,s,w,e,p,w,t,e,w,c,w -p,f,y,y,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,v,p -p,x,y,n,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,x,y,y,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,y,d -p,x,y,y,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,v,p -p,b,y,w,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,r,v,g -p,f,y,y,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,y,g -p,x,s,w,t,f,f,c,b,p,t,b,s,f,w,w,p,w,o,p,h,v,g -p,x,s,b,t,f,f,c,b,w,t,b,s,s,w,w,p,w,o,p,h,v,g -e,k,y,b,t,n,f,c,b,w,e,?,s,s,w,e,p,w,t,e,w,c,w -p,f,y,y,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,v,p -p,x,s,b,t,f,f,c,b,p,t,b,f,f,w,w,p,w,o,p,h,s,g -p,b,y,b,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,r,v,m -p,x,s,w,t,f,f,c,b,w,t,b,s,f,w,w,p,w,o,p,h,v,g -e,k,s,n,t,n,f,c,b,w,e,?,s,s,w,e,p,w,t,e,w,c,w -p,f,s,g,t,f,f,c,b,w,t,b,f,s,w,w,p,w,o,p,h,s,u -p,x,f,y,f,n,f,c,n,w,e,?,k,y,w,y,p,w,o,e,w,v,d -p,f,s,g,t,f,f,c,b,p,t,b,s,s,w,w,p,w,o,p,h,v,g -p,f,y,y,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,y,d -p,k,y,n,f,n,f,c,n,w,e,?,k,y,w,y,p,w,o,e,w,v,d -p,f,s,b,t,f,f,c,b,p,t,b,s,f,w,w,p,w,o,p,h,s,u -e,f,s,e,t,n,f,c,b,w,e,?,s,s,w,w,p,w,t,e,w,c,w -p,x,y,n,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,x,y,n,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -p,x,y,n,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -p,x,s,w,t,f,f,c,b,h,t,b,f,s,w,w,p,w,o,p,h,v,u -e,k,s,n,t,n,f,c,b,e,e,?,s,s,e,e,p,w,t,e,w,c,w -p,f,y,g,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,y,g -e,x,s,b,t,n,f,c,b,e,e,?,s,s,w,e,p,w,t,e,w,c,w -p,f,s,w,t,f,f,c,b,w,t,b,f,s,w,w,p,w,o,p,h,s,u -e,k,f,n,f,n,f,w,n,w,e,b,s,s,w,n,p,w,o,e,w,v,l -p,x,y,n,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,f,s,w,t,f,f,c,b,h,t,b,f,s,w,w,p,w,o,p,h,v,g -p,x,s,g,t,f,f,c,b,h,t,b,s,s,w,w,p,w,o,p,h,v,u -p,k,y,y,f,n,f,c,n,w,e,?,k,y,w,y,p,w,o,e,w,v,d -p,f,s,w,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,r,v,m -p,f,f,y,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,y,p -e,k,y,p,t,n,f,c,b,w,e,?,s,s,e,w,p,w,t,e,w,c,w -p,f,y,w,t,n,f,w,n,w,e,b,s,s,w,w,p,w,o,p,w,c,l -p,f,s,b,t,f,f,c,b,p,t,b,f,s,w,w,p,w,o,p,h,v,u -e,f,y,w,f,n,f,c,n,u,e,?,s,f,w,w,p,w,o,f,h,y,d -e,x,y,r,f,n,f,c,n,w,e,?,s,f,w,w,p,w,o,f,h,v,d -e,x,s,b,t,n,f,c,b,w,e,?,s,s,w,e,p,w,t,e,w,c,w -p,f,s,g,t,f,f,c,b,h,t,b,s,s,w,w,p,w,o,p,h,v,g -p,x,y,e,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d 
-p,f,y,y,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,y,p -p,f,s,g,t,f,f,c,b,w,t,b,s,s,w,w,p,w,o,p,h,s,g -p,f,s,b,t,f,f,c,b,h,t,b,s,s,w,w,p,w,o,p,h,s,g -e,f,s,e,t,n,f,c,b,e,e,?,s,s,e,w,p,w,t,e,w,c,w -p,x,y,e,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -p,f,f,g,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,y,g -p,f,y,y,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,y,d -e,x,s,n,t,n,f,c,b,e,e,?,s,s,e,w,p,w,t,e,w,c,w -p,f,s,g,t,f,f,c,b,p,t,b,f,f,w,w,p,w,o,p,h,s,g -p,x,y,n,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,f,s,w,t,f,f,c,b,p,t,b,s,s,w,w,p,w,o,p,h,s,g -e,x,f,c,f,n,f,w,n,w,e,b,s,s,w,n,p,w,o,e,w,v,l -p,f,s,g,t,f,f,c,b,w,t,b,f,s,w,w,p,w,o,p,h,v,g -p,f,f,g,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,v,p -e,x,y,r,f,n,f,c,n,w,e,?,s,f,w,w,p,w,o,f,h,y,d -p,x,f,y,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,v,p -p,x,y,n,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,x,y,n,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -e,f,y,e,t,n,f,c,b,w,e,?,s,s,e,w,p,w,t,e,w,c,w -p,x,s,g,t,f,f,c,b,w,t,b,s,f,w,w,p,w,o,p,h,v,u -e,k,y,n,t,n,f,c,b,w,e,?,s,s,w,e,p,w,t,e,w,c,w -e,f,y,e,t,n,f,c,b,e,e,?,s,s,w,w,p,w,t,e,w,c,w -p,x,y,n,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,b,y,n,f,n,f,c,n,w,e,?,k,y,w,y,p,w,o,e,w,v,d -p,x,y,n,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -e,f,y,b,t,n,f,c,b,e,e,?,s,s,w,e,p,w,t,e,w,c,w -p,x,s,w,t,f,f,c,b,h,t,b,f,s,w,w,p,w,o,p,h,v,g -p,f,y,y,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,v,g -e,f,y,n,t,n,f,c,b,w,e,?,s,s,w,w,p,w,t,e,w,c,w -p,b,y,w,t,n,f,c,b,g,e,b,s,s,w,w,p,w,t,p,r,v,m -e,x,y,n,f,n,f,w,n,w,e,b,s,f,w,n,p,w,o,e,w,v,l -p,f,y,y,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,v,p -p,x,s,b,t,f,f,c,b,h,t,b,f,s,w,w,p,w,o,p,h,s,g -p,x,y,n,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -p,x,s,g,t,f,f,c,b,h,t,b,s,s,w,w,p,w,o,p,h,s,u -p,f,s,g,t,f,f,c,b,w,t,b,s,f,w,w,p,w,o,p,h,s,u -p,x,y,n,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -e,f,y,w,f,n,f,c,n,p,e,?,s,f,w,w,p,w,o,f,h,v,d -p,x,y,n,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,f,f,g,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,v,g -p,x,y,e,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,x,s,w,t,f,f,c,b,w,t,b,s,s,w,w,p,w,o,p,h,v,g -p,f,y,g,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,v,d -p,x,s,w,t,f,f,c,b,h,t,b,s,f,w,w,p,w,o,p,h,s,u -p,k,y,y,f,n,f,c,n,w,e,?,k,y,w,n,p,w,o,e,w,v,d -p,f,s,w,t,f,f,c,b,w,t,b,s,f,w,w,p,w,o,p,h,v,u -p,f,s,g,t,f,f,c,b,h,t,b,f,f,w,w,p,w,o,p,h,v,u -e,x,y,c,f,n,f,w,n,w,e,b,f,s,w,n,p,w,o,e,w,v,l -p,x,s,w,t,f,f,c,b,h,t,b,s,s,w,w,p,w,o,p,h,v,u -p,x,y,n,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,f,y,p,t,n,f,c,b,r,e,b,s,s,w,w,p,w,t,p,r,v,g -p,f,y,y,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,y,p -e,x,s,p,t,n,f,c,b,e,e,?,s,s,e,e,p,w,t,e,w,c,w -p,f,y,b,t,n,f,c,b,r,e,b,s,s,w,w,p,w,t,p,r,v,g -p,x,f,y,f,n,f,c,n,w,e,?,k,y,w,n,p,w,o,e,w,v,d -p,f,y,y,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,y,p -e,f,f,n,f,n,f,w,n,w,e,b,f,f,w,n,p,w,o,e,w,v,l -e,x,f,n,f,n,f,w,n,w,e,b,f,s,w,n,p,w,o,e,w,v,l -p,x,y,n,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,f,s,g,t,f,f,c,b,p,t,b,f,s,w,w,p,w,o,p,h,v,u -p,x,s,b,t,f,f,c,b,h,t,b,f,f,w,w,p,w,o,p,h,s,u -p,f,s,g,t,f,f,c,b,w,t,b,s,s,w,w,p,w,o,p,h,v,g -p,x,s,b,t,f,f,c,b,h,t,b,f,f,w,w,p,w,o,p,h,s,g -e,x,y,r,f,n,f,c,n,h,e,?,s,f,w,w,p,w,o,f,h,v,d -p,f,y,y,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,y,d -p,f,s,g,t,f,f,c,b,h,t,b,f,s,w,w,p,w,o,p,h,s,u -p,x,y,n,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,f,y,y,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,y,g -p,f,s,w,t,f,f,c,b,w,t,b,f,f,w,w,p,w,o,p,h,v,u -p,x,s,w,t,f,f,c,b,w,t,b,f,s,w,w,p,w,o,p,h,s,u -p,f,y,g,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,y,p -p,f,y,y,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,y,p -p,x,y,n,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -p,x,y,y,f,n,f,c,n,w,e,?,k,y,w,n,p,w,o,e,w,v,d -e,f,s,n,t,n,f,c,b,e,e,?,s,s,e,e,p,w,t,e,w,c,w 
-p,f,y,y,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,y,d -p,f,s,g,t,f,f,c,b,h,t,b,s,f,w,w,p,w,o,p,h,s,u -p,x,y,n,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,f,y,y,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,v,d -p,f,y,y,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,y,d -p,f,s,w,t,f,f,c,b,h,t,b,s,s,w,w,p,w,o,p,h,s,u -p,f,s,p,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,r,v,m -p,f,y,y,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,y,g -e,k,s,p,t,n,f,c,b,w,e,?,s,s,w,e,p,w,t,e,w,c,w -p,x,s,b,t,f,f,c,b,h,t,b,s,s,w,w,p,w,o,p,h,s,g -p,x,s,w,t,f,f,c,b,p,t,b,f,s,w,w,p,w,o,p,h,s,g -p,x,s,w,t,f,f,c,b,h,t,b,f,f,w,w,p,w,o,p,h,s,g -p,x,y,n,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -p,b,y,p,t,n,f,c,b,r,e,b,s,s,w,w,p,w,t,p,r,v,m -p,x,s,b,t,f,f,c,b,h,t,b,f,s,w,w,p,w,o,p,h,s,u -e,x,y,p,t,n,f,c,b,w,e,?,s,s,w,w,p,w,t,e,w,c,w -p,x,y,n,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -p,x,y,y,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,v,p -p,x,s,w,t,f,f,c,b,p,t,b,s,f,w,w,p,w,o,p,h,v,u -e,f,y,b,t,n,f,c,b,e,e,?,s,s,e,w,p,w,t,e,w,c,w -p,f,s,b,t,f,f,c,b,w,t,b,f,s,w,w,p,w,o,p,h,v,u -e,x,f,c,f,n,f,w,n,w,e,b,f,f,w,n,p,w,o,e,w,v,l -e,f,s,p,t,n,f,c,b,e,e,?,s,s,w,e,p,w,t,e,w,c,w -e,x,y,n,f,n,f,w,n,w,e,b,f,f,w,n,p,w,o,e,w,v,l -e,f,s,p,t,n,f,c,b,w,e,?,s,s,e,e,p,w,t,e,w,c,w -p,f,s,g,t,f,f,c,b,w,t,b,s,s,w,w,p,w,o,p,h,v,u -e,k,s,e,t,n,f,c,b,w,e,?,s,s,w,e,p,w,t,e,w,c,w -e,k,y,e,t,n,f,c,b,w,e,?,s,s,w,e,p,w,t,e,w,c,w -p,f,s,w,t,f,f,c,b,p,t,b,s,f,w,w,p,w,o,p,h,v,g -e,k,s,b,t,n,f,c,b,e,e,?,s,s,e,w,p,w,t,e,w,c,w -p,x,s,b,t,f,f,c,b,p,t,b,f,f,w,w,p,w,o,p,h,v,g -p,b,y,y,f,n,f,c,n,w,e,?,k,y,w,y,p,w,o,e,w,v,d -p,x,s,w,t,f,f,c,b,h,t,b,s,s,w,w,p,w,o,p,h,s,u -p,x,f,y,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,v,g -p,f,y,y,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,y,p -p,b,f,n,f,n,f,c,n,w,e,?,k,y,w,y,p,w,o,e,w,v,d -p,f,y,y,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,y,p -p,f,y,y,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,v,p -e,f,y,c,f,n,f,w,n,w,e,b,s,s,w,n,p,w,o,e,w,v,l -p,x,y,e,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,f,s,w,t,f,f,c,b,h,t,b,s,s,w,w,p,w,o,p,h,s,g -p,x,y,n,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,b,y,p,t,n,f,c,b,r,e,b,s,s,w,w,p,w,t,p,r,v,g -p,f,y,y,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,y,d -p,x,s,b,t,f,f,c,b,w,t,b,s,f,w,w,p,w,o,p,h,s,g -p,f,y,y,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,v,p -p,f,s,b,t,f,f,c,b,p,t,b,f,s,w,w,p,w,o,p,h,s,g -p,f,s,b,t,f,f,c,b,h,t,b,s,s,w,w,p,w,o,p,h,s,u -p,x,y,n,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,f,y,y,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,v,p -e,x,s,n,t,n,f,c,b,e,e,?,s,s,w,w,p,w,t,e,w,c,w -p,f,y,y,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,v,g -e,f,s,b,t,n,f,c,b,e,e,?,s,s,w,e,p,w,t,e,w,c,w -e,f,s,e,t,n,f,c,b,w,e,?,s,s,e,w,p,w,t,e,w,c,w -e,x,s,n,t,n,f,c,b,e,e,?,s,s,e,e,p,w,t,e,w,c,w -p,f,s,w,t,f,f,c,b,p,t,b,f,s,w,w,p,w,o,p,h,v,u -p,f,y,y,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,v,d -e,f,s,p,t,n,f,c,b,e,e,?,s,s,e,w,p,w,t,e,w,c,w -e,x,s,e,t,n,f,c,b,w,e,?,s,s,w,e,p,w,t,e,w,c,w -p,f,y,y,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,v,d -p,b,y,b,t,n,f,c,b,g,e,b,s,s,w,w,p,w,t,p,r,v,m -p,f,s,b,t,f,f,c,b,h,t,b,s,f,w,w,p,w,o,p,h,s,u -p,f,s,g,t,f,f,c,b,w,t,b,f,s,w,w,p,w,o,p,h,s,g -p,x,s,b,t,f,f,c,b,p,t,b,s,s,w,w,p,w,o,p,h,s,g -p,x,f,y,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,y,p -p,f,s,w,t,f,f,c,b,w,t,b,s,s,w,w,p,w,o,p,h,s,g -p,x,s,g,t,f,f,c,b,w,t,b,f,s,w,w,p,w,o,p,h,s,u -e,f,y,r,f,n,f,c,n,h,e,?,s,f,w,w,p,w,o,f,h,y,d -p,f,y,y,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,v,g -p,x,y,n,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,x,s,b,t,f,f,c,b,w,t,b,f,s,w,w,p,w,o,p,h,s,g -e,x,f,c,f,n,f,w,n,w,e,b,f,s,w,n,p,w,o,e,w,v,l -p,x,s,g,t,f,f,c,b,h,t,b,f,s,w,w,p,w,o,p,h,v,u -e,x,y,u,f,n,f,c,n,p,e,?,s,f,w,w,p,w,o,f,h,y,d -p,x,f,y,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,v,p 
-p,f,f,g,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,v,g -p,f,y,w,t,n,f,c,b,g,e,b,s,s,w,w,p,w,t,p,r,v,m -p,x,y,n,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,x,y,n,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,x,y,g,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,v,p -e,x,y,p,t,n,f,c,b,e,e,?,s,s,e,e,p,w,t,e,w,c,w -p,f,s,g,t,f,f,c,b,w,t,b,f,f,w,w,p,w,o,p,h,s,g -p,x,s,b,t,f,f,c,b,w,t,b,s,f,w,w,p,w,o,p,h,v,g -p,x,y,n,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -p,x,s,g,t,f,f,c,b,h,t,b,f,s,w,w,p,w,o,p,h,s,u -p,x,y,n,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -p,f,s,w,t,f,f,c,b,p,t,b,f,f,w,w,p,w,o,p,h,s,g -p,x,s,g,t,f,f,c,b,w,t,b,s,s,w,w,p,w,o,p,h,v,u -p,x,s,g,t,f,f,c,b,w,t,b,s,f,w,w,p,w,o,p,h,s,g -p,f,y,y,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,v,p -p,f,y,y,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,v,p -p,f,y,g,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,y,g -p,x,y,y,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,y,g -p,x,s,g,t,f,f,c,b,w,t,b,f,f,w,w,p,w,o,p,h,s,u -p,f,s,b,t,f,f,c,b,p,t,b,f,f,w,w,p,w,o,p,h,s,u -p,f,s,w,t,f,f,c,b,h,t,b,f,s,w,w,p,w,o,p,h,s,u -p,f,f,g,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,y,g -p,f,s,b,t,f,f,c,b,h,t,b,f,f,w,w,p,w,o,p,h,s,g -p,x,y,n,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,x,s,w,t,f,f,c,b,w,t,b,s,s,w,w,p,w,o,p,h,s,g -e,k,s,e,t,n,f,c,b,e,e,?,s,s,e,w,p,w,t,e,w,c,w -p,f,y,b,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,r,v,m -p,x,y,n,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -e,k,y,n,t,n,f,c,b,e,e,?,s,s,w,e,p,w,t,e,w,c,w -p,f,s,w,t,f,f,c,b,w,t,b,s,s,w,w,p,w,o,p,h,v,u -p,f,s,w,t,f,f,c,b,p,t,b,s,s,w,w,p,w,o,p,h,v,u -p,x,y,n,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -p,x,s,w,t,f,f,c,b,w,t,b,f,s,w,w,p,w,o,p,h,s,g -p,x,y,n,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,b,s,p,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,r,v,g -p,x,y,e,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,f,y,g,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,v,p -p,x,y,y,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,v,d -p,x,y,g,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,v,g -p,f,y,w,t,n,f,c,b,r,e,b,s,s,w,w,p,w,t,p,r,v,m -p,f,s,g,t,f,f,c,b,h,t,b,f,f,w,w,p,w,o,p,h,s,g -e,k,y,p,t,n,f,c,b,w,e,?,s,s,e,e,p,w,t,e,w,c,w -p,x,s,w,t,f,f,c,b,w,t,b,f,s,w,w,p,w,o,p,h,v,g -e,k,s,p,t,n,f,c,b,e,e,?,s,s,w,w,p,w,t,e,w,c,w -p,f,s,w,t,f,f,c,b,p,t,b,s,f,w,w,p,w,o,p,h,s,u -p,x,y,n,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -p,x,f,y,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,v,d -p,f,y,g,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,v,p -p,x,y,g,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,y,p -e,x,y,e,t,n,f,c,b,w,e,?,s,s,w,e,p,w,t,e,w,c,w -p,f,y,y,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,v,p -p,x,y,n,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -p,x,s,b,t,f,f,c,b,w,t,b,f,f,w,w,p,w,o,p,h,v,g -p,f,y,y,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,y,d -e,k,y,e,t,n,f,c,b,e,e,?,s,s,e,w,p,w,t,e,w,c,w -p,f,y,p,t,n,f,c,b,g,e,b,s,s,w,w,p,w,t,p,r,v,m -p,f,y,w,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,r,v,m -p,f,y,y,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,y,p -p,f,y,y,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,v,g -e,f,y,r,f,n,f,c,n,w,e,?,s,f,w,w,p,w,o,f,h,v,d -p,x,s,w,t,f,f,c,b,p,t,b,s,f,w,w,p,w,o,p,h,s,g -p,x,s,w,t,f,f,c,b,p,t,b,f,f,w,w,p,w,o,p,h,s,g -p,x,s,b,t,f,f,c,b,p,t,b,f,s,w,w,p,w,o,p,h,s,u -p,x,y,n,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -e,k,y,e,t,n,f,c,b,e,e,?,s,s,e,e,p,w,t,e,w,c,w -e,k,s,p,t,n,f,c,b,w,e,?,s,s,w,w,p,w,t,e,w,c,w -e,x,y,b,t,n,f,c,b,w,e,?,s,s,w,e,p,w,t,e,w,c,w -p,x,s,g,t,f,f,c,b,h,t,b,s,s,w,w,p,w,o,p,h,v,g -p,f,y,y,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,y,g -p,x,y,n,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,c,y,w,t,n,f,w,n,w,e,b,s,s,w,w,p,w,o,p,w,c,l -p,f,s,w,t,f,f,c,b,p,t,b,f,f,w,w,p,w,o,p,h,v,g -p,f,s,g,t,f,f,c,b,w,t,b,s,f,w,w,p,w,o,p,h,s,g -p,x,y,e,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,x,s,g,t,f,f,c,b,w,t,b,s,s,w,w,p,w,o,p,h,s,g 
-p,x,y,n,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -e,x,y,p,t,n,f,c,b,e,e,?,s,s,e,w,p,w,t,e,w,c,w -e,f,y,w,f,n,f,c,n,h,e,?,s,f,w,w,p,w,o,f,h,y,d -p,f,y,y,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,y,g -p,x,y,n,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -p,x,f,y,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,y,d -p,x,y,n,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,x,s,g,t,f,f,c,b,h,t,b,s,f,w,w,p,w,o,p,h,v,u -p,x,y,n,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -e,k,f,n,f,n,f,w,n,w,e,b,s,f,w,n,p,w,o,e,w,v,l -p,x,s,w,t,f,f,c,b,p,t,b,f,s,w,w,p,w,o,p,h,v,u -p,f,y,y,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,y,p -e,x,y,p,t,n,f,c,b,e,e,?,s,s,w,e,p,w,t,e,w,c,w -p,b,y,b,t,n,f,c,b,r,e,b,s,s,w,w,p,w,t,p,r,v,m -p,f,y,g,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,v,p -e,f,y,r,f,n,f,c,n,h,e,?,s,f,w,w,p,w,o,f,h,v,d -p,x,y,n,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -p,x,y,n,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,f,s,b,t,f,f,c,b,p,t,b,f,f,w,w,p,w,o,p,h,v,g -p,f,y,y,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,v,d -p,x,y,n,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -e,f,y,n,t,n,f,c,b,e,e,?,s,s,w,e,p,w,t,e,w,c,w -p,x,y,n,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -e,f,y,e,t,n,f,c,b,w,e,?,s,s,w,e,p,w,t,e,w,c,w -e,x,y,b,t,n,f,c,b,w,e,?,s,s,w,w,p,w,t,e,w,c,w -e,x,s,n,t,n,f,c,b,e,e,?,s,s,w,e,p,w,t,e,w,c,w -p,f,s,w,t,f,f,c,b,w,t,b,s,f,w,w,p,w,o,p,h,v,g -e,x,y,b,t,n,f,c,b,e,e,?,s,s,w,w,p,w,t,e,w,c,w -p,x,f,y,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,y,g -p,f,y,y,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,y,d -p,f,y,y,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,y,g -e,k,y,e,t,n,f,c,b,w,e,?,s,s,w,w,p,w,t,e,w,c,w -p,f,s,w,t,f,f,c,b,p,t,b,s,f,w,w,p,w,o,p,h,v,u -e,k,y,c,f,n,f,w,n,w,e,b,s,s,w,n,p,w,o,e,w,v,l -e,k,y,n,f,n,f,w,n,w,e,b,s,s,w,n,p,w,o,e,w,v,l -p,b,s,p,t,n,f,c,b,r,e,b,s,s,w,w,p,w,t,p,r,v,m -p,x,y,n,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,x,s,b,t,f,f,c,b,h,t,b,s,f,w,w,p,w,o,p,h,s,u -e,f,y,r,f,n,f,c,n,p,e,?,s,f,w,w,p,w,o,f,h,v,d -p,x,s,b,t,f,f,c,b,w,t,b,f,f,w,w,p,w,o,p,h,s,g -e,f,y,u,f,n,f,c,n,p,e,?,s,f,w,w,p,w,o,f,h,y,d -p,x,y,n,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -p,x,s,w,t,f,f,c,b,p,t,b,s,s,w,w,p,w,o,p,h,v,u -e,x,s,e,t,n,f,c,b,w,e,?,s,s,e,w,p,w,t,e,w,c,w -e,f,y,p,t,n,f,c,b,e,e,?,s,s,w,e,p,w,t,e,w,c,w -p,x,y,n,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,x,y,n,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -p,x,s,w,t,f,f,c,b,p,t,b,s,s,w,w,p,w,o,p,h,v,g -p,f,y,y,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,v,p -e,x,y,n,t,n,f,c,b,e,e,?,s,s,w,w,p,w,t,e,w,c,w -e,x,y,e,t,n,f,c,b,e,e,?,s,s,e,w,p,w,t,e,w,c,w -p,x,s,w,t,f,f,c,b,w,t,b,s,f,w,w,p,w,o,p,h,v,u -p,x,y,y,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,v,g -p,f,s,b,t,n,f,c,b,g,e,b,s,s,w,w,p,w,t,p,r,v,m -p,f,f,y,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,v,p -p,x,s,g,t,f,f,c,b,p,t,b,s,f,w,w,p,w,o,p,h,v,g -e,f,s,p,t,n,f,c,b,w,e,?,s,s,e,w,p,w,t,e,w,c,w -e,x,y,n,t,n,f,c,b,e,e,?,s,s,w,e,p,w,t,e,w,c,w -p,b,y,w,t,n,f,c,b,r,e,b,s,s,w,w,p,w,t,p,r,v,m -p,f,y,y,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,v,d -p,x,y,e,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -p,x,s,b,t,f,f,c,b,h,t,b,s,f,w,w,p,w,o,p,h,s,g -p,x,s,b,t,f,f,c,b,h,t,b,s,s,w,w,p,w,o,p,h,v,g -p,x,y,n,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -p,b,y,p,t,n,f,c,b,g,e,b,s,s,w,w,p,w,t,p,r,v,g -e,x,y,c,f,n,f,w,n,w,e,b,s,s,w,n,p,w,o,e,w,v,l -p,x,y,n,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,x,s,w,t,f,f,c,b,w,t,b,s,s,w,w,p,w,o,p,h,s,u -p,x,y,n,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -e,f,y,n,f,n,f,w,n,w,e,b,f,s,w,n,p,w,o,e,w,v,l -p,x,y,n,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -p,f,s,g,t,f,f,c,b,p,t,b,s,s,w,w,p,w,o,p,h,s,u -p,x,y,e,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -e,x,y,r,f,n,f,c,n,h,e,?,s,f,w,w,p,w,o,f,h,y,d -p,b,y,p,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,r,v,m 
-e,x,s,p,t,n,f,c,b,w,e,?,s,s,e,w,p,w,t,e,w,c,w -p,x,y,n,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,x,y,n,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,f,s,g,t,f,f,c,b,w,t,b,f,f,w,w,p,w,o,p,h,v,g -p,f,y,y,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,v,p -p,f,y,p,t,n,f,c,b,r,e,b,s,s,w,w,p,w,t,p,r,v,m -p,x,y,n,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,x,y,n,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -e,f,s,b,t,n,f,c,b,e,e,?,s,s,e,w,p,w,t,e,w,c,w -p,f,s,b,t,f,f,c,b,p,t,b,s,s,w,w,p,w,o,p,h,v,u -e,x,y,c,f,n,f,w,n,w,e,b,s,f,w,n,p,w,o,e,w,v,l -e,x,y,w,f,n,f,c,n,w,e,?,s,f,w,w,p,w,o,f,h,y,d -p,x,y,n,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -e,f,y,n,f,n,f,w,n,w,e,b,s,s,w,n,p,w,o,e,w,v,l -p,x,y,n,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,x,y,n,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -e,k,y,p,t,n,f,c,b,e,e,?,s,s,e,w,p,w,t,e,w,c,w -p,f,s,w,t,n,f,c,b,r,e,b,s,s,w,w,p,w,t,p,r,v,m -p,x,y,n,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,f,y,y,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,v,g -e,k,y,n,f,n,f,w,n,w,e,b,s,f,w,n,p,w,o,e,w,v,l -p,f,s,g,t,f,f,c,b,p,t,b,f,s,w,w,p,w,o,p,h,v,g -p,f,s,w,t,n,f,c,b,r,e,b,s,s,w,w,p,w,t,p,r,v,g -p,f,f,g,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,y,g -e,f,s,n,t,n,f,c,b,w,e,?,s,s,w,e,p,w,t,e,w,c,w -p,x,f,y,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,y,d -p,x,y,n,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,x,y,e,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,f,y,y,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,v,g -p,b,s,w,t,n,f,c,b,g,e,b,s,s,w,w,p,w,t,p,r,v,g -p,x,s,b,t,f,f,c,b,p,t,b,s,f,w,w,p,w,o,p,h,v,u -e,f,y,b,t,n,f,c,b,w,e,?,s,s,w,e,p,w,t,e,w,c,w -e,x,f,c,f,n,f,w,n,w,e,b,s,f,w,n,p,w,o,e,w,v,l -p,f,s,w,t,f,f,c,b,h,t,b,f,s,w,w,p,w,o,p,h,v,u -e,k,s,p,t,n,f,c,b,e,e,?,s,s,e,w,p,w,t,e,w,c,w -e,k,y,n,t,n,f,c,b,e,e,?,s,s,e,w,p,w,t,e,w,c,w -e,x,y,b,t,n,f,c,b,e,e,?,s,s,e,e,p,w,t,e,w,c,w -p,x,f,y,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,y,g -p,f,s,g,t,f,f,c,b,h,t,b,f,s,w,w,p,w,o,p,h,v,g -p,f,y,y,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,v,p -e,x,y,e,t,n,f,c,b,e,e,?,s,s,w,e,p,w,t,e,w,c,w -p,f,s,b,t,f,f,c,b,p,t,b,f,s,w,w,p,w,o,p,h,v,g -e,x,y,n,f,n,f,w,n,w,e,b,s,s,w,n,p,w,o,e,w,v,l -p,f,y,y,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,v,d -p,f,f,g,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,v,d -p,b,y,b,t,n,f,c,b,r,e,b,s,s,w,w,p,w,t,p,r,v,g -p,x,y,e,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -p,f,y,y,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,y,p -p,f,f,y,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,v,p -p,f,y,w,t,n,f,c,b,g,e,b,s,s,w,w,p,w,t,p,r,v,g -e,k,s,e,t,n,f,c,b,e,e,?,s,s,w,e,p,w,t,e,w,c,w -p,f,y,y,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,v,p -e,k,s,n,t,n,f,c,b,e,e,?,s,s,w,e,p,w,t,e,w,c,w -p,f,y,y,f,n,f,c,n,w,e,?,k,y,w,n,p,w,o,e,w,v,d -p,f,y,g,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,y,p -p,f,y,y,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,v,g -p,x,y,e,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -e,f,s,b,t,n,f,c,b,w,e,?,s,s,e,e,p,w,t,e,w,c,w -p,f,s,w,t,f,f,c,b,w,t,b,f,s,w,w,p,w,o,p,h,v,u -p,x,s,w,t,f,f,c,b,w,t,b,f,f,w,w,p,w,o,p,h,s,g -p,f,y,g,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,y,d -e,f,y,p,t,n,f,c,b,w,e,?,s,s,e,w,p,w,t,e,w,c,w -p,f,y,y,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,y,g -p,f,y,y,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,y,p -e,k,s,e,t,n,f,c,b,w,e,?,s,s,e,e,p,w,t,e,w,c,w -e,x,y,n,t,n,f,c,b,e,e,?,s,s,e,e,p,w,t,e,w,c,w -p,x,y,y,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,v,g -p,f,s,b,t,f,f,c,b,p,t,b,s,f,w,w,p,w,o,p,h,v,g -p,x,y,g,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,v,d -p,f,s,g,t,f,f,c,b,w,t,b,f,f,w,w,p,w,o,p,h,s,u -p,x,s,w,t,f,f,c,b,p,t,b,f,s,w,w,p,w,o,p,h,v,g -p,x,f,n,f,n,f,c,n,w,e,?,k,y,w,y,p,w,o,e,w,v,d -p,f,s,b,t,f,f,c,b,h,t,b,s,f,w,w,p,w,o,p,h,s,g -e,f,y,e,t,n,f,c,b,e,e,?,s,s,e,e,p,w,t,e,w,c,w -p,f,y,y,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,y,g 
-p,x,s,b,t,f,f,c,b,p,t,b,s,f,w,w,p,w,o,p,h,s,u -e,k,f,n,f,n,f,w,n,w,e,b,f,s,w,n,p,w,o,e,w,v,l -p,f,y,y,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,y,g -e,k,y,c,f,n,f,w,n,w,e,b,f,f,w,n,p,w,o,e,w,v,l -p,x,f,g,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,y,p -p,x,y,n,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -e,f,y,p,t,n,f,c,b,w,e,?,s,s,w,w,p,w,t,e,w,c,w -p,x,s,b,t,f,f,c,b,h,t,b,f,f,w,w,p,w,o,p,h,v,g -p,x,y,g,f,f,f,c,b,h,e,b,k,k,b,n,p,w,o,l,h,y,d -p,f,y,y,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,y,g -p,x,y,e,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -e,f,y,r,f,n,f,c,n,w,e,?,s,f,w,w,p,w,o,f,h,y,d -p,f,y,y,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,y,p -p,x,s,g,t,f,f,c,b,h,t,b,f,f,w,w,p,w,o,p,h,v,g -p,x,s,w,t,f,f,c,b,w,t,b,f,f,w,w,p,w,o,p,h,v,g -p,f,s,b,t,f,f,c,b,w,t,b,f,s,w,w,p,w,o,p,h,s,g -e,x,y,n,t,n,f,c,b,w,e,?,s,s,e,e,p,w,t,e,w,c,w -p,f,f,y,f,n,f,c,n,w,e,?,k,y,w,n,p,w,o,e,w,v,d -e,f,y,u,f,n,f,c,n,w,e,?,s,f,w,w,p,w,o,f,h,v,d -p,x,y,n,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -e,x,s,n,t,n,f,c,b,w,e,?,s,s,w,w,p,w,t,e,w,c,w -p,x,s,w,t,f,f,c,b,w,t,b,f,s,w,w,p,w,o,p,h,v,u -p,f,y,y,f,f,f,c,b,p,e,b,k,k,p,b,p,w,o,l,h,v,d -e,f,s,b,t,n,f,c,b,e,e,?,s,s,e,e,p,w,t,e,w,c,w -e,k,f,c,f,n,f,w,n,w,e,b,s,s,w,n,p,w,o,e,w,v,l -p,x,s,g,t,f,f,c,b,p,t,b,s,f,w,w,p,w,o,p,h,v,u -p,f,y,g,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,v,p -p,x,y,e,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -e,f,y,p,t,n,f,c,b,e,e,?,s,s,e,w,p,w,t,e,w,c,w -e,k,y,p,t,n,f,c,b,w,e,?,s,s,w,w,p,w,t,e,w,c,w -p,x,y,y,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,v,g -p,x,y,n,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -e,f,s,n,t,n,f,c,b,e,e,?,s,s,e,w,p,w,t,e,w,c,w -p,f,y,y,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,y,g -p,x,y,n,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -e,f,s,n,t,n,f,c,b,w,e,?,s,s,e,e,p,w,t,e,w,c,w -p,f,y,y,f,f,f,c,b,p,e,b,k,k,p,p,p,w,o,l,h,v,d -p,f,y,y,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,v,p -p,f,y,y,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,y,p -e,f,f,c,f,n,f,w,n,w,e,b,f,s,w,n,p,w,o,e,w,v,l -p,b,y,b,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,r,v,g -e,k,y,e,t,n,f,c,b,w,e,?,s,s,e,w,p,w,t,e,w,c,w -p,x,s,b,t,f,f,c,b,w,t,b,f,f,w,w,p,w,o,p,h,v,u -p,x,s,g,t,f,f,c,b,w,t,b,s,s,w,w,p,w,o,p,h,v,g -p,f,y,y,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,v,g -p,x,s,g,t,f,f,c,b,p,t,b,s,s,w,w,p,w,o,p,h,s,u -p,x,y,e,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -p,f,s,w,t,f,f,c,b,p,t,b,f,f,w,w,p,w,o,p,h,v,u -e,f,s,p,t,n,f,c,b,w,e,?,s,s,w,w,p,w,t,e,w,c,w -p,f,s,b,t,f,f,c,b,h,t,b,s,s,w,w,p,w,o,p,h,v,g -p,x,y,e,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -e,f,y,w,f,n,f,c,n,u,e,?,s,f,w,w,p,w,o,f,h,v,d -e,x,y,p,t,n,f,c,b,w,e,?,s,s,w,e,p,w,t,e,w,c,w -p,x,s,b,t,f,f,c,b,p,t,b,s,s,w,w,p,w,o,p,h,v,u -p,k,g,w,t,n,f,w,n,w,e,b,s,s,w,w,p,w,o,p,w,c,l -p,f,s,g,t,f,f,c,b,p,t,b,s,s,w,w,p,w,o,p,h,s,g -p,x,y,n,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -e,k,s,e,t,n,f,c,b,w,e,?,s,s,w,w,p,w,t,e,w,c,w -p,x,y,n,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,x,y,n,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,f,s,w,t,f,f,c,b,h,t,b,f,f,w,w,p,w,o,p,h,s,g -e,f,s,e,t,n,f,c,b,e,e,?,s,s,w,e,p,w,t,e,w,c,w -p,f,s,b,t,f,f,c,b,p,t,b,s,s,w,w,p,w,o,p,h,s,g -p,b,s,w,t,n,f,c,b,r,e,b,s,s,w,w,p,w,t,p,r,v,m -e,f,f,n,f,n,f,w,n,w,e,b,f,s,w,n,p,w,o,e,w,v,l -p,f,s,g,t,f,f,c,b,p,t,b,s,s,w,w,p,w,o,p,h,v,u -p,x,s,w,t,f,f,c,b,h,t,b,s,s,w,w,p,w,o,p,h,v,g -p,f,y,y,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,v,g -e,x,s,b,t,n,f,c,b,e,e,?,s,s,e,w,p,w,t,e,w,c,w -e,f,y,n,t,n,f,c,b,w,e,?,s,s,e,w,p,w,t,e,w,c,w -e,x,y,r,f,n,f,c,n,u,e,?,s,f,w,w,p,w,o,f,h,v,d -e,k,s,b,t,n,f,c,b,e,e,?,s,s,e,e,p,w,t,e,w,c,w -p,x,s,w,t,f,f,c,b,h,t,b,s,f,w,w,p,w,o,p,h,v,u -p,x,y,n,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -e,x,y,b,t,n,f,c,b,w,e,?,s,s,e,e,p,w,t,e,w,c,w 
-p,f,y,w,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,r,v,g -p,f,s,b,t,f,f,c,b,w,t,b,s,f,w,w,p,w,o,p,h,s,u -p,x,s,g,t,f,f,c,b,p,t,b,s,s,w,w,p,w,o,p,h,v,g -e,k,s,n,t,n,f,c,b,w,e,?,s,s,w,w,p,w,t,e,w,c,w -p,f,s,g,t,f,f,c,b,p,t,b,s,f,w,w,p,w,o,p,h,s,g -e,k,y,p,t,n,f,c,b,e,e,?,s,s,e,e,p,w,t,e,w,c,w -p,f,y,b,t,n,f,c,b,g,e,b,s,s,w,w,p,w,t,p,r,v,m -p,x,y,n,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,f,y,y,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,v,p -p,x,s,b,t,f,f,c,b,w,t,b,f,f,w,w,p,w,o,p,h,s,u -e,k,f,c,f,n,f,w,n,w,e,b,s,f,w,n,p,w,o,e,w,v,l -p,f,s,g,t,f,f,c,b,p,t,b,s,f,w,w,p,w,o,p,h,v,u -p,f,s,w,t,f,f,c,b,w,t,b,f,s,w,w,p,w,o,p,h,v,g -p,b,s,b,t,n,f,c,b,r,e,b,s,s,w,w,p,w,t,p,r,v,m -p,b,s,p,t,n,f,c,b,r,e,b,s,s,w,w,p,w,t,p,r,v,g -p,f,s,w,t,f,f,c,b,w,t,b,s,f,w,w,p,w,o,p,h,s,g -e,x,y,n,t,n,f,c,b,w,e,?,s,s,w,e,p,w,t,e,w,c,w -p,x,y,n,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,x,y,g,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,v,g -e,x,s,b,t,n,f,c,b,w,e,?,s,s,w,w,p,w,t,e,w,c,w -p,x,y,n,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,x,s,b,t,f,f,c,b,p,t,b,s,s,w,w,p,w,o,p,h,s,u -p,f,s,p,t,n,f,c,b,g,e,b,s,s,w,w,p,w,t,p,r,v,g -p,x,y,n,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,x,s,w,t,f,f,c,b,h,t,b,f,f,w,w,p,w,o,p,h,v,u -p,f,s,w,t,f,f,c,b,h,t,b,s,s,w,w,p,w,o,p,h,v,u -e,f,y,r,f,n,f,c,n,u,e,?,s,f,w,w,p,w,o,f,h,y,d -p,f,s,w,t,f,f,c,b,h,t,b,f,s,w,w,p,w,o,p,h,s,g -p,f,s,w,t,f,f,c,b,p,t,b,f,s,w,w,p,w,o,p,h,s,u -e,x,y,w,f,n,f,c,n,u,e,?,s,f,w,w,p,w,o,f,h,v,d -p,x,y,n,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -e,k,y,b,t,n,f,c,b,w,e,?,s,s,w,w,p,w,t,e,w,c,w -p,x,y,n,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,x,y,n,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,x,s,b,t,f,f,c,b,p,t,b,f,f,w,w,p,w,o,p,h,s,u -e,f,y,r,f,n,f,c,n,p,e,?,s,f,w,w,p,w,o,f,h,y,d -p,b,s,p,t,n,f,c,b,g,e,b,s,s,w,w,p,w,t,p,r,v,g -p,x,s,b,t,f,f,c,b,p,t,b,f,f,w,w,p,w,o,p,h,v,u -e,f,y,c,f,n,f,w,n,w,e,b,f,f,w,n,p,w,o,e,w,v,l -e,x,y,u,f,n,f,c,n,p,e,?,s,f,w,w,p,w,o,f,h,v,d -p,x,s,b,t,f,f,c,b,w,t,b,s,f,w,w,p,w,o,p,h,v,u -e,x,y,w,f,n,f,c,n,w,e,?,s,f,w,w,p,w,o,f,h,v,d -p,x,y,n,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -e,x,s,p,t,n,f,c,b,e,e,?,s,s,e,w,p,w,t,e,w,c,w -p,f,s,p,t,n,f,c,b,r,e,b,s,s,w,w,p,w,t,p,r,v,m -p,f,y,n,f,n,f,c,n,w,e,?,k,y,w,y,p,w,o,e,w,v,d -p,f,y,y,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,y,p -p,f,s,w,t,f,f,c,b,h,t,b,s,f,w,w,p,w,o,p,h,v,g -p,x,y,g,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,v,g -e,f,y,e,t,n,f,c,b,w,e,?,s,s,e,e,p,w,t,e,w,c,w -p,f,s,b,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,r,v,g -p,b,s,p,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,r,v,m -p,x,y,n,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,x,s,b,t,f,f,c,b,p,t,b,f,s,w,w,p,w,o,p,h,v,u -p,x,y,y,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,y,p -p,b,y,w,t,n,f,c,b,g,e,b,s,s,w,w,p,w,t,p,r,v,g -e,f,y,n,f,n,f,w,n,w,e,b,s,f,w,n,p,w,o,e,w,v,l -p,f,f,y,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,y,g -p,f,y,y,f,f,f,c,b,h,e,b,k,k,p,b,p,w,o,l,h,v,p -p,f,y,y,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,v,g -p,x,s,g,t,f,f,c,b,h,t,b,s,f,w,w,p,w,o,p,h,s,u -p,x,y,e,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,x,y,y,f,n,f,c,n,w,e,?,k,y,w,y,p,w,o,e,w,v,d -p,x,y,n,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,f,s,w,t,f,f,c,b,h,t,b,s,f,w,w,p,w,o,p,h,s,g -p,f,f,g,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,y,g -e,f,f,n,f,n,f,w,n,w,e,b,s,s,w,n,p,w,o,e,w,v,l -p,x,s,g,t,f,f,c,b,w,t,b,f,s,w,w,p,w,o,p,h,s,g -p,f,s,b,t,f,f,c,b,h,t,b,f,f,w,w,p,w,o,p,h,v,u -e,f,s,e,t,n,f,c,b,e,e,?,s,s,e,e,p,w,t,e,w,c,w -p,x,s,b,t,f,f,c,b,h,t,b,s,s,w,w,p,w,o,p,h,s,u -p,f,y,y,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,v,d -p,x,y,n,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -p,f,s,w,t,f,f,c,b,p,t,b,s,s,w,w,p,w,o,p,h,v,g -p,f,s,g,t,f,f,c,b,h,t,b,s,s,w,w,p,w,o,p,h,s,g 
-p,f,y,y,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,v,g -e,f,f,c,f,n,f,w,n,w,e,b,s,s,w,n,p,w,o,e,w,v,l -p,x,y,n,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -e,k,y,e,t,n,f,c,b,e,e,?,s,s,w,e,p,w,t,e,w,c,w -p,x,y,n,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,f,s,w,t,f,f,c,b,p,t,b,s,f,w,w,p,w,o,p,h,s,g -p,x,y,e,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -e,x,s,n,t,n,f,c,b,w,e,?,s,s,e,e,p,w,t,e,w,c,w -p,f,y,g,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,v,p -p,x,s,w,t,f,f,c,b,w,t,b,f,f,w,w,p,w,o,p,h,v,u -e,f,s,e,t,n,f,c,b,w,e,?,s,s,e,e,p,w,t,e,w,c,w -p,f,y,y,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,v,d -p,f,y,y,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,v,d -e,f,y,e,t,n,f,c,b,e,e,?,s,s,e,w,p,w,t,e,w,c,w -p,f,s,g,t,f,f,c,b,p,t,b,f,f,w,w,p,w,o,p,h,v,g -e,k,s,n,t,n,f,c,b,e,e,?,s,s,e,w,p,w,t,e,w,c,w -p,f,y,y,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,y,d -p,x,s,w,t,f,f,c,b,w,t,b,s,f,w,w,p,w,o,p,h,s,g -p,x,y,n,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,f,y,y,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,y,g -p,f,y,w,t,n,f,c,b,r,e,b,s,s,w,w,p,w,t,p,r,v,g -p,x,y,n,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -e,f,y,w,f,n,f,c,n,w,e,?,s,f,w,w,p,w,o,f,h,v,d -e,x,y,w,f,n,f,c,n,h,e,?,s,f,w,w,p,w,o,f,h,y,d -p,x,y,n,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -p,f,s,g,t,f,f,c,b,p,t,b,f,s,w,w,p,w,o,p,h,s,u -e,x,y,u,f,n,f,c,n,w,e,?,s,f,w,w,p,w,o,f,h,v,d -p,f,s,w,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,r,v,g -p,x,y,y,f,f,f,c,b,g,e,b,k,k,p,b,p,w,o,l,h,v,d -p,f,s,g,t,f,f,c,b,h,t,b,s,f,w,w,p,w,o,p,h,v,g -p,f,y,y,f,f,f,c,b,g,e,b,k,k,p,n,p,w,o,l,h,y,p -p,x,f,y,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,v,g -p,f,y,y,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,y,d -p,x,s,g,t,f,f,c,b,p,t,b,f,f,w,w,p,w,o,p,h,s,g -e,f,y,r,f,n,f,c,n,u,e,?,s,f,w,w,p,w,o,f,h,v,d -p,f,s,w,t,f,f,c,b,w,t,b,f,s,w,w,p,w,o,p,h,s,g -e,k,s,n,t,n,f,c,b,w,e,?,s,s,e,e,p,w,t,e,w,c,w -p,x,y,n,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,x,s,g,t,f,f,c,b,w,t,b,f,f,w,w,p,w,o,p,h,s,g -p,f,y,y,f,f,f,c,b,g,e,b,k,k,p,p,p,w,o,l,h,y,g -p,b,s,w,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,r,v,m -p,x,y,n,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,b,s,b,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,r,v,m -p,f,f,y,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,y,g -p,x,y,g,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,y,d -e,x,y,e,t,n,f,c,b,e,e,?,s,s,w,w,p,w,t,e,w,c,w -e,k,s,b,t,n,f,c,b,w,e,?,s,s,e,e,p,w,t,e,w,c,w -p,x,y,n,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -e,x,s,e,t,n,f,c,b,e,e,?,s,s,e,e,p,w,t,e,w,c,w -p,f,s,b,t,n,f,c,b,r,e,b,s,s,w,w,p,w,t,p,r,v,m -e,f,s,n,t,n,f,c,b,w,e,?,s,s,w,w,p,w,t,e,w,c,w -p,x,y,e,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -e,f,y,b,t,n,f,c,b,w,e,?,s,s,e,e,p,w,t,e,w,c,w -p,x,s,g,t,f,f,c,b,w,t,b,s,s,w,w,p,w,o,p,h,s,u -p,f,s,b,t,n,f,c,b,g,e,b,s,s,w,w,p,w,t,p,r,v,g -p,x,y,n,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -p,x,y,e,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,f,y,y,f,f,f,c,b,h,e,b,k,k,b,b,p,w,o,l,h,y,g -p,x,y,n,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,x,y,n,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -p,f,s,g,t,f,f,c,b,w,t,b,f,s,w,w,p,w,o,p,h,v,u -e,x,y,p,t,n,f,c,b,w,e,?,s,s,e,w,p,w,t,e,w,c,w -e,x,y,u,f,n,f,c,n,u,e,?,s,f,w,w,p,w,o,f,h,y,d -p,f,y,y,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,y,p -p,x,s,g,t,f,f,c,b,h,t,b,s,f,w,w,p,w,o,p,h,v,g -p,x,y,n,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,f,s,g,t,f,f,c,b,h,t,b,s,f,w,w,p,w,o,p,h,v,u -p,x,s,b,t,f,f,c,b,h,t,b,s,f,w,w,p,w,o,p,h,v,g -p,x,f,y,f,f,f,c,b,h,e,b,k,k,b,p,p,w,o,l,h,y,g -p,b,s,w,t,n,f,c,b,r,e,b,s,s,w,w,p,w,t,p,r,v,g -e,k,y,n,t,n,f,c,b,e,e,?,s,s,e,e,p,w,t,e,w,c,w -p,x,y,e,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,f,y,y,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,v,p -e,x,y,b,t,n,f,c,b,w,e,?,s,s,e,w,p,w,t,e,w,c,w -p,x,s,b,t,f,f,c,b,p,t,b,s,f,w,w,p,w,o,p,h,v,g 
-p,f,s,g,t,f,f,c,b,w,t,b,f,f,w,w,p,w,o,p,h,v,u -p,f,s,b,t,f,f,c,b,p,t,b,s,s,w,w,p,w,o,p,h,s,u -p,f,y,y,f,f,f,c,b,p,e,b,k,k,b,b,p,w,o,l,h,y,d -p,x,y,n,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,f,s,w,t,f,f,c,b,h,t,b,f,f,w,w,p,w,o,p,h,v,u -e,k,y,n,t,n,f,c,b,e,e,?,s,s,w,w,p,w,t,e,w,c,w -e,x,s,e,t,n,f,c,b,w,e,?,s,s,e,e,p,w,t,e,w,c,w -e,x,y,w,f,n,f,c,n,p,e,?,s,f,w,w,p,w,o,f,h,v,d -p,f,s,b,t,f,f,c,b,p,t,b,f,s,w,w,p,w,o,p,h,s,u -p,f,s,g,t,f,f,c,b,h,t,b,s,s,w,w,p,w,o,p,h,s,u -e,f,s,e,t,n,f,c,b,e,e,?,s,s,w,w,p,w,t,e,w,c,w -p,f,y,y,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,y,g -p,x,y,n,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -p,f,s,w,t,n,f,c,b,g,e,b,s,s,w,w,p,w,t,p,r,v,g -p,f,y,g,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,y,d -e,x,f,n,f,n,f,w,n,w,e,b,s,s,w,n,p,w,o,e,w,v,l -p,f,y,y,f,f,f,c,b,p,e,b,k,k,b,n,p,w,o,l,h,y,d -p,x,s,w,t,f,f,c,b,p,t,b,f,s,w,w,p,w,o,p,h,s,u -e,x,s,b,t,n,f,c,b,w,e,?,s,s,e,e,p,w,t,e,w,c,w -e,x,s,p,t,n,f,c,b,w,e,?,s,s,w,e,p,w,t,e,w,c,w -p,x,s,g,t,f,f,c,b,w,t,b,f,f,w,w,p,w,o,p,h,v,g -p,f,f,g,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,y,p -p,x,s,w,t,f,f,c,b,h,t,b,s,f,w,w,p,w,o,p,h,s,g -p,x,f,y,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,v,p -p,f,s,w,t,f,f,c,b,p,t,b,s,s,w,w,p,w,o,p,h,s,u -e,k,y,n,t,n,f,c,b,w,e,?,s,s,e,e,p,w,t,e,w,c,w -e,x,y,p,t,n,f,c,b,e,e,?,s,s,w,w,p,w,t,e,w,c,w -p,f,y,y,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,y,g -e,f,y,b,t,n,f,c,b,e,e,?,s,s,w,w,p,w,t,e,w,c,w -e,x,s,e,t,n,f,c,b,w,e,?,s,s,w,w,p,w,t,e,w,c,w -p,x,s,g,t,f,f,c,b,p,t,b,f,f,w,w,p,w,o,p,h,v,u -p,f,s,b,t,f,f,c,b,w,t,b,s,s,w,w,p,w,o,p,h,v,u -p,f,y,y,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,v,p -p,x,s,b,t,f,f,c,b,h,t,b,s,f,w,w,p,w,o,p,h,v,u -p,f,s,w,t,f,f,c,b,h,t,b,s,s,w,w,p,w,o,p,h,v,g -p,f,y,y,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,v,g -p,f,y,y,f,f,f,c,b,p,e,b,k,k,n,p,p,w,o,l,h,y,d -p,f,y,g,f,f,f,c,b,h,e,b,k,k,n,p,p,w,o,l,h,y,p -p,f,f,g,f,f,f,c,b,g,e,b,k,k,b,n,p,w,o,l,h,v,p -p,b,s,p,t,n,f,c,b,g,e,b,s,s,w,w,p,w,t,p,r,v,m -p,x,y,n,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,b,s,w,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,r,v,g -e,x,y,n,t,n,f,c,b,w,e,?,s,s,w,w,p,w,t,e,w,c,w -p,x,y,n,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -p,x,y,n,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,x,y,n,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -p,f,s,b,t,f,f,c,b,w,t,b,s,s,w,w,p,w,o,p,h,s,g -p,b,y,y,f,n,f,c,n,w,e,?,k,y,w,n,p,w,o,e,w,v,d -p,x,s,g,t,f,f,c,b,w,t,b,f,f,w,w,p,w,o,p,h,v,u -e,f,y,p,t,n,f,c,b,e,e,?,s,s,w,w,p,w,t,e,w,c,w -e,k,s,p,t,n,f,c,b,e,e,?,s,s,e,e,p,w,t,e,w,c,w -e,f,y,n,t,n,f,c,b,w,e,?,s,s,e,e,p,w,t,e,w,c,w -p,b,y,p,t,n,f,c,b,g,e,b,s,s,w,w,p,w,t,p,r,v,m -p,x,s,g,t,f,f,c,b,w,t,b,s,f,w,w,p,w,o,p,h,v,g -p,x,s,g,t,f,f,c,b,p,t,b,s,s,w,w,p,w,o,p,h,s,g -p,x,f,y,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,v,g -p,x,y,n,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -p,f,y,b,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,r,v,g -p,x,s,b,t,f,f,c,b,h,t,b,f,f,w,w,p,w,o,p,h,v,u -p,b,s,b,t,n,f,c,b,r,e,b,s,s,w,w,p,w,t,p,r,v,g -e,f,y,w,f,n,f,c,n,p,e,?,s,f,w,w,p,w,o,f,h,y,d -e,x,y,n,f,n,f,w,n,w,e,b,f,s,w,n,p,w,o,e,w,v,l -p,x,f,y,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,v,p -p,f,s,b,t,f,f,c,b,h,t,b,s,f,w,w,p,w,o,p,h,v,g -p,f,y,y,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,v,g -e,k,s,e,t,n,f,c,b,e,e,?,s,s,e,e,p,w,t,e,w,c,w -p,f,f,y,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,y,p -p,b,f,n,f,n,f,c,n,w,e,?,k,y,w,n,p,w,o,e,w,v,d -p,x,y,n,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,f,y,y,f,f,f,c,b,h,e,b,k,k,n,b,p,w,o,l,h,y,d -p,x,y,y,f,f,f,c,b,g,e,b,k,k,n,b,p,w,o,l,h,y,g -p,x,s,g,t,f,f,c,b,p,t,b,f,s,w,w,p,w,o,p,h,s,g -p,x,y,n,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -e,x,y,n,t,n,f,c,b,w,e,?,s,s,e,w,p,w,t,e,w,c,w -e,f,y,n,f,n,f,w,n,w,e,b,f,f,w,n,p,w,o,e,w,v,l 
-p,f,y,b,t,n,f,c,b,g,e,b,s,s,w,w,p,w,t,p,r,v,g -p,k,f,n,f,n,f,c,n,w,e,?,k,y,w,y,p,w,o,e,w,v,d -p,f,y,g,f,f,f,c,b,h,e,b,k,k,p,n,p,w,o,l,h,v,g -p,x,y,n,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,f,f,g,f,f,f,c,b,h,e,b,k,k,p,p,p,w,o,l,h,y,d -p,f,f,g,f,f,f,c,b,p,e,b,k,k,b,p,p,w,o,l,h,v,d -p,x,s,g,t,f,f,c,b,p,t,b,s,f,w,w,p,w,o,p,h,s,g -e,x,y,r,f,n,f,c,n,u,e,?,s,f,w,w,p,w,o,f,h,y,d -p,x,y,n,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,f,s,p,t,n,f,c,b,g,e,b,s,s,w,w,p,w,t,p,r,v,m -p,x,y,e,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,f,s,b,t,f,f,c,b,w,t,b,s,s,w,w,p,w,o,p,h,v,g -p,x,s,b,t,f,f,c,b,w,t,b,s,s,w,w,p,w,o,p,h,v,u -e,k,s,e,t,n,f,c,b,e,e,?,s,s,w,w,p,w,t,e,w,c,w -p,f,s,b,t,f,f,c,b,w,t,b,s,f,w,w,p,w,o,p,h,s,g -e,x,f,n,f,n,f,w,n,w,e,b,f,f,w,n,p,w,o,e,w,v,l -p,f,y,y,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,v,d -e,f,y,w,f,n,f,c,n,w,e,?,s,f,w,w,p,w,o,f,h,y,d -p,f,s,w,t,f,f,c,b,w,t,b,s,s,w,w,p,w,o,p,h,s,u -p,f,s,p,t,n,f,c,b,r,e,b,s,s,w,w,p,w,t,p,r,v,g -p,f,y,y,f,f,f,c,b,p,e,b,k,k,p,n,p,w,o,l,h,y,d -p,f,s,g,t,f,f,c,b,h,t,b,s,f,w,w,p,w,o,p,h,s,g -p,x,y,n,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -p,f,y,y,f,f,f,c,b,g,e,b,k,k,b,b,p,w,o,l,h,v,g -p,f,s,g,t,f,f,c,b,h,t,b,f,s,w,w,p,w,o,p,h,s,g -p,f,s,w,t,f,f,c,b,h,t,b,s,f,w,w,p,w,o,p,h,s,u -p,x,y,n,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,f,f,y,f,f,f,c,b,p,e,b,k,k,n,n,p,w,o,l,h,y,d -p,x,y,n,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -p,b,y,b,t,n,f,c,b,g,e,b,s,s,w,w,p,w,t,p,r,v,g -p,f,y,y,f,f,f,c,b,g,e,b,k,k,b,p,p,w,o,l,h,v,g -p,x,y,e,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,x,y,n,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -e,k,y,b,t,n,f,c,b,e,e,?,s,s,w,w,p,w,t,e,w,c,w -e,x,y,w,f,n,f,c,n,p,e,?,s,f,w,w,p,w,o,f,h,y,d -p,f,s,b,t,f,f,c,b,p,t,b,s,f,w,w,p,w,o,p,h,v,u -p,f,y,y,f,f,f,c,b,g,e,b,k,k,n,p,p,w,o,l,h,y,g -p,x,y,e,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,f,s,e,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,x,y,e,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,x,s,n,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,f,y,n,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -p,f,y,e,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -p,k,y,n,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,f,y,e,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,x,s,e,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -p,f,y,n,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,f,y,e,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,x,s,e,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -p,x,s,n,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,f,s,e,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,f,s,e,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,f,y,e,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,f,y,n,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,f,y,n,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -p,x,y,e,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,k,s,n,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,x,s,n,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -p,f,s,e,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,f,y,e,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,k,s,e,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,f,y,e,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,x,s,n,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,f,s,e,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,f,s,n,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,f,y,e,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,f,s,e,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -p,f,s,n,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,f,y,e,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,x,s,e,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -p,f,s,e,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -p,x,s,n,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,f,y,n,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,x,y,e,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,f,y,n,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l 
-e,b,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,n,c,l -p,f,s,n,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -e,x,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,n,v,l -p,f,y,e,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,f,y,n,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,f,y,e,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,f,s,n,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,x,y,e,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,x,s,n,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,f,s,n,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,f,y,e,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,f,s,n,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,f,s,n,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -p,f,s,n,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,f,y,e,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,f,s,e,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,x,y,e,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,x,s,e,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,f,s,e,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,f,y,n,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -p,x,y,e,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,k,s,e,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -p,k,s,n,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,f,y,n,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -p,x,s,e,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -p,x,y,e,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,f,y,e,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,f,y,n,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,f,s,e,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,x,s,n,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -e,x,f,w,f,n,f,w,b,w,e,?,k,k,w,w,p,w,t,p,w,n,g -p,f,y,n,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,x,s,n,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -p,x,s,e,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -p,f,s,n,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,f,s,n,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,x,y,e,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,x,s,e,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,f,s,n,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,k,s,n,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,f,y,n,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -p,x,s,n,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,f,y,e,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,x,s,e,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -p,f,s,n,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -p,f,y,n,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -p,k,y,n,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,f,y,n,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,f,s,n,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,x,s,e,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,f,s,n,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -p,f,s,n,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,f,s,e,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,f,y,n,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,f,s,n,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -p,f,y,n,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,f,s,e,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -p,f,s,e,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -p,x,y,e,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -p,f,s,n,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,x,y,e,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,f,y,e,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,x,s,n,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -p,f,y,n,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,f,y,e,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,x,y,e,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,f,y,n,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,f,y,n,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,x,y,e,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,f,y,n,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,f,s,n,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,x,s,n,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,f,y,e,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,x,y,e,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -p,x,s,e,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p 
-p,f,s,e,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -p,f,y,e,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,x,y,e,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,x,y,e,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -p,x,s,n,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -p,x,s,e,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -p,k,s,e,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -e,x,s,c,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,v,p -p,x,s,e,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,f,s,n,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,x,s,n,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -p,f,y,n,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,f,y,e,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -p,f,s,e,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -p,x,y,e,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,x,s,e,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,x,s,n,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -p,x,y,e,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,f,s,e,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -p,f,s,n,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,f,y,e,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -p,x,y,e,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,k,s,e,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,f,y,n,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -p,f,s,n,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,x,s,e,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,f,s,n,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -p,f,s,e,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,f,y,e,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -e,b,f,w,f,n,f,w,b,g,e,?,s,s,w,w,p,w,t,p,w,n,g -p,x,y,e,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -p,x,y,e,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,f,y,e,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,f,y,e,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -e,k,f,w,f,n,f,w,b,g,e,?,k,s,w,w,p,w,t,p,w,s,g -p,x,s,n,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,f,s,n,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,x,s,n,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,f,s,e,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,x,s,n,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,x,s,n,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,x,s,e,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,k,s,n,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,x,y,e,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -p,x,s,e,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,x,y,e,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,x,s,e,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -p,x,s,e,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,x,s,e,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,x,s,e,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,f,y,e,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,x,s,n,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -p,f,s,n,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -p,x,s,e,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,f,s,e,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -p,x,s,n,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,f,y,e,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -p,x,s,n,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,k,s,n,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,f,s,e,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -p,f,s,e,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,f,y,e,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,k,s,n,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,f,y,n,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,f,s,e,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,x,y,e,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,f,s,e,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,f,y,e,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -p,f,s,n,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,f,s,n,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,x,y,e,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,x,y,e,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,f,s,e,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,k,y,e,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,f,y,e,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p 
-p,k,y,n,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,x,s,e,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,x,y,e,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,x,s,n,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -p,f,y,n,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -p,x,s,e,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -e,b,s,g,f,n,f,w,b,w,e,?,s,k,w,w,p,w,t,p,w,n,g -p,x,y,e,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,x,s,e,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,f,y,e,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -p,x,s,e,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -p,f,y,e,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,f,y,e,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,x,s,n,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,f,y,n,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -e,k,s,w,f,n,f,w,b,g,e,?,k,s,w,w,p,w,t,p,w,s,g -p,f,y,e,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,x,s,e,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,f,y,n,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,f,y,n,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -p,x,s,e,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,f,y,e,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,f,y,e,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,f,s,n,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -p,x,y,e,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -p,f,y,e,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -e,b,f,g,f,n,f,w,b,p,e,?,k,s,w,w,p,w,t,p,w,n,g -p,k,y,e,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,x,y,e,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,k,s,e,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,x,s,n,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -e,k,s,w,f,n,f,w,b,g,e,?,s,k,w,w,p,w,t,p,w,s,g -p,f,s,e,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,x,s,n,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,k,s,e,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -p,k,y,n,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,k,y,e,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,x,s,e,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,f,s,e,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,f,y,e,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,x,y,e,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -p,f,y,n,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -p,x,y,e,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -p,f,y,n,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -p,x,y,e,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -p,k,y,n,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,f,s,e,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,x,s,n,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,f,y,n,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,f,s,e,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,x,s,e,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,f,s,n,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,f,y,n,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,x,y,e,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,x,s,e,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -p,f,y,e,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -p,f,s,e,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,f,y,e,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,x,s,n,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,f,s,n,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,x,s,n,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -p,f,y,n,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,k,y,n,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,x,s,e,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -p,f,y,n,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,x,y,e,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,f,y,e,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,x,y,e,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,f,y,e,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,f,y,e,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,f,s,e,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,k,y,e,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,x,s,e,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -e,k,s,g,f,n,f,w,b,g,e,?,k,s,w,w,p,w,t,p,w,s,g -p,x,s,n,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l 
-p,f,s,e,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,f,s,n,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -p,f,s,e,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,f,y,n,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,x,s,e,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,x,s,e,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -p,x,s,e,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,x,s,e,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -p,f,s,e,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -p,x,s,e,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,x,s,n,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,f,s,n,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -p,k,y,n,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,x,y,e,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -p,x,s,n,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,x,s,e,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -e,b,s,w,f,n,f,w,b,p,e,?,k,k,w,w,p,w,t,p,w,s,g -p,f,s,e,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,x,s,e,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,x,y,e,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,f,y,n,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,f,s,e,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,f,y,n,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,f,y,e,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -p,f,s,n,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -p,x,y,e,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -p,f,s,n,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,x,s,n,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,f,s,e,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,x,s,n,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -p,f,s,e,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,x,y,e,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -p,f,y,n,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -p,x,y,e,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,f,s,n,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,f,s,n,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -p,x,s,n,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,f,s,e,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,k,y,n,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,x,y,e,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,x,y,e,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,k,s,n,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -p,x,s,e,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,x,s,e,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -p,f,y,n,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,f,s,e,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -p,f,s,e,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,f,s,n,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,f,y,e,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -p,f,y,e,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -p,k,s,e,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,x,s,n,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,f,y,n,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,f,s,e,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,k,s,n,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -p,f,y,n,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -p,x,y,e,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,x,y,e,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,x,s,e,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,f,y,n,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -p,f,y,n,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -p,f,y,e,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -p,k,y,n,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,x,s,n,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -p,x,s,e,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -p,x,s,n,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,f,y,e,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,k,y,e,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -e,x,f,g,f,n,f,w,b,w,e,?,s,k,w,w,p,w,t,p,w,s,g -p,f,y,e,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,x,s,e,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,x,s,e,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,x,y,e,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,f,y,n,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,f,y,n,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l 
-p,x,s,n,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -p,x,s,e,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,f,y,e,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,f,s,n,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,f,y,n,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,f,y,n,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -p,x,y,e,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,x,s,e,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -p,f,s,n,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -p,f,s,n,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,x,s,e,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,f,s,n,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,f,s,n,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,f,y,n,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,x,s,n,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,f,s,n,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,f,s,n,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,x,s,n,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,f,y,n,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,x,s,n,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -p,x,s,n,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -p,f,s,n,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,f,y,n,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,x,y,e,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,f,s,e,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,f,y,n,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -p,f,s,e,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,x,s,n,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -e,k,f,w,f,n,f,w,b,p,e,?,k,k,w,w,p,w,t,p,w,n,g -p,x,s,n,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,f,s,n,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,f,y,n,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,f,s,n,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,f,y,n,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,f,y,n,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -p,f,s,n,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -p,f,y,e,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -e,b,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,o,v,l -p,f,s,e,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,f,s,e,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,x,s,e,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,f,y,n,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,f,y,e,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -p,f,s,n,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,f,y,e,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,f,s,n,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -p,k,y,n,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -p,f,y,e,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,f,s,e,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -p,x,y,e,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -p,f,s,n,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,x,s,e,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -p,k,y,n,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,x,s,n,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,x,y,e,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -p,f,y,n,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,f,s,e,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -p,f,s,n,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -e,x,s,w,f,n,f,w,b,g,e,?,k,s,w,w,p,w,t,p,w,s,g -p,f,y,n,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,f,s,n,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,x,y,e,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -p,f,s,e,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,x,s,n,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,x,y,e,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -e,k,f,g,f,n,f,w,b,g,e,?,k,k,w,w,p,w,t,p,w,s,g -p,f,s,e,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,f,y,e,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -e,b,f,w,f,n,f,w,b,w,e,?,s,k,w,w,p,w,t,p,w,s,g -p,f,s,n,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -p,x,y,e,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,x,y,e,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -p,f,s,n,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -p,x,s,n,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -p,f,s,e,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p 
-p,f,s,e,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,f,s,e,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,x,y,e,f,m,f,c,b,w,e,c,k,y,c,c,p,w,n,n,w,c,d -p,f,y,n,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -p,f,y,e,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -p,x,s,e,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,f,s,e,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -p,f,y,e,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,f,y,e,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,x,s,e,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,f,s,e,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -e,x,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,y,c,l -p,f,y,n,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,f,y,e,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -p,f,y,e,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -p,x,s,n,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -p,x,s,e,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,x,y,e,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -p,x,s,e,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,x,y,e,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,f,y,e,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -e,x,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,o,v,l -p,f,s,n,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,x,y,e,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,f,s,n,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,f,y,e,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,x,y,e,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,x,s,n,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,x,s,n,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,x,s,n,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -p,x,y,e,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -p,f,s,n,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,f,s,n,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -p,k,y,e,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -p,x,s,n,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,f,y,n,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -e,k,s,w,f,n,f,w,b,g,e,?,s,k,w,w,p,w,t,p,w,n,g -p,f,s,e,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,x,s,e,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -p,f,y,n,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,f,y,n,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,f,y,e,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,x,s,n,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,f,y,n,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,x,s,n,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,k,y,n,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,f,s,e,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,f,y,e,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,f,y,e,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -p,f,y,e,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,f,y,n,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,f,y,n,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -p,k,y,n,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,k,y,n,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -p,f,s,e,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -p,x,s,e,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -p,k,s,n,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,x,s,e,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,f,s,n,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,x,s,e,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,f,s,n,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -p,x,s,n,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,f,y,e,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,x,s,n,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,x,s,e,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,x,s,e,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,x,s,n,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,x,y,e,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -p,f,y,e,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -p,f,s,e,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,x,s,e,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,f,y,e,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,f,s,e,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -p,f,s,e,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -p,f,y,n,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d 
-p,f,s,n,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -p,x,s,e,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,f,y,n,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,f,s,n,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -p,f,y,n,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,f,s,e,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -p,f,y,e,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -p,f,y,n,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,f,s,e,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -p,f,s,e,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,f,y,n,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -p,f,s,e,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,f,s,n,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,k,y,e,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,f,y,e,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,f,s,e,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -p,f,y,e,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,f,y,n,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,f,y,e,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,f,s,n,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,x,y,e,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -e,x,f,w,f,n,f,w,b,p,e,?,k,s,w,w,p,w,t,p,w,n,g -p,x,s,e,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,x,s,e,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,f,y,n,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -p,x,s,e,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,x,s,n,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,x,s,n,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -p,x,s,n,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -e,x,f,g,f,n,f,w,b,p,e,?,s,k,w,w,p,w,t,p,w,n,g -p,f,y,e,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,f,y,e,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,x,y,e,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,x,s,n,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,k,s,e,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,k,y,e,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,f,y,e,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -p,x,s,n,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,f,s,n,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,x,s,e,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -p,f,y,n,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,x,s,n,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -e,k,f,w,f,n,f,w,b,p,e,?,s,k,w,w,p,w,t,p,w,n,g -p,k,y,n,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,f,s,e,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,x,y,e,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,x,s,n,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -p,x,s,n,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,f,y,e,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,f,y,n,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -p,x,y,e,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -p,f,s,e,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,x,s,e,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,x,s,n,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -p,f,y,n,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,f,y,e,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,f,y,n,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,f,y,n,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -p,f,s,n,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,f,y,e,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -p,f,y,e,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,x,y,e,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -p,k,s,e,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,f,y,e,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -p,f,y,n,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,f,s,n,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -p,f,s,e,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -p,k,s,e,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,f,y,e,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,f,s,n,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -e,x,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,b,v,l -p,x,s,n,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,k,s,n,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,f,s,n,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,f,s,n,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d 
-p,x,s,n,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,f,s,n,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,f,y,e,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,f,y,e,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -p,f,s,n,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,x,s,e,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,x,s,n,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -p,x,s,e,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,x,s,e,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -p,f,y,e,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,x,s,e,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,k,s,e,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,x,y,e,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,f,s,n,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,f,s,n,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,x,s,n,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,k,y,e,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -p,x,s,e,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -p,f,y,n,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -p,f,y,e,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,f,y,n,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,f,s,n,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,f,s,e,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,k,y,n,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,f,y,e,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,f,s,e,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -p,f,s,n,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -p,x,s,n,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -p,f,y,n,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,f,y,e,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,x,y,e,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -p,f,s,e,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -p,x,s,n,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,f,s,n,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,f,s,n,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,f,s,e,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -e,x,s,g,f,n,f,w,b,p,e,?,s,k,w,w,p,w,t,p,w,n,g -p,k,y,n,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,x,y,e,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -p,x,s,n,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,x,s,n,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,f,y,e,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -p,k,y,n,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -p,x,s,e,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,x,s,n,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,k,y,n,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,x,y,e,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -p,k,s,n,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -p,x,s,n,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -p,x,y,e,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,f,s,e,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,f,s,n,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,f,s,n,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,f,s,e,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -p,x,y,e,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,x,s,e,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,f,s,n,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -p,f,y,e,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -p,x,s,n,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,x,s,e,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -p,x,s,e,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,f,s,e,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,x,y,e,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -p,x,s,n,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,x,s,e,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,x,s,e,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,x,s,n,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,x,s,n,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,f,s,e,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,f,y,e,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,x,s,n,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,x,s,e,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,f,s,n,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,x,s,n,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,f,s,e,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d 
-p,k,y,n,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,x,s,n,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,f,y,n,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -p,k,y,n,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -p,f,s,e,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,f,s,n,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,x,s,e,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,x,s,n,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -p,f,y,n,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,f,y,n,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -p,x,s,e,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -p,f,y,n,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,x,y,e,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -p,f,y,n,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,f,y,e,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -p,x,y,e,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,f,s,e,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -p,k,s,n,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,x,y,e,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -p,x,s,n,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,f,s,e,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -p,f,s,e,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -e,b,f,g,f,n,f,w,b,p,e,?,s,k,w,w,p,w,t,p,w,s,g -p,x,s,n,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -p,k,s,n,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -e,b,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,y,v,l -p,f,s,n,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,f,y,n,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,x,y,e,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,x,s,e,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,k,y,c,f,m,a,c,b,w,e,c,k,y,c,c,p,w,n,n,w,c,d -p,x,s,e,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -p,f,s,n,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,x,s,n,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,x,y,e,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,x,s,e,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,f,s,e,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,f,s,n,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,f,s,n,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,f,y,e,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,x,s,e,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,f,y,e,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,f,y,n,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -p,x,y,e,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,f,y,e,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,f,s,e,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,f,s,e,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -p,x,s,e,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,x,s,e,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -p,k,y,e,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,x,y,e,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -p,x,s,e,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -p,x,s,n,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,x,s,n,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -p,f,s,n,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,f,y,n,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,x,s,e,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,k,s,n,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -p,f,s,n,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -p,f,y,n,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,f,y,e,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,f,y,e,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,f,y,n,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,x,s,e,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,f,y,n,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,f,s,n,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,f,s,e,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,x,s,n,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,f,y,e,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -p,k,y,n,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -p,f,s,n,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -p,f,y,n,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -p,f,s,e,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,x,s,n,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,f,s,e,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p 
-p,x,s,e,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,f,y,e,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -p,x,s,e,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,f,s,e,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,f,s,n,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -p,x,s,n,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -p,x,s,e,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,f,s,e,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,x,s,e,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -p,x,s,n,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,f,s,e,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -p,x,y,e,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,x,s,n,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,x,y,e,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,f,s,e,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,x,s,e,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,f,y,n,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,x,s,e,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,x,s,n,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,f,s,e,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,x,s,e,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -p,f,s,n,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,x,y,e,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,f,y,e,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -p,x,y,e,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,k,y,n,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,f,y,n,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -p,x,y,e,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -p,f,s,n,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,x,s,e,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,f,s,e,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -e,x,f,g,f,n,f,w,b,g,e,?,k,k,w,w,p,w,t,p,w,s,g -p,x,s,e,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -p,f,s,n,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,f,s,n,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,x,s,e,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,x,s,e,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,x,s,e,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,f,y,e,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -e,b,f,g,f,n,f,w,b,w,e,?,s,s,w,w,p,w,t,p,w,s,g -p,f,y,n,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,f,y,e,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -p,f,y,n,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,f,s,n,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -p,f,y,n,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,x,y,e,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,f,y,n,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -p,x,s,n,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -p,f,y,e,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -e,k,f,g,f,n,f,w,b,p,e,?,k,s,w,w,p,w,t,p,w,s,g -e,b,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,o,c,l -p,f,s,n,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -p,x,s,n,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -p,f,y,n,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -p,f,s,e,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,f,y,n,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,f,s,n,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,f,y,e,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,f,y,e,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -p,f,s,n,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,x,s,e,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -e,x,f,w,f,n,f,w,b,g,e,?,s,k,w,w,p,w,t,p,w,n,g -p,f,s,n,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,x,s,e,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,x,s,n,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,x,s,n,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,x,s,n,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,k,y,n,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,f,s,n,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,x,y,e,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,f,s,n,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -p,f,s,n,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,x,s,n,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,x,s,n,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -p,x,y,e,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l 
-p,f,s,e,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,f,s,e,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,f,y,n,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,x,s,e,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,f,s,e,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -p,f,y,e,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,f,y,e,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -p,x,y,e,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,f,y,n,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,f,s,n,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -p,f,s,n,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,k,y,e,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,f,s,e,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -p,x,s,e,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,x,y,e,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -p,f,y,e,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,x,y,e,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,f,y,e,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -p,f,y,n,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -p,f,y,e,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,x,y,e,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,x,s,e,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,f,s,e,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,f,s,e,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,x,s,e,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,f,s,e,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,f,s,e,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,f,s,n,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -p,f,y,e,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -p,f,y,n,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,k,s,e,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,f,s,n,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,f,y,n,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,f,s,e,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,f,y,e,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -p,x,s,n,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,k,y,n,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,x,y,e,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -p,x,s,e,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -e,x,s,g,f,n,f,w,b,w,e,?,k,s,w,w,p,w,t,p,w,n,g -p,k,y,n,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,x,s,n,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -p,f,s,n,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,f,s,n,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,x,s,e,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,f,s,e,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -p,f,y,e,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,f,y,e,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,f,s,n,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,f,s,e,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -p,x,s,n,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -p,f,s,e,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,x,s,e,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,f,s,e,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,f,y,e,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -p,k,y,e,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -p,f,s,n,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,k,y,e,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,f,s,e,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -p,f,y,e,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,f,s,e,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -e,b,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,o,c,l -p,x,s,n,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,f,y,e,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,f,y,n,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -p,f,s,e,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,f,y,n,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,k,y,n,f,m,f,c,b,w,e,c,k,y,c,c,p,w,n,n,w,c,d -p,x,s,e,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,f,s,e,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,k,y,n,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,f,s,e,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -e,k,s,w,f,n,f,w,b,g,e,?,k,k,w,w,p,w,t,p,w,s,g -p,f,y,n,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,f,y,n,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d 
-p,f,y,n,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -p,f,s,n,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -p,f,s,n,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -p,f,y,e,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,x,s,e,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,f,y,n,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,x,s,n,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,x,s,e,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,x,s,e,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,f,y,e,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -p,x,s,n,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,x,s,e,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,f,s,n,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,x,s,n,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -p,f,y,e,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,x,s,n,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -p,f,y,e,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,f,s,e,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -p,f,y,n,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,f,y,n,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -p,x,y,e,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -e,k,f,g,f,n,f,w,b,w,e,?,k,k,w,w,p,w,t,p,w,n,g -p,x,s,e,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -p,k,y,n,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -p,k,y,n,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,x,y,e,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,f,s,e,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,f,s,n,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,f,s,e,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -p,x,s,e,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -p,f,s,n,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -p,f,y,e,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,x,s,n,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -p,x,y,e,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,f,s,e,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,x,s,n,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -p,f,s,n,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -e,x,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,y,v,l -p,k,s,n,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,f,s,n,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,k,y,e,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -e,x,y,c,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,y,p -e,b,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,y,c,l -e,k,s,g,f,n,f,w,b,w,e,?,s,k,w,w,p,w,t,p,w,n,g -e,x,f,g,f,n,f,w,b,g,e,?,s,k,w,w,p,w,t,p,w,s,g -p,x,s,n,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,k,y,e,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -e,x,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,o,c,l -p,k,s,e,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,b,y,y,f,n,f,w,n,y,e,c,y,y,y,y,p,y,o,e,w,c,l -p,x,s,e,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -e,b,s,g,f,n,f,w,b,g,e,?,k,k,w,w,p,w,t,p,w,n,g -p,k,y,e,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,k,s,e,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,f,s,e,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,f,y,n,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -p,f,s,e,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -e,k,f,g,f,n,f,w,b,w,e,?,s,k,w,w,p,w,t,p,w,s,g -p,k,s,n,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,k,s,e,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,f,s,e,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -e,b,f,g,f,n,f,w,b,p,e,?,k,k,w,w,p,w,t,p,w,n,g -p,f,y,n,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,f,s,n,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -e,f,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,n,c,l -e,x,y,p,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,y,p -p,f,s,n,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -p,k,y,e,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,f,s,n,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -e,b,s,w,f,n,f,w,b,w,e,?,s,k,w,w,p,w,t,p,w,n,g -p,x,y,e,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,f,y,e,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,f,s,e,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,k,y,n,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -e,x,f,w,f,n,f,w,b,p,e,?,s,s,w,w,p,w,t,p,w,s,g 
-e,b,s,w,f,n,f,w,b,p,e,?,s,s,w,w,p,w,t,p,w,n,g -p,k,s,e,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,f,y,n,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -e,f,y,n,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,v,p -p,k,s,n,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,k,s,n,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,f,y,n,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,f,y,e,f,m,a,c,b,w,e,c,k,y,c,c,p,w,n,n,w,c,d -p,x,s,e,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,k,y,e,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,x,s,n,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,k,s,e,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,k,y,n,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,f,y,e,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -e,x,f,w,f,n,f,w,b,w,e,?,s,s,w,w,p,w,t,p,w,s,g -p,k,s,n,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,f,s,n,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,k,s,e,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -p,k,s,e,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,k,y,n,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -e,k,f,w,f,n,f,w,b,g,e,?,s,s,w,w,p,w,t,p,w,s,g -p,k,s,n,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,k,y,n,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,k,s,n,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,k,s,e,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -p,x,s,n,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,x,y,e,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -p,f,s,e,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,k,y,n,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -e,b,s,n,f,n,f,c,b,w,e,b,y,y,n,n,p,w,t,p,w,y,d -p,f,s,n,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -p,k,y,e,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -e,x,s,n,f,n,f,c,b,w,e,b,y,y,n,n,p,w,t,p,w,y,d -p,f,s,n,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -p,k,s,n,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -p,f,y,n,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,x,s,n,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -e,k,f,w,f,n,f,w,b,w,e,?,s,s,w,w,p,w,t,p,w,s,g -p,f,y,e,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,x,s,e,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -p,x,s,n,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,f,s,n,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -p,k,s,n,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,k,s,e,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -p,k,s,e,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,x,s,e,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -e,b,s,n,f,n,f,c,b,w,e,b,y,y,n,n,p,w,t,p,w,y,p -p,k,s,e,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -p,x,s,n,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,x,y,e,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,f,s,e,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -e,b,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,n,c,l -e,k,s,g,f,n,f,w,b,p,e,?,k,k,w,w,p,w,t,p,w,n,g -p,k,y,e,f,m,f,c,b,y,e,c,k,y,c,c,p,w,n,n,w,c,d -p,k,y,e,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,f,s,e,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -e,k,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,o,c,l -p,f,y,e,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,k,y,e,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -e,k,f,w,f,n,f,w,b,p,e,?,k,s,w,w,p,w,t,p,w,s,g -e,x,s,g,f,n,f,w,b,w,e,?,s,k,w,w,p,w,t,p,w,s,g -e,k,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,o,v,l -p,k,y,e,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -p,k,y,n,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,k,s,e,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -e,x,s,w,f,n,f,w,b,w,e,?,k,k,w,w,p,w,t,p,w,s,g -e,k,s,w,f,n,f,w,b,g,e,?,k,k,w,w,p,w,t,p,w,n,g -e,k,s,w,f,n,f,w,b,p,e,?,k,s,w,w,p,w,t,p,w,s,g -p,k,s,e,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,x,s,e,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -e,k,s,g,f,n,f,w,b,w,e,?,s,k,w,w,p,w,t,p,w,s,g -e,x,f,w,f,n,f,w,b,g,e,?,s,k,w,w,p,w,t,p,w,s,g -p,k,y,e,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -e,b,s,w,f,n,f,w,b,g,e,?,s,s,w,w,p,w,t,p,w,s,g -p,k,s,e,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p 
-p,f,s,n,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -p,f,y,n,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -e,k,s,g,f,n,f,w,b,p,e,?,s,k,w,w,p,w,t,p,w,s,g -e,x,s,w,f,n,f,w,b,w,e,?,s,s,w,w,p,w,t,p,w,n,g -p,f,y,e,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,x,s,e,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -e,x,f,w,f,n,f,w,b,g,e,?,k,k,w,w,p,w,t,p,w,s,g -e,k,f,w,f,n,f,w,b,g,e,?,k,k,w,w,p,w,t,p,w,n,g -p,f,s,e,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -e,f,s,g,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,v,p -p,k,s,e,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,x,s,n,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -e,k,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,b,c,l -e,x,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,n,c,l -p,k,y,e,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,x,s,e,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,x,s,n,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,k,s,e,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,x,s,n,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -e,k,f,w,f,n,f,w,b,g,e,?,k,s,w,w,p,w,t,p,w,n,g -e,k,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,n,c,l -p,k,y,c,f,m,f,c,b,w,e,c,k,y,c,c,p,w,n,n,w,c,d -e,x,f,g,f,n,f,w,b,g,e,?,s,s,w,w,p,w,t,p,w,s,g -e,x,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,o,v,l -e,x,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,b,v,l -p,k,y,e,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -e,k,f,w,f,n,f,w,b,p,e,?,s,s,w,w,p,w,t,p,w,n,g -e,x,f,g,f,n,f,w,b,g,e,?,k,s,w,w,p,w,t,p,w,s,g -e,f,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,n,v,l -p,k,y,n,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -p,k,y,n,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,f,y,n,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,f,y,e,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -e,k,f,g,f,n,f,w,b,p,e,?,s,k,w,w,p,w,t,p,w,n,g -p,f,y,e,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -p,f,y,n,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -p,k,s,e,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -e,k,s,g,f,n,f,w,b,w,e,?,k,k,w,w,p,w,t,p,w,s,g -e,b,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,y,c,l -p,k,y,n,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,x,s,n,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -e,b,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,o,v,l -p,k,s,n,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,k,s,n,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,k,s,e,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,f,s,e,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,x,s,e,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,x,s,e,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -p,x,s,n,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -p,k,y,n,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -e,x,s,p,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,y,p -p,f,s,e,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,x,y,n,f,m,a,c,b,w,e,c,k,y,c,c,p,w,n,n,w,c,d -p,k,s,n,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -e,k,s,g,f,n,f,w,b,g,e,?,k,k,w,w,p,w,t,p,w,s,g -p,k,s,n,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,f,y,n,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,k,s,n,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -p,k,y,e,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,f,y,e,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -e,f,s,c,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,y,p -p,f,y,n,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -p,k,s,e,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,x,s,e,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,f,y,e,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,x,s,n,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -e,f,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,n,c,l -p,f,s,e,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -p,x,y,e,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -p,x,y,e,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,x,s,e,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,k,s,e,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,x,s,e,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -e,b,f,g,f,n,f,w,b,w,e,?,k,s,w,w,p,w,t,p,w,s,g -p,k,y,e,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p 
-p,k,s,e,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -e,k,f,g,f,n,f,w,b,g,e,?,s,s,w,w,p,w,t,p,w,n,g -e,f,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,n,v,l -p,f,y,n,f,m,f,c,b,w,e,c,k,y,c,c,p,w,n,n,w,c,d -p,k,s,e,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,f,s,n,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,k,y,n,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,k,s,e,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -p,k,y,n,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,k,y,n,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -e,x,f,g,f,n,f,w,b,g,e,?,k,k,w,w,p,w,t,p,w,n,g -p,k,y,e,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,x,y,c,f,m,a,c,b,y,e,c,k,y,c,c,p,w,n,n,w,c,d -p,k,y,e,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,k,y,e,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -e,b,s,w,f,n,f,w,b,p,e,?,s,k,w,w,p,w,t,p,w,s,g -p,k,s,n,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,x,s,e,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,f,y,n,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,f,s,e,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,k,y,e,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,k,y,e,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -e,b,f,w,f,n,f,w,b,w,e,?,s,s,w,w,p,w,t,p,w,n,g -p,k,y,n,f,m,a,c,b,w,e,c,k,y,c,c,p,w,n,n,w,c,d -p,f,y,e,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,k,s,n,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,k,y,n,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -p,x,s,e,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -e,x,y,g,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,v,p -p,f,s,n,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,k,y,e,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -e,k,f,w,f,n,f,w,b,w,e,?,k,k,w,w,p,w,t,p,w,n,g -p,x,s,n,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,k,y,n,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -p,x,s,e,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -e,k,s,w,f,n,f,w,b,w,e,?,s,k,w,w,p,w,t,p,w,n,g -p,f,y,n,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,k,s,e,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,k,s,e,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,f,y,e,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -e,f,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,y,v,l -p,f,y,n,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,k,y,e,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -p,k,s,e,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,k,s,e,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -e,b,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,b,v,l -e,k,s,g,f,n,f,w,b,p,e,?,k,k,w,w,p,w,t,p,w,s,g -e,x,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,n,v,l -e,x,f,g,f,n,f,w,b,p,e,?,k,k,w,w,p,w,t,p,w,n,g -p,k,s,e,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,f,y,e,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,k,s,n,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,k,s,n,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -e,b,s,w,f,n,f,w,b,p,e,?,k,s,w,w,p,w,t,p,w,n,g -p,f,s,n,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -e,b,f,g,f,n,f,w,b,p,e,?,s,k,w,w,p,w,t,p,w,n,g -p,x,s,n,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,x,y,e,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,f,y,c,f,m,f,c,b,w,e,c,k,y,c,c,p,w,n,n,w,c,d -p,f,s,n,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,x,y,e,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,f,s,n,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,f,y,e,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -p,f,s,e,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,x,s,e,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -e,k,s,g,f,n,f,w,b,w,e,?,k,s,w,w,p,w,t,p,w,s,g -p,x,s,e,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -p,k,s,n,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,k,y,e,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,x,s,n,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -e,b,s,g,f,n,f,w,b,g,e,?,s,k,w,w,p,w,t,p,w,n,g -p,x,s,n,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -p,f,s,e,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -p,k,s,n,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -e,b,s,g,f,n,f,w,b,w,e,?,s,s,w,w,p,w,t,p,w,s,g 
-p,x,s,n,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,f,y,e,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -p,f,s,e,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,f,y,n,f,m,f,c,b,y,e,c,k,y,c,c,p,w,n,n,w,c,d -e,b,f,g,f,n,f,w,b,g,e,?,k,s,w,w,p,w,t,p,w,s,g -p,f,s,e,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,k,s,n,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -e,k,s,g,f,n,f,w,b,p,e,?,s,k,w,w,p,w,t,p,w,n,g -p,k,y,n,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,x,s,e,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -e,b,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,y,c,l -p,k,y,n,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -p,f,y,n,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -e,x,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,b,c,l -p,k,s,n,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -p,k,s,n,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -p,k,s,n,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,k,s,n,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -e,f,y,c,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,y,p -e,f,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,n,c,l -e,b,s,w,f,n,f,w,b,g,e,?,k,k,w,w,p,w,t,p,w,n,g -p,k,s,e,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -e,b,s,g,f,n,f,w,b,w,e,?,s,s,w,w,p,w,t,p,w,n,g -p,k,y,e,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,k,y,n,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -e,x,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,y,c,l -p,x,s,e,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -p,x,s,n,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,f,s,e,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,f,y,e,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,x,y,e,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,k,s,e,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -e,k,y,n,f,n,f,c,b,w,e,b,y,y,n,n,p,w,t,p,w,y,d -p,f,s,e,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -p,k,s,n,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,f,y,n,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,f,s,e,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -e,k,f,g,f,n,f,w,b,w,e,?,s,s,w,w,p,w,t,p,w,n,g -p,k,s,n,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -e,b,f,w,f,n,f,w,b,p,e,?,k,s,w,w,p,w,t,p,w,s,g -p,k,s,e,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -e,x,s,g,f,n,f,w,b,p,e,?,k,k,w,w,p,w,t,p,w,n,g -e,k,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,y,c,l -p,k,s,e,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -p,k,y,n,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -e,k,f,g,f,n,f,w,b,g,e,?,k,k,w,w,p,w,t,p,w,n,g -p,x,s,n,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -p,k,y,n,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -e,x,f,w,f,n,f,w,b,g,e,?,k,s,w,w,p,w,t,p,w,n,g -p,k,y,e,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -p,k,s,e,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,k,s,e,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -p,k,y,e,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -p,k,y,n,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -e,k,s,g,f,n,f,w,b,g,e,?,k,k,w,w,p,w,t,p,w,n,g -p,k,y,n,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,k,s,e,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -e,x,s,g,f,n,f,w,b,w,e,?,s,k,w,w,p,w,t,p,w,n,g -e,f,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,y,v,l -e,k,s,g,f,n,f,w,b,g,e,?,k,s,w,w,p,w,t,p,w,n,g -p,x,s,e,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -p,k,s,n,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -p,k,s,n,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -e,x,s,g,f,n,f,w,b,p,e,?,k,s,w,w,p,w,t,p,w,s,g -p,k,y,e,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -e,f,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,y,v,l -e,k,s,w,f,n,f,w,b,w,e,?,k,k,w,w,p,w,t,p,w,n,g -p,f,y,e,f,m,f,c,b,y,e,c,k,y,c,c,p,w,n,n,w,c,d -e,b,s,g,f,n,f,w,b,g,e,?,k,k,w,w,p,w,t,p,w,s,g -p,f,y,n,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -e,b,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,b,c,l -e,b,s,g,f,n,f,w,b,w,e,?,k,s,w,w,p,w,t,p,w,n,g -e,k,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,y,c,l -e,b,s,g,f,n,f,w,b,p,e,?,k,k,w,w,p,w,t,p,w,n,g -p,k,s,n,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l 
-p,k,y,n,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -e,f,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,o,c,l -p,k,s,e,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,f,s,e,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,k,s,n,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,k,s,n,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -e,b,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,y,v,l -e,k,f,g,f,n,f,w,b,w,e,?,k,k,w,w,p,w,t,p,w,s,g -e,b,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,o,c,l -p,k,s,n,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,k,y,e,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,k,y,e,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,k,s,n,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -e,f,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,n,c,l -p,k,s,e,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -e,x,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,o,v,l -p,k,s,n,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -e,k,s,w,f,n,f,w,b,p,e,?,s,s,w,w,p,w,t,p,w,s,g -p,k,y,e,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,k,y,n,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -e,k,f,g,f,n,f,w,b,g,e,?,s,s,w,w,p,w,t,p,w,s,g -p,f,s,n,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -e,x,s,g,f,n,f,w,b,w,e,?,s,s,w,w,p,w,t,p,w,n,g -p,k,y,n,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -p,f,s,e,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -e,x,f,w,f,n,f,w,b,p,e,?,k,k,w,w,p,w,t,p,w,n,g -p,k,y,n,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,f,y,n,f,m,a,c,b,w,e,c,k,y,c,c,p,w,n,n,w,c,d -e,b,f,g,f,n,f,w,b,w,e,?,s,s,w,w,p,w,t,p,w,n,g -p,k,y,e,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,x,y,e,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,k,s,e,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -e,k,f,g,f,n,f,w,b,g,e,?,s,k,w,w,p,w,t,p,w,n,g -e,k,f,w,f,n,f,w,b,p,e,?,s,s,w,w,p,w,t,p,w,s,g -p,k,y,e,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -e,k,s,g,f,n,f,w,b,g,e,?,s,s,w,w,p,w,t,p,w,n,g -p,k,s,n,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,k,y,n,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -e,k,s,w,f,n,f,w,b,w,e,?,k,s,w,w,p,w,t,p,w,s,g -p,k,y,e,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -e,b,s,w,f,n,f,w,b,w,e,?,k,s,w,w,p,w,t,p,w,s,g -p,k,s,n,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -e,x,s,w,f,n,f,w,b,w,e,?,s,k,w,w,p,w,t,p,w,n,g -e,b,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,n,v,l -p,k,s,n,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,k,y,e,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -e,x,f,w,f,n,f,w,b,w,e,?,s,s,w,w,p,w,t,p,w,n,g -p,x,y,e,f,m,a,c,b,y,e,c,k,y,c,c,p,w,n,n,w,c,d -p,k,s,e,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,k,s,n,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -p,k,y,n,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -p,k,s,e,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -e,k,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,b,c,l -p,k,y,n,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -e,f,y,n,f,n,f,c,b,w,e,b,y,y,n,n,p,w,t,p,w,y,d -e,k,s,g,f,n,f,w,b,w,e,?,s,s,w,w,p,w,t,p,w,n,g -p,f,y,e,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,k,y,y,f,n,f,w,n,y,e,c,y,y,y,y,p,y,o,e,w,c,l -p,k,y,e,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -p,k,s,e,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,k,y,e,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -e,b,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,n,c,l -e,x,s,g,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,y,p -e,k,s,g,f,n,f,w,b,p,e,?,k,s,w,w,p,w,t,p,w,n,g -p,f,y,e,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -p,k,s,e,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,k,y,e,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -e,b,f,w,f,n,f,w,b,w,e,?,k,k,w,w,p,w,t,p,w,n,g -e,f,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,o,c,l -p,k,s,n,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,k,s,e,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -e,x,f,w,f,n,f,w,b,g,e,?,k,s,w,w,p,w,t,p,w,s,g -p,k,y,e,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -e,x,f,w,f,n,f,w,b,g,e,?,s,s,w,w,p,w,t,p,w,s,g -p,k,y,e,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l 
-p,k,y,n,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -e,x,s,g,f,n,f,w,b,p,e,?,k,s,w,w,p,w,t,p,w,n,g -e,x,s,w,f,n,f,w,b,g,e,?,s,k,w,w,p,w,t,p,w,n,g -p,k,y,n,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,k,s,n,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,k,s,e,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -e,b,f,w,f,n,f,w,b,p,e,?,k,k,w,w,p,w,t,p,w,n,g -p,f,s,n,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -p,k,s,e,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,k,s,n,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -p,x,y,n,f,m,f,c,b,y,e,c,k,y,c,c,p,w,n,n,w,c,d -p,k,y,n,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -e,f,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,y,c,l -p,x,y,e,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -e,f,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,y,v,l -e,b,s,w,f,n,f,w,b,g,e,?,s,s,w,w,p,w,t,p,w,n,g -e,b,s,g,f,n,f,w,b,p,e,?,s,s,w,w,p,w,t,p,w,n,g -p,k,y,e,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,k,s,n,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,k,s,e,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -p,k,s,e,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -e,b,f,w,f,n,f,w,b,g,e,?,s,s,w,w,p,w,t,p,w,s,g -e,k,s,g,f,n,f,w,b,p,e,?,s,s,w,w,p,w,t,p,w,n,g -p,f,y,e,f,m,f,c,b,w,e,c,k,y,c,c,p,w,n,n,w,c,d -p,k,y,n,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,k,y,n,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,f,s,e,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -p,x,s,n,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -p,k,y,n,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,k,y,c,f,m,f,c,b,y,e,c,k,y,c,c,p,w,n,n,w,c,d -p,k,y,n,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -e,k,s,w,f,n,f,w,b,g,e,?,s,s,w,w,p,w,t,p,w,n,g -p,f,y,n,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -e,b,f,g,f,n,f,w,b,p,e,?,s,s,w,w,p,w,t,p,w,s,g -p,f,s,e,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,k,s,e,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -e,k,s,g,f,n,f,w,b,p,e,?,k,s,w,w,p,w,t,p,w,s,g -e,x,s,g,f,n,f,w,b,p,e,?,s,s,w,w,p,w,t,p,w,s,g -p,k,s,e,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -e,k,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,y,c,l -e,f,y,p,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,y,p -e,b,s,w,f,n,f,w,b,g,e,?,s,k,w,w,p,w,t,p,w,n,g -p,f,y,n,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -p,f,s,e,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,k,s,e,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -e,k,f,g,f,n,f,w,b,p,e,?,s,s,w,w,p,w,t,p,w,n,g -e,x,f,w,f,n,f,w,b,p,e,?,k,k,w,w,p,w,t,p,w,s,g -e,x,s,c,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,y,p -e,k,f,g,f,n,f,w,b,w,e,?,s,s,w,w,p,w,t,p,w,s,g -p,k,s,e,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,k,y,e,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,k,s,n,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,x,y,e,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -e,k,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,o,c,l -p,f,y,y,f,n,f,w,n,w,e,c,y,y,y,y,p,y,o,e,w,c,l -p,x,y,c,f,m,a,c,b,w,e,c,k,y,c,c,p,w,n,n,w,c,d -p,x,s,n,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -e,x,y,p,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,v,p -p,k,y,n,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -e,k,s,g,f,n,f,w,b,g,e,?,s,k,w,w,p,w,t,p,w,n,g -e,b,s,g,f,n,f,w,b,p,e,?,s,k,w,w,p,w,t,p,w,s,g -p,k,s,e,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -e,x,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,b,v,l -e,b,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,b,v,l -e,f,s,n,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,y,p -e,k,s,g,f,n,f,w,b,g,e,?,s,s,w,w,p,w,t,p,w,s,g -p,f,s,n,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,k,s,n,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,f,y,n,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,k,y,e,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,k,s,n,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,k,s,e,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -e,b,s,w,f,n,f,w,b,w,e,?,k,s,w,w,p,w,t,p,w,n,g -p,k,y,n,f,m,f,c,b,y,e,c,k,y,c,c,p,w,n,n,w,c,d -p,k,y,e,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d 
-e,k,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,y,v,l -p,k,s,e,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -e,b,s,w,f,n,f,w,b,w,e,?,k,k,w,w,p,w,t,p,w,s,g -e,x,s,g,f,n,f,w,b,p,e,?,k,k,w,w,p,w,t,p,w,s,g -p,k,s,n,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,k,y,n,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,k,s,n,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,k,y,e,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,k,s,e,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,k,y,n,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -e,b,s,g,f,n,f,w,b,p,e,?,k,k,w,w,p,w,t,p,w,s,g -p,k,y,e,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -e,b,f,w,f,n,f,w,b,g,e,?,s,k,w,w,p,w,t,p,w,s,g -p,c,y,y,f,n,f,w,n,y,e,c,y,y,y,y,p,y,o,e,w,c,l -p,x,s,n,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,k,y,e,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -e,x,s,g,f,n,f,w,b,g,e,?,s,s,w,w,p,w,t,p,w,s,g -e,b,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,y,v,l -p,k,y,e,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -e,b,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,n,c,l -p,k,y,e,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,k,y,e,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,k,s,e,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -e,b,s,g,f,n,f,w,b,p,e,?,s,s,w,w,p,w,t,p,w,s,g -e,k,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,n,c,l -e,b,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,n,v,l -p,x,s,n,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -e,x,s,g,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,v,p -e,x,f,g,f,n,f,w,b,w,e,?,k,s,w,w,p,w,t,p,w,s,g -p,k,s,n,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -e,k,f,g,f,n,f,w,b,p,e,?,k,s,w,w,p,w,t,p,w,n,g -p,k,s,n,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -p,k,y,e,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,k,y,n,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -e,f,s,p,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,y,p -p,k,y,n,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,k,y,e,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -e,f,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,b,c,l -p,k,y,e,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -p,k,y,n,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,k,y,n,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -p,f,y,n,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -p,k,s,e,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,k,y,e,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -e,x,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,o,c,l -p,f,s,e,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -e,k,s,g,f,n,f,w,b,p,e,?,s,s,w,w,p,w,t,p,w,s,g -p,k,s,e,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -p,k,s,e,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -e,f,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,o,v,l -p,k,s,n,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -e,x,f,g,f,n,f,w,b,p,e,?,s,k,w,w,p,w,t,p,w,s,g -e,k,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,o,v,l -p,f,s,e,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,k,s,n,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -e,f,y,n,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,y,p -e,x,f,g,f,n,f,w,b,g,e,?,s,k,w,w,p,w,t,p,w,n,g -e,k,s,w,f,n,f,w,b,w,e,?,k,s,w,w,p,w,t,p,w,n,g -e,b,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,b,c,l -p,k,y,e,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,k,y,e,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -e,k,y,n,f,n,f,c,b,w,e,b,y,y,n,n,p,w,t,p,w,y,p -e,x,f,g,f,n,f,w,b,p,e,?,k,k,w,w,p,w,t,p,w,s,g -p,k,s,n,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,k,y,e,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,k,y,n,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,f,y,n,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -e,f,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,b,v,l -p,f,y,e,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,k,s,n,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,k,s,n,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -e,b,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,y,c,l -e,x,f,g,f,n,f,w,b,g,e,?,k,s,w,w,p,w,t,p,w,n,g -p,k,s,n,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -p,k,y,n,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p 
-e,x,s,g,f,n,f,w,b,w,e,?,s,s,w,w,p,w,t,p,w,s,g -p,k,y,n,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,k,s,e,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,k,y,e,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -e,b,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,o,c,l -e,b,s,g,f,n,f,w,b,g,e,?,s,k,w,w,p,w,t,p,w,s,g -p,x,y,e,f,m,a,c,b,w,e,c,k,y,c,c,p,w,n,n,w,c,d -p,k,s,n,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,k,y,e,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,k,y,e,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,k,s,n,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -p,k,y,e,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,k,s,e,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -p,x,s,e,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -p,k,s,e,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,k,s,n,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -e,b,f,g,f,n,f,w,b,w,e,?,k,k,w,w,p,w,t,p,w,s,g -p,k,y,n,f,m,a,c,b,y,e,c,k,y,c,c,p,w,n,n,w,c,d -e,f,s,n,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,v,p -p,k,s,n,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,f,y,y,f,n,f,w,n,y,e,c,y,y,y,y,p,y,o,e,w,c,l -p,k,s,n,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,f,y,e,f,m,a,c,b,y,e,c,k,y,c,c,p,w,n,n,w,c,d -e,x,f,g,f,n,f,w,b,p,e,?,s,s,w,w,p,w,t,p,w,n,g -p,k,y,n,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,k,s,n,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,f,y,n,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -e,f,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,y,c,l -p,k,y,n,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,f,y,n,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,k,s,n,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,f,y,e,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -p,k,s,n,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,x,y,e,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,k,y,n,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -e,x,s,w,f,n,f,w,b,p,e,?,k,s,w,w,p,w,t,p,w,s,g -e,x,s,g,f,n,f,w,b,g,e,?,s,k,w,w,p,w,t,p,w,s,g -e,x,f,w,f,n,f,w,b,w,e,?,k,k,w,w,p,w,t,p,w,s,g -p,k,s,n,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,k,s,e,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -p,k,y,e,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -p,k,s,e,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,k,s,n,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,k,y,n,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,k,y,e,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,k,s,n,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,k,s,e,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -e,f,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,o,c,l -p,f,s,n,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -p,k,s,n,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -e,x,s,g,f,n,f,w,b,g,e,?,k,s,w,w,p,w,t,p,w,n,g -e,x,f,w,f,n,f,w,b,p,e,?,s,s,w,w,p,w,t,p,w,n,g -p,k,y,n,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -e,x,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,o,c,l -p,k,y,n,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,k,y,n,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,k,y,n,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,k,y,e,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,k,s,n,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -e,b,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,b,c,l -e,f,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,n,c,l -e,b,s,w,f,n,f,w,b,g,e,?,k,k,w,w,p,w,t,p,w,s,g -p,k,s,n,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -e,x,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,n,c,l -p,k,y,n,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -e,b,f,g,f,n,f,w,b,g,e,?,k,k,w,w,p,w,t,p,w,s,g -e,b,s,g,f,n,f,w,b,p,e,?,k,s,w,w,p,w,t,p,w,n,g -p,x,s,n,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -e,x,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,b,c,l -e,x,s,w,f,n,f,w,b,p,e,?,s,k,w,w,p,w,t,p,w,n,g -e,x,s,n,f,n,f,c,b,w,e,b,y,y,n,n,p,w,t,p,w,y,p -e,k,s,g,f,n,f,w,b,g,e,?,s,k,w,w,p,w,t,p,w,s,g -e,x,s,w,f,n,f,w,b,g,e,?,k,k,w,w,p,w,t,p,w,n,g -p,f,y,c,f,m,f,c,b,y,e,c,k,y,c,c,p,w,n,n,w,c,d -e,x,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,y,c,l 
-p,k,s,e,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,k,y,e,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -e,x,s,w,f,n,f,w,b,p,e,?,k,s,w,w,p,w,t,p,w,n,g -e,k,f,w,f,n,f,w,b,g,e,?,k,k,w,w,p,w,t,p,w,s,g -e,b,f,g,f,n,f,w,b,p,e,?,k,s,w,w,p,w,t,p,w,s,g -e,k,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,b,v,l -p,k,y,n,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,f,y,e,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,k,s,e,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -e,k,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,n,v,l -e,f,s,p,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,v,p -p,k,y,n,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -e,b,s,w,f,n,f,w,b,g,e,?,k,s,w,w,p,w,t,p,w,s,g -p,k,s,e,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -e,k,s,w,f,n,f,w,b,p,e,?,k,k,w,w,p,w,t,p,w,s,g -p,k,y,n,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,k,s,n,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -e,f,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,b,c,l -p,k,s,n,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -p,k,y,e,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -e,x,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,y,v,l -e,x,f,w,f,n,f,w,b,g,e,?,k,k,w,w,p,w,t,p,w,n,g -e,x,f,g,f,n,f,w,b,w,e,?,k,s,w,w,p,w,t,p,w,n,g -e,x,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,b,v,l -p,k,s,n,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,k,y,e,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -e,x,s,w,f,n,f,w,b,g,e,?,s,s,w,w,p,w,t,p,w,s,g -p,k,y,e,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -e,x,f,g,f,n,f,w,b,w,e,?,s,s,w,w,p,w,t,p,w,s,g -p,k,s,n,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,f,y,n,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -p,k,s,e,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -e,k,s,n,f,n,f,c,b,w,e,b,y,y,n,n,p,w,t,p,w,y,p -p,k,s,e,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,k,s,n,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -e,b,s,g,f,n,f,w,b,g,e,?,s,s,w,w,p,w,t,p,w,s,g -p,k,y,n,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -p,k,s,n,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -e,k,f,g,f,n,f,w,b,w,e,?,k,s,w,w,p,w,t,p,w,n,g -p,k,y,n,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,k,s,e,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,k,y,n,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,k,s,e,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -e,k,f,w,f,n,f,w,b,g,e,?,s,k,w,w,p,w,t,p,w,n,g -p,k,y,e,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,k,y,e,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -e,b,f,w,f,n,f,w,b,g,e,?,k,s,w,w,p,w,t,p,w,s,g -e,x,s,g,f,n,f,w,b,g,e,?,s,s,w,w,p,w,t,p,w,n,g -e,b,s,g,f,n,f,w,b,w,e,?,k,s,w,w,p,w,t,p,w,s,g -p,k,y,e,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,k,s,n,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -p,k,y,e,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -e,b,s,w,f,n,f,w,b,w,e,?,s,k,w,w,p,w,t,p,w,s,g -p,f,s,e,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -p,k,s,n,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -e,k,s,w,f,n,f,w,b,p,e,?,s,k,w,w,p,w,t,p,w,s,g -p,k,y,e,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -p,k,s,e,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,k,s,n,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -e,x,s,n,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,v,p -p,k,s,n,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,k,y,e,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,b,y,y,f,n,f,w,n,w,e,c,y,y,y,y,p,y,o,e,w,c,l -p,k,y,e,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -e,x,s,n,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,y,p -p,k,s,e,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -e,k,f,g,f,n,f,w,b,p,e,?,k,k,w,w,p,w,t,p,w,s,g -p,k,s,e,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,k,s,e,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,k,s,n,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -p,k,y,n,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -e,x,f,g,f,n,f,w,b,p,e,?,k,s,w,w,p,w,t,p,w,n,g -e,b,s,w,f,n,f,w,b,g,e,?,k,s,w,w,p,w,t,p,w,n,g -p,k,y,e,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -e,b,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,n,c,l 
-p,k,y,e,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,k,s,e,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -e,b,s,g,f,n,f,w,b,g,e,?,k,s,w,w,p,w,t,p,w,s,g -p,k,s,e,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -e,b,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,o,v,l -p,f,s,n,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,k,s,e,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,k,s,n,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,k,y,n,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -e,b,f,g,f,n,f,w,b,g,e,?,s,s,w,w,p,w,t,p,w,n,g -e,k,s,w,f,n,f,w,b,g,e,?,s,s,w,w,p,w,t,p,w,s,g -p,k,s,n,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -p,f,y,n,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -e,x,s,w,f,n,f,w,b,p,e,?,k,k,w,w,p,w,t,p,w,s,g -e,x,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,b,c,l -p,k,s,n,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -p,k,y,n,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -e,k,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,b,v,l -e,b,f,w,f,n,f,w,b,w,e,?,k,k,w,w,p,w,t,p,w,s,g -e,x,s,w,f,n,f,w,b,w,e,?,k,k,w,w,p,w,t,p,w,n,g -p,f,y,e,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -p,k,y,e,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -p,x,y,e,f,m,f,c,b,y,e,c,k,y,c,c,p,w,n,n,w,c,d -e,f,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,y,c,l -p,k,s,n,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -p,x,s,n,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,k,y,e,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -e,b,s,w,f,n,f,w,b,p,e,?,s,s,w,w,p,w,t,p,w,s,g -e,b,s,g,f,n,f,w,b,p,e,?,k,s,w,w,p,w,t,p,w,s,g -e,k,f,g,f,n,f,w,b,g,e,?,k,s,w,w,p,w,t,p,w,n,g -e,k,f,g,f,n,f,w,b,p,e,?,k,k,w,w,p,w,t,p,w,n,g -e,x,f,w,f,n,f,w,b,w,e,?,k,s,w,w,p,w,t,p,w,n,g -p,k,y,e,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,f,y,e,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,k,y,e,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,f,s,n,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -e,b,s,w,f,n,f,w,b,p,e,?,k,s,w,w,p,w,t,p,w,s,g -e,k,s,w,f,n,f,w,b,w,e,?,k,k,w,w,p,w,t,p,w,s,g -e,b,f,w,f,n,f,w,b,p,e,?,s,s,w,w,p,w,t,p,w,n,g -e,f,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,b,c,l -p,k,y,e,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -p,k,s,n,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,k,s,n,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,f,y,n,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,x,s,e,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -e,f,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,o,v,l -e,k,f,w,f,n,f,w,b,w,e,?,s,k,w,w,p,w,t,p,w,s,g -p,k,s,n,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,x,s,n,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -e,b,f,g,f,n,f,w,b,w,e,?,s,k,w,w,p,w,t,p,w,s,g -p,k,y,n,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -e,b,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,y,c,l -e,f,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,n,v,l -p,k,y,e,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,k,s,e,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -p,k,y,n,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -p,k,y,e,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -p,k,y,n,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -e,x,s,g,f,n,f,w,b,w,e,?,k,k,w,w,p,w,t,p,w,s,g -p,k,y,n,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -e,f,y,c,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,v,p -e,b,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,n,v,l -p,k,s,e,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,k,s,n,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -p,k,y,n,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -e,b,f,g,f,n,f,w,b,g,e,?,s,k,w,w,p,w,t,p,w,s,g -p,k,y,n,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -e,b,f,w,f,n,f,w,b,p,e,?,k,k,w,w,p,w,t,p,w,s,g -p,k,s,e,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -e,x,s,w,f,n,f,w,b,p,e,?,s,k,w,w,p,w,t,p,w,s,g -p,k,y,e,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,k,y,e,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -e,x,s,w,f,n,f,w,b,w,e,?,k,s,w,w,p,w,t,p,w,n,g -p,k,y,e,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -p,k,y,n,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p 
-e,k,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,o,v,l -e,k,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,n,v,l -p,k,s,e,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,k,s,e,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -e,k,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,n,c,l -p,k,s,n,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,k,s,e,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,k,y,e,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -p,k,y,e,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,k,y,e,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -e,k,f,w,f,n,f,w,b,w,e,?,s,k,w,w,p,w,t,p,w,n,g -e,b,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,n,v,l -p,k,s,e,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -e,f,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,b,v,l -e,f,y,n,f,n,f,c,b,w,e,b,y,y,n,n,p,w,t,p,w,y,p -p,k,y,e,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -e,f,s,g,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,y,p -e,x,s,p,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,v,p -p,c,y,y,f,n,f,w,n,w,e,c,y,y,y,y,p,y,o,e,w,c,l -p,k,s,e,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -p,k,y,e,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,k,y,e,f,m,a,c,b,y,e,c,k,y,c,c,p,w,n,n,w,c,d -e,b,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,o,v,l -p,k,s,n,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -e,x,s,w,f,n,f,w,b,g,e,?,s,k,w,w,p,w,t,p,w,s,g -p,x,s,e,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -p,x,y,c,f,m,f,c,b,w,e,c,k,y,c,c,p,w,n,n,w,c,d -p,f,s,n,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -e,k,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,b,c,l -e,b,y,n,f,n,f,c,b,w,e,b,y,y,n,n,p,w,t,p,w,y,d -p,f,s,n,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,k,s,n,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -e,b,s,g,f,n,f,w,b,w,e,?,k,k,w,w,p,w,t,p,w,s,g -e,x,y,n,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,v,p -p,f,s,n,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -e,k,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,y,c,l -e,b,f,w,f,n,f,w,b,w,e,?,k,s,w,w,p,w,t,p,w,n,g -e,x,f,w,f,n,f,w,b,g,e,?,s,s,w,w,p,w,t,p,w,n,g -p,k,s,n,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,x,y,n,f,m,a,c,b,y,e,c,k,y,c,c,p,w,n,n,w,c,d -p,k,y,e,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -e,x,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,b,c,l -p,k,s,e,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,x,s,e,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,k,s,n,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,k,y,e,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,k,y,n,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -p,k,s,e,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -e,b,f,g,f,n,f,w,b,p,e,?,s,s,w,w,p,w,t,p,w,n,g -e,k,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,y,v,l -p,k,y,n,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,k,y,y,f,n,f,w,n,w,e,c,y,y,y,y,p,y,o,e,w,c,l -e,k,s,w,f,n,f,w,b,p,e,?,k,s,w,w,p,w,t,p,w,n,g -e,x,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,y,c,l -p,k,s,n,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -e,k,f,g,f,n,f,w,b,p,e,?,s,k,w,w,p,w,t,p,w,s,g -e,k,f,w,f,n,f,w,b,w,e,?,s,s,w,w,p,w,t,p,w,n,g -e,f,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,b,c,l -e,x,s,w,f,n,f,w,b,p,e,?,k,k,w,w,p,w,t,p,w,n,g -e,b,s,g,f,n,f,w,b,w,e,?,k,k,w,w,p,w,t,p,w,n,g -e,x,f,w,f,n,f,w,b,w,e,?,s,k,w,w,p,w,t,p,w,n,g -e,k,f,w,f,n,f,w,b,p,e,?,k,s,w,w,p,w,t,p,w,n,g -p,k,s,n,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,k,y,e,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -e,k,f,g,f,n,f,w,b,p,e,?,s,s,w,w,p,w,t,p,w,s,g -e,x,s,w,f,n,f,w,b,p,e,?,s,s,w,w,p,w,t,p,w,s,g -p,k,s,n,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -p,k,y,n,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,k,y,n,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,k,s,e,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -p,k,s,e,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -e,b,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,n,v,l -p,k,s,e,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -e,x,s,g,f,n,f,w,b,p,e,?,s,s,w,w,p,w,t,p,w,n,g -e,x,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,y,v,l 
-e,b,s,w,f,n,f,w,b,w,e,?,k,k,w,w,p,w,t,p,w,n,g -p,k,s,e,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -e,x,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,n,v,l -p,k,s,n,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,k,s,e,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,k,y,e,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -e,b,f,w,f,n,f,w,b,p,e,?,s,s,w,w,p,w,t,p,w,s,g -p,k,y,n,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,k,s,e,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -e,b,s,g,f,n,f,w,b,w,e,?,s,k,w,w,p,w,t,p,w,s,g -p,k,s,n,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -p,x,s,n,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -p,k,y,e,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -e,x,s,w,f,n,f,w,b,p,e,?,s,s,w,w,p,w,t,p,w,n,g -e,x,s,w,f,n,f,w,b,g,e,?,k,k,w,w,p,w,t,p,w,s,g -p,k,y,n,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -e,k,s,n,f,n,f,c,b,w,e,b,y,y,n,n,p,w,t,p,w,y,d -p,k,s,e,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,k,y,e,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -e,b,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,o,v,l -e,b,f,g,f,n,f,w,b,w,e,?,s,k,w,w,p,w,t,p,w,n,g -p,k,s,e,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,k,s,n,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -p,f,y,n,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,f,y,n,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -e,f,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,y,c,l -p,k,y,n,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -e,k,s,w,f,n,f,w,b,w,e,?,s,s,w,w,p,w,t,p,w,s,g -e,b,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,b,c,l -p,k,y,n,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -p,k,s,n,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -p,k,s,e,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -e,k,s,w,f,n,f,w,b,p,e,?,s,k,w,w,p,w,t,p,w,n,g -p,k,y,e,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -e,x,s,w,f,n,f,w,b,w,e,?,s,s,w,w,p,w,t,p,w,s,g -p,k,y,e,f,s,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,k,y,e,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -p,k,y,n,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,f,y,c,f,m,a,c,b,w,e,c,k,y,c,c,p,w,n,n,w,c,d -p,x,s,n,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -e,k,f,g,f,n,f,w,b,w,e,?,k,s,w,w,p,w,t,p,w,s,g -p,k,s,e,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -p,x,y,n,f,m,f,c,b,w,e,c,k,y,c,c,p,w,n,n,w,c,d -p,f,y,e,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -e,k,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,b,c,l -p,k,s,n,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -e,b,f,g,f,n,f,w,b,g,e,?,s,k,w,w,p,w,t,p,w,n,g -e,b,f,w,f,n,f,w,b,p,e,?,s,k,w,w,p,w,t,p,w,n,g -e,x,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,y,v,l -p,k,y,n,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -e,f,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,o,c,l -e,x,s,w,f,n,f,w,b,g,e,?,s,s,w,w,p,w,t,p,w,n,g -p,k,s,e,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -p,k,s,e,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,p -e,x,s,g,f,n,f,w,b,w,e,?,k,k,w,w,p,w,t,p,w,n,g -e,b,s,g,f,n,f,w,b,g,e,?,s,s,w,w,p,w,t,p,w,n,g -p,k,y,n,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -p,k,y,e,f,m,f,c,b,w,e,c,k,y,c,c,p,w,n,n,w,c,d -p,k,y,e,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,k,y,n,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -e,x,f,w,f,n,f,w,b,p,e,?,k,s,w,w,p,w,t,p,w,s,g -p,k,y,e,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -e,b,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,b,v,l -p,k,y,n,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -p,k,s,n,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -e,b,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,y,v,l -e,f,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,n,v,l -e,k,s,g,f,n,f,w,b,w,e,?,k,k,w,w,p,w,t,p,w,n,g -e,f,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,y,c,l -p,k,y,e,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -e,b,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,b,v,l -e,k,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,n,c,l -p,k,y,e,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,d -e,f,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,n,c,l -p,k,s,e,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d 
-e,b,f,g,f,n,f,w,b,g,e,?,k,k,w,w,p,w,t,p,w,n,g -p,k,y,n,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -e,f,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,b,v,l -p,k,s,n,f,s,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,l -e,x,s,g,f,n,f,w,b,w,e,?,k,s,w,w,p,w,t,p,w,s,g -e,x,s,g,f,n,f,w,b,g,e,?,k,k,w,w,p,w,t,p,w,s,g -e,k,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,b,v,l -e,x,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,b,c,l -p,k,y,e,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -e,x,f,g,f,n,f,w,b,w,e,?,k,k,w,w,p,w,t,p,w,n,g -p,k,s,n,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,k,y,e,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,k,y,n,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -p,k,s,e,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,k,y,n,f,s,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,k,y,e,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -e,x,s,w,f,n,f,w,b,g,e,?,k,s,w,w,p,w,t,p,w,n,g -p,k,s,n,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -p,k,s,e,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -p,k,s,e,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -e,x,f,g,f,n,f,w,b,w,e,?,s,s,w,w,p,w,t,p,w,n,g -e,x,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,o,c,l -e,b,s,w,f,n,f,w,b,p,e,?,k,k,w,w,p,w,t,p,w,n,g -p,k,s,n,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,k,y,n,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -p,k,s,n,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,x,y,e,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -p,k,y,e,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -e,x,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,b,v,l -e,x,s,g,f,n,f,w,b,g,e,?,k,s,w,w,p,w,t,p,w,s,g -p,k,s,e,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,k,y,e,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -e,k,s,w,f,n,f,w,b,g,e,?,k,s,w,w,p,w,t,p,w,n,g -e,b,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,b,v,l -e,k,s,w,f,n,f,w,b,p,e,?,k,k,w,w,p,w,t,p,w,n,g -p,k,s,e,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,k,y,n,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -p,k,s,e,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -p,k,s,n,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,x,s,n,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -e,k,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,n,c,l -p,k,s,n,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,k,s,n,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -p,k,y,e,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -e,x,f,w,f,n,f,w,b,p,e,?,s,k,w,w,p,w,t,p,w,s,g -p,f,s,e,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,k,s,n,f,y,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,k,s,e,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -e,f,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,b,v,l -e,x,y,n,f,n,f,c,b,w,e,b,y,y,n,n,p,w,t,p,w,y,d -p,k,y,n,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,k,s,e,f,s,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,l -e,b,s,w,f,n,f,w,b,g,e,?,s,k,w,w,p,w,t,p,w,s,g -p,k,y,n,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -p,k,s,n,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,p -e,x,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,n,c,l -e,x,s,g,f,n,f,w,b,p,e,?,s,k,w,w,p,w,t,p,w,s,g -e,x,s,g,f,n,f,w,b,g,e,?,k,k,w,w,p,w,t,p,w,n,g -p,f,y,n,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,f,y,n,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -e,f,y,g,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,v,p -p,k,s,e,f,y,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -e,x,f,w,f,n,f,w,b,w,e,?,k,s,w,w,p,w,t,p,w,s,g -p,k,y,n,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -p,k,s,n,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -p,x,s,e,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -e,x,f,g,f,n,f,w,b,p,e,?,k,s,w,w,p,w,t,p,w,s,g -e,f,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,n,v,l -p,x,y,e,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -e,b,f,w,f,n,f,w,b,w,e,?,s,s,w,w,p,w,t,p,w,s,g -p,k,s,e,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -e,x,f,g,f,n,f,w,b,p,e,?,s,s,w,w,p,w,t,p,w,s,g -p,f,y,n,f,m,a,c,b,y,e,c,k,y,c,c,p,w,n,n,w,c,d -e,x,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,o,v,l -e,b,s,g,f,n,f,w,b,p,e,?,s,k,w,w,p,w,t,p,w,n,g 
-e,b,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,o,v,l -p,k,s,e,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -e,f,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,o,v,l -e,x,f,g,f,n,f,w,b,w,e,?,s,k,w,w,p,w,t,p,w,n,g -p,k,s,n,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -e,x,f,w,f,n,f,w,b,p,e,?,s,k,w,w,p,w,t,p,w,n,g -e,f,y,p,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,v,p -p,k,y,e,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -e,k,f,g,f,n,f,w,b,w,e,?,s,k,w,w,p,w,t,p,w,n,g -e,k,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,y,c,l -p,k,s,n,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -p,k,s,n,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -p,k,s,e,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,k,y,n,f,s,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,d -e,k,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,o,c,l -p,k,y,n,f,s,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -e,k,f,w,f,n,f,w,b,p,e,?,s,k,w,w,p,w,t,p,w,s,g -p,k,y,e,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -e,f,s,n,f,n,f,c,b,w,e,b,y,y,n,n,p,w,t,p,w,y,p -p,k,s,e,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -e,x,f,g,f,n,f,w,b,w,e,?,k,k,w,w,p,w,t,p,w,s,g -p,k,y,e,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,d -p,k,y,e,f,y,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,k,s,n,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,l -e,b,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,o,c,l -e,k,s,w,f,n,f,w,b,w,e,?,s,s,w,w,p,w,t,p,w,n,g -e,b,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,o,c,l -e,f,y,g,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,y,p -p,k,y,e,f,m,a,c,b,w,e,c,k,y,c,c,p,w,n,n,w,c,d -p,x,y,e,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,k,y,n,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -e,k,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,y,v,l -e,k,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,o,c,l -e,x,y,c,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,v,p -p,k,s,e,f,s,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -p,k,y,n,f,y,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,l -p,k,s,e,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -p,k,y,n,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,k,y,e,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -e,f,s,c,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,v,p -e,x,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,n,c,l -e,b,f,w,f,n,f,w,b,w,e,?,s,k,w,w,p,w,t,p,w,n,g -e,k,f,g,f,n,f,w,b,g,e,?,s,k,w,w,p,w,t,p,w,s,g -e,f,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,o,c,l -e,k,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,o,c,l -e,k,f,w,f,n,f,w,b,g,e,?,s,s,w,w,p,w,t,p,w,n,g -p,k,y,e,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,l -e,b,f,w,f,n,f,w,b,g,e,?,k,k,w,w,p,w,t,p,w,s,g -p,k,y,n,f,y,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,k,y,e,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -p,k,y,n,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -e,x,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,y,v,l -e,x,y,n,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,y,p -e,b,s,w,f,n,f,w,b,p,e,?,s,k,w,w,p,w,t,p,w,n,g -p,f,y,n,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,d -p,k,s,n,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -p,k,s,e,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -e,x,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,y,c,l -e,x,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,n,v,l -e,b,s,w,f,n,f,w,b,w,e,?,s,s,w,w,p,w,t,p,w,s,g -p,x,y,e,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -e,b,f,w,f,n,f,w,b,w,e,?,k,s,w,w,p,w,t,p,w,s,g -e,b,f,w,f,n,f,w,b,g,e,?,s,k,w,w,p,w,t,p,w,n,g -e,b,f,w,f,n,f,w,b,g,e,?,k,s,w,w,p,w,t,p,w,n,g -e,k,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,o,v,l -p,k,y,e,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,p -e,k,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,o,v,l -p,k,s,e,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,d -p,k,y,c,f,m,a,c,b,y,e,c,k,y,c,c,p,w,n,n,w,c,d -p,k,s,n,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -e,x,s,g,f,n,f,w,b,g,e,?,s,k,w,w,p,w,t,p,w,n,g -e,f,s,n,f,n,f,c,b,w,e,b,y,y,n,n,p,w,t,p,w,y,d -e,k,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,y,c,l -e,b,y,n,f,n,f,c,b,w,e,b,y,y,n,n,p,w,t,p,w,y,p -p,x,s,n,f,f,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l 
-e,b,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,b,c,l -p,f,s,n,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -p,k,y,n,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -e,k,s,g,f,n,f,w,b,w,e,?,k,s,w,w,p,w,t,p,w,n,g -p,k,y,n,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -e,k,f,g,f,n,f,w,b,g,e,?,k,s,w,w,p,w,t,p,w,s,g -e,x,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,o,c,l -p,k,s,e,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -e,f,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,y,v,l -p,k,s,n,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,k,y,n,f,s,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -e,k,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,n,v,l -e,b,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,n,c,l -e,x,y,n,f,n,f,c,b,w,e,b,y,y,n,n,p,w,t,p,w,y,p -e,x,s,w,f,n,f,w,b,w,e,?,k,s,w,w,p,w,t,p,w,s,g -e,f,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,o,v,l -p,f,s,n,f,f,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,x,y,e,f,s,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,d -p,k,y,e,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,k,s,e,f,f,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,k,s,e,f,f,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,p -e,b,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,n,v,l -e,x,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,y,v,l -e,f,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,y,v,l -p,k,s,n,f,s,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -e,b,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,b,v,l -e,k,s,g,f,n,f,w,b,w,e,?,s,s,w,w,p,w,t,p,w,s,g -e,k,f,w,f,n,f,w,b,p,e,?,k,k,w,w,p,w,t,p,w,s,g -p,k,s,e,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,p -e,b,f,w,f,n,f,w,b,g,e,?,k,k,w,w,p,w,t,p,w,n,g -e,x,s,w,f,n,f,w,b,w,e,?,s,k,w,w,p,w,t,p,w,s,g -e,b,f,w,f,n,f,w,b,p,e,?,s,k,w,w,p,w,t,p,w,s,g -p,k,y,n,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,d -p,f,s,n,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -p,f,y,e,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,k,s,e,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,p -p,k,y,n,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -e,x,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,y,c,l -e,f,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,n,v,l -p,k,y,n,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,l -p,k,s,n,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -e,x,f,g,f,n,f,w,b,g,e,?,s,s,w,w,p,w,t,p,w,n,g -e,k,s,w,f,n,f,w,b,w,e,?,s,k,w,w,p,w,t,p,w,s,g -p,k,y,e,f,f,f,c,n,b,t,?,s,k,p,p,p,w,o,e,w,v,p -p,x,s,n,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,d -p,k,y,n,f,f,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,l -e,k,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,b,c,l -e,b,f,w,f,n,f,w,b,p,e,?,k,s,w,w,p,w,t,p,w,n,g -e,k,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,b,v,l -p,k,y,e,f,y,f,c,n,b,t,?,k,k,p,w,p,w,o,e,w,v,d -e,x,y,g,t,n,f,c,b,w,e,b,s,s,w,w,p,w,t,p,w,y,p -p,k,s,e,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -e,b,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,y,v,l -p,k,y,e,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,p -e,x,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,b,c,l -e,f,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,b,v,l -e,f,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,y,c,l -p,k,y,e,f,y,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -e,b,f,g,f,n,f,w,b,g,e,?,k,s,w,w,p,w,t,p,w,n,g -p,k,s,e,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,k,s,n,f,y,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -p,k,y,n,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,l -p,k,s,n,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,d -e,x,f,w,f,n,f,w,b,w,e,?,s,k,w,w,p,w,t,p,w,s,g -e,f,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,o,v,l -p,k,y,n,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -e,x,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,n,v,l -e,b,f,g,f,n,f,w,b,w,e,?,k,s,w,w,p,w,t,p,w,n,g -e,b,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,y,v,l -e,x,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,o,v,l -p,k,s,e,f,s,f,c,n,b,t,?,s,k,w,p,p,w,o,e,w,v,l -p,k,s,n,f,f,f,c,n,b,t,?,s,s,w,p,p,w,o,e,w,v,p -p,k,s,n,f,s,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,k,s,n,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,k,y,n,f,y,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l 
-e,k,f,w,f,n,f,w,b,w,e,?,k,s,w,w,p,w,t,p,w,s,g -p,k,y,e,f,f,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,l -e,k,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,b,v,l -p,k,s,e,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -e,b,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,b,c,l -e,f,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,b,c,l -e,k,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,n,c,l -p,k,y,e,f,s,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,l -e,k,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,y,v,l -p,k,y,e,f,f,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,p -e,k,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,y,v,l -e,b,f,g,f,n,f,w,b,w,e,?,k,k,w,w,p,w,t,p,w,n,g -e,f,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,o,c,l -e,b,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,y,c,l -e,f,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,o,v,l -e,b,f,g,f,n,f,w,b,g,e,?,s,s,w,w,p,w,t,p,w,s,g -p,k,y,e,f,f,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -p,k,s,n,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,d -p,k,y,n,f,f,f,c,n,b,t,?,k,s,w,w,p,w,o,e,w,v,p -p,k,s,e,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,p -p,k,y,n,f,y,f,c,n,b,t,?,s,s,w,w,p,w,o,e,w,v,l -e,b,f,g,f,n,f,w,b,p,e,?,k,k,w,w,p,w,t,p,w,s,g -e,k,f,w,f,n,f,w,b,g,e,?,s,k,w,w,p,w,t,p,w,s,g -e,k,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,n,o,p,o,v,l -p,x,s,e,f,f,f,c,n,b,t,?,k,s,w,p,p,w,o,e,w,v,p -e,k,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,n,v,l -p,k,y,e,f,f,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -p,k,s,n,f,f,f,c,n,b,t,?,k,s,p,p,p,w,o,e,w,v,d -p,k,y,e,f,f,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,p -p,k,y,e,f,y,f,c,n,b,t,?,s,s,p,p,p,w,o,e,w,v,p -p,x,s,n,f,y,f,c,n,b,t,?,k,k,w,w,p,w,o,e,w,v,d -e,b,s,g,f,n,f,w,b,g,e,?,k,s,w,w,p,w,t,p,w,n,g -p,x,y,c,f,m,f,c,b,y,e,c,k,y,c,c,p,w,n,n,w,c,d -e,k,f,w,f,n,f,w,b,w,e,?,k,s,w,w,p,w,t,p,w,n,g -p,k,y,n,f,s,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,l -p,k,s,e,f,y,f,c,n,b,t,?,k,k,w,p,p,w,o,e,w,v,d -e,k,f,w,f,n,f,w,b,w,e,?,k,k,w,w,p,w,t,p,w,s,g -e,f,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,b,v,l -p,k,s,e,f,s,f,c,n,b,t,?,s,s,p,w,p,w,o,e,w,v,p -e,x,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,n,c,l -e,k,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,o,c,l -e,k,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,n,v,l -e,k,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,y,v,l -e,k,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,n,v,l -e,x,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,n,c,l -p,k,y,e,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,l -e,b,s,w,f,n,f,w,b,w,e,?,s,s,w,w,p,w,t,p,w,n,g -e,x,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,o,o,p,n,v,l -e,k,s,w,f,n,f,w,b,p,e,?,s,s,w,w,p,w,t,p,w,n,g -e,k,s,n,f,n,a,c,b,o,e,?,s,s,o,o,p,n,o,p,b,v,l -p,k,y,e,f,y,f,c,n,b,t,?,k,k,p,p,p,w,o,e,w,v,d -p,f,y,c,f,m,a,c,b,y,e,c,k,y,c,c,p,w,n,n,w,c,d -e,x,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,o,v,l -p,k,y,n,f,s,f,c,n,b,t,?,s,k,p,w,p,w,o,e,w,v,l -p,k,s,e,f,y,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -p,k,y,n,f,f,f,c,n,b,t,?,k,s,p,w,p,w,o,e,w,v,d -e,k,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,b,c,l -e,x,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,n,o,p,b,v,l -e,f,s,n,f,n,a,c,b,n,e,?,s,s,o,o,p,o,o,p,b,c,l -p,k,y,n,f,y,f,c,n,b,t,?,s,k,w,w,p,w,o,e,w,v,l -e,x,s,n,f,n,a,c,b,y,e,?,s,s,o,o,p,o,o,p,o,c,l diff --git a/demo/CLI/binary_classification/agaricus-lepiota.fmap b/demo/CLI/binary_classification/agaricus-lepiota.fmap deleted file mode 100644 index e1efc285eff8..000000000000 --- a/demo/CLI/binary_classification/agaricus-lepiota.fmap +++ /dev/null @@ -1,32 +0,0 @@ - 1. cap-shape: bell=b,conical=c,convex=x,flat=f,knobbed=k,sunken=s - 2. cap-surface: fibrous=f,grooves=g,scaly=y,smooth=s - 3. cap-color: brown=n,buff=b,cinnamon=c,gray=g,green=r,pink=p,purple=u,red=e,white=w,yellow=y - 4. bruises?: bruises=t,no=f - 5. odor: almond=a,anise=l,creosote=c,fishy=y,foul=f, - musty=m,none=n,pungent=p,spicy=s - 6. gill-attachment: attached=a,descending=d,free=f,notched=n - 7. 
gill-spacing: close=c,crowded=w,distant=d - 8. gill-size: broad=b,narrow=n - 9. gill-color: black=k,brown=n,buff=b,chocolate=h,gray=g, - green=r,orange=o,pink=p,purple=u,red=e, - white=w,yellow=y - 10. stalk-shape: enlarging=e,tapering=t - 11. stalk-root: bulbous=b,club=c,cup=u,equal=e, - rhizomorphs=z,rooted=r,missing=? - 12. stalk-surface-above-ring: fibrous=f,scaly=y,silky=k,smooth=s - 13. stalk-surface-below-ring: fibrous=f,scaly=y,silky=k,smooth=s - 14. stalk-color-above-ring: brown=n,buff=b,cinnamon=c,gray=g,orange=o, - pink=p,red=e,white=w,yellow=y - 15. stalk-color-below-ring: brown=n,buff=b,cinnamon=c,gray=g,orange=o, - pink=p,red=e,white=w,yellow=y - 16. veil-type: partial=p,universal=u - 17. veil-color: brown=n,orange=o,white=w,yellow=y - 18. ring-number: none=n,one=o,two=t - 19. ring-type: cobwebby=c,evanescent=e,flaring=f,large=l, - none=n,pendant=p,sheathing=s,zone=z - 20. spore-print-color: black=k,brown=n,buff=b,chocolate=h,green=r, - orange=o,purple=u,white=w,yellow=y - 21. population: abundant=a,clustered=c,numerous=n, - scattered=s,several=v,solitary=y - 22. habitat: grasses=g,leaves=l,meadows=m,paths=p, - urban=u,waste=w,woods=d diff --git a/demo/CLI/binary_classification/agaricus-lepiota.names b/demo/CLI/binary_classification/agaricus-lepiota.names deleted file mode 100644 index 4f1f3b53e45f..000000000000 --- a/demo/CLI/binary_classification/agaricus-lepiota.names +++ /dev/null @@ -1,148 +0,0 @@ -1. Title: Mushroom Database - -2. Sources: - (a) Mushroom records drawn from The Audubon Society Field Guide to North - American Mushrooms (1981). G. H. Lincoff (Pres.), New York: Alfred - A. Knopf - (b) Donor: Jeff Schlimmer (Jeffrey.Schlimmer@a.gp.cs.cmu.edu) - (c) Date: 27 April 1987 - -3. Past Usage: - 1. Schlimmer,J.S. (1987). Concept Acquisition Through Representational - Adjustment (Technical Report 87-19). Doctoral dissertation, Department - of Information and Computer Science, University of California, Irvine. - --- STAGGER: asymptoted to 95% classification accuracy after reviewing - 1000 instances. - 2. Iba,W., Wogulis,J., & Langley,P. (1988). Trading off Simplicity - and Coverage in Incremental Concept Learning. In Proceedings of - the 5th International Conference on Machine Learning, 73-79. - Ann Arbor, Michigan: Morgan Kaufmann. - -- approximately the same results with their HILLARY algorithm - 3. In the following references a set of rules (given below) was - learned for this data set, which may serve as a point of - comparison for other researchers. - - Duch W, Adamczak R, Grabczewski K (1996) Extraction of logical rules - from training data using backpropagation networks, in: Proc. of - The 1st Online Workshop on Soft Computing, 19-30.Aug.1996, pp. 25-30, - available on-line at: http://www.bioele.nuee.nagoya-u.ac.jp/wsc1/ - - Duch W, Adamczak R, Grabczewski K, Ishikawa M, Ueda H, Extraction of - crisp logical rules using constrained backpropagation networks - - comparison of two new approaches, in: Proc. of the European Symposium - on Artificial Neural Networks (ESANN'97), Bruge, Belgium 16-18.4.1997, - pp. xx-xx - - Wlodzislaw Duch, Department of Computer Methods, Nicholas Copernicus - University, 87-100 Torun, Grudziadzka 5, Poland - e-mail: duch@phys.uni.torun.pl - WWW http://www.phys.uni.torun.pl/kmk/ - - Date: Mon, 17 Feb 1997 13:47:40 +0100 - From: Wlodzislaw Duch - Organization: Dept. of Computer Methods, UMK - - I have attached a file containing logical rules for mushrooms.
- It should be helpful for other people since only in the last year I - have seen about 10 papers analyzing this dataset and obtaining quite - complex rules. We will try to contribute other results later. - - With best regards, Wlodek Duch - ________________________________________________________________ - - Logical rules for the mushroom data sets. - - Logical rules given below seem to be the simplest possible for the - mushroom dataset and therefore should be treated as benchmark results. - - Disjunctive rules for poisonous mushrooms, from most general - to most specific: - - P_1) odor=NOT(almond.OR.anise.OR.none) - 120 poisonous cases missed, 98.52% accuracy - - P_2) spore-print-color=green - 48 cases missed, 99.41% accuracy - - P_3) odor=none.AND.stalk-surface-below-ring=scaly.AND. - (stalk-color-above-ring=NOT.brown) - 8 cases missed, 99.90% accuracy - - P_4) habitat=leaves.AND.cap-color=white - 100% accuracy - - Rule P_4) may also be - - P_4') population=clustered.AND.cap_color=white - - These rules involve 6 attributes (out of 22). Rules for edible - mushrooms are obtained as the negation of the rules given above, for - example the rule: - - odor=(almond.OR.anise.OR.none).AND.spore-print-color=NOT.green - - gives 48 errors, or 99.41% accuracy on the whole dataset. - - Several slightly more complex variations on these rules exist, - involving other attributes, such as gill_size, gill_spacing, - stalk_surface_above_ring, but the rules given above are the simplest - we have found. - - -4. Relevant Information: - This data set includes descriptions of hypothetical samples - corresponding to 23 species of gilled mushrooms in the Agaricus and - Lepiota Family (pp. 500-525). Each species is identified as - definitely edible, definitely poisonous, or of unknown edibility and - not recommended. This latter class was combined with the poisonous - one. The Guide clearly states that there is no simple rule for - determining the edibility of a mushroom; no rule like ``leaflets - three, let it be'' for Poisonous Oak and Ivy. - -5. Number of Instances: 8124 - -6. Number of Attributes: 22 (all nominally valued) - -7. Attribute Information: (classes: edible=e, poisonous=p) - 1. cap-shape: bell=b,conical=c,convex=x,flat=f, - knobbed=k,sunken=s - 2. cap-surface: fibrous=f,grooves=g,scaly=y,smooth=s - 3. cap-color: brown=n,buff=b,cinnamon=c,gray=g,green=r, - pink=p,purple=u,red=e,white=w,yellow=y - 4. bruises?: bruises=t,no=f - 5. odor: almond=a,anise=l,creosote=c,fishy=y,foul=f, - musty=m,none=n,pungent=p,spicy=s - 6. gill-attachment: attached=a,descending=d,free=f,notched=n - 7. gill-spacing: close=c,crowded=w,distant=d - 8. gill-size: broad=b,narrow=n - 9. gill-color: black=k,brown=n,buff=b,chocolate=h,gray=g, - green=r,orange=o,pink=p,purple=u,red=e, - white=w,yellow=y - 10. stalk-shape: enlarging=e,tapering=t - 11. stalk-root: bulbous=b,club=c,cup=u,equal=e, - rhizomorphs=z,rooted=r,missing=? - 12. stalk-surface-above-ring: fibrous=f,scaly=y,silky=k,smooth=s - 13. stalk-surface-below-ring: fibrous=f,scaly=y,silky=k,smooth=s - 14. stalk-color-above-ring: brown=n,buff=b,cinnamon=c,gray=g,orange=o, - pink=p,red=e,white=w,yellow=y - 15. stalk-color-below-ring: brown=n,buff=b,cinnamon=c,gray=g,orange=o, - pink=p,red=e,white=w,yellow=y - 16. veil-type: partial=p,universal=u - 17. veil-color: brown=n,orange=o,white=w,yellow=y - 18. ring-number: none=n,one=o,two=t - 19. ring-type: cobwebby=c,evanescent=e,flaring=f,large=l, - none=n,pendant=p,sheathing=s,zone=z - 20.
spore-print-color: black=k,brown=n,buff=b,chocolate=h,green=r, - orange=o,purple=u,white=w,yellow=y - 21. population: abundant=a,clustered=c,numerous=n, - scattered=s,several=v,solitary=y - 22. habitat: grasses=g,leaves=l,meadows=m,paths=p, - urban=u,waste=w,woods=d - -8. Missing Attribute Values: 2480 of them (denoted by "?"), all for - attribute #11. - -9. Class Distribution: - -- edible: 4208 (51.8%) - -- poisonous: 3916 (48.2%) - -- total: 8124 instances diff --git a/demo/CLI/binary_classification/mapfeat.py b/demo/CLI/binary_classification/mapfeat.py deleted file mode 100755 index 1c8ac9ab39a7..000000000000 --- a/demo/CLI/binary_classification/mapfeat.py +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env python3 - -def loadfmap( fname ): - fmap = {} - nmap = {} - - for l in open( fname ): - arr = l.split() - if arr[0].find('.') != -1: - idx = int( arr[0].strip('.') ) - assert idx not in fmap - fmap[ idx ] = {} - ftype = arr[1].strip(':') - content = arr[2] - else: - content = arr[0] - for it in content.split(','): - if it.strip() == '': - continue - k , v = it.split('=') - fmap[ idx ][ v ] = len(nmap) - nmap[ len(nmap) ] = ftype+'='+k - return fmap, nmap - -def write_nmap( fo, nmap ): - for i in range( len(nmap) ): - fo.write('%d\t%s\ti\n' % (i, nmap[i]) ) - -# start here -fmap, nmap = loadfmap( 'agaricus-lepiota.fmap' ) -fo = open( 'featmap.txt', 'w' ) -write_nmap( fo, nmap ) -fo.close() - -fo = open( 'agaricus.txt', 'w' ) -for l in open( 'agaricus-lepiota.data' ): - arr = l.split(',') - if arr[0] == 'p': - fo.write('1') - else: - assert arr[0] == 'e' - fo.write('0') - for i in range( 1,len(arr) ): - fo.write( ' %d:1' % fmap[i][arr[i].strip()] ) - fo.write('\n') - -fo.close() diff --git a/demo/CLI/binary_classification/mknfold.py b/demo/CLI/binary_classification/mknfold.py deleted file mode 100755 index 3f178e05556c..000000000000 --- a/demo/CLI/binary_classification/mknfold.py +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env python3 - -import random -import sys - -if len(sys.argv) < 3: - print('Usage: mknfold.py <data-file> <k> [nfold = 5]') - exit(0) - -random.seed( 10 ) - -# hold out fold k of nfold as the test set; the rest becomes the training set -k = int( sys.argv[2] ) -if len(sys.argv) > 3: - nfold = int( sys.argv[3] ) -else: - nfold = 5 - -fi = open( sys.argv[1], 'r' ) -ftr = open( sys.argv[1]+'.train', 'w' ) -fte = open( sys.argv[1]+'.test', 'w' ) -for l in fi: - if random.randint( 1 , nfold ) == k: - fte.write( l ) - else: - ftr.write( l ) - -fi.close() -ftr.close() -fte.close() diff --git a/demo/CLI/binary_classification/mushroom.conf b/demo/CLI/binary_classification/mushroom.conf deleted file mode 100644 index d78199cd767a..000000000000 --- a/demo/CLI/binary_classification/mushroom.conf +++ /dev/null @@ -1,29 +0,0 @@ -# General Parameters, see comment for each definition -# choose the booster, can be gbtree or gblinear -booster = gbtree -# choose logistic regression loss function for binary classification -objective = binary:logistic - -# Tree Booster Parameters -# step size shrinkage -eta = 1.0 -# minimum loss reduction required to make a further partition -gamma = 1.0 -# minimum sum of instance weight (hessian) needed in a child -min_child_weight = 1 -# maximum depth of a tree -max_depth = 3 - -# Task Parameters -# the number of rounds to do boosting -num_round = 2 -# 0 means do not save any model except the final round model -save_period = 2 -# The path of training data -data = "agaricus.txt.train?format=libsvm" -# The path of validation data, used to monitor training process, here [test] sets name of the validation set -eval[test] = "agaricus.txt.test?format=libsvm" -# evaluate
on training data as well each round -eval_train = 1 -# The path of test data -test:data = "agaricus.txt.test?format=libsvm" diff --git a/demo/CLI/binary_classification/runexp.sh b/demo/CLI/binary_classification/runexp.sh deleted file mode 100755 index 4a33f0ed8b2d..000000000000 --- a/demo/CLI/binary_classification/runexp.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash -# map features using indicator encoding, also produce featmap.txt -python mapfeat.py -# split train and test -python mknfold.py agaricus.txt 1 - -XGBOOST=../../../xgboost - -# train and output the models -$XGBOOST mushroom.conf -# output predictions with task=pred -$XGBOOST mushroom.conf task=pred model_in=0002.model -# print the boosters of 0002.model in dump.raw.txt -$XGBOOST mushroom.conf task=dump model_in=0002.model name_dump=dump.raw.txt -# use the feature map in printing for better visualization -$XGBOOST mushroom.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt -cat dump.nice.txt diff --git a/demo/CLI/distributed-training/README.md b/demo/CLI/distributed-training/README.md deleted file mode 100644 index 7a7a019c78b0..000000000000 --- a/demo/CLI/distributed-training/README.md +++ /dev/null @@ -1,27 +0,0 @@ -Distributed XGBoost Training -============================ -This is a tutorial on distributed XGBoost training. -Currently XGBoost supports distributed training via the CLI program with a configuration file. -There are also plans to support distributed training in the Python and other language bindings; please open an issue -if you are interested in contributing. - -Build XGBoost with Distributed Filesystem Support -------------------------------------------------- -To use distributed XGBoost, you only need to turn on the options to build -with distributed filesystem support (HDFS, S3, or Azure) in CMake. - -``` -cmake -DUSE_HDFS=ON -DUSE_S3=ON -DUSE_AZURE=ON -``` - - -Step by Step Tutorial on AWS ----------------------------- -Check out [this tutorial](https://xgboost.readthedocs.org/en/latest/tutorials/aws_yarn.html) for running distributed XGBoost. - - -Model Analysis --------------- -XGBoost models are exchangeable across all bindings and platforms. -This means you can use Python or R to analyze the learnt model and make predictions. -For example, you can use [plot_model.ipynb](plot_model.ipynb) to visualize the learnt model.
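A minimal sketch of such an analysis in Python (assuming the saved model has first been copied from S3/HDFS to the local disk; the file name `0002.model` follows the CLI demos above and is otherwise illustrative):

```python
import xgboost as xgb
import matplotlib.pyplot as plt

# Load a model saved by the CLI trainer; any binding can read it.
bst = xgb.Booster(model_file="0002.model")

# Plot per-feature importance scores.
xgb.plot_importance(bst)
plt.show()

# Render the first tree of the ensemble (requires the graphviz package).
graph = xgb.to_graphviz(bst, num_trees=0)
graph.render("tree0", format="png")
```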
diff --git a/demo/CLI/distributed-training/mushroom.aws.conf b/demo/CLI/distributed-training/mushroom.aws.conf deleted file mode 100644 index 04283768c33a..000000000000 --- a/demo/CLI/distributed-training/mushroom.aws.conf +++ /dev/null @@ -1,27 +0,0 @@ -# General Parameters, see comment for each definition -# choose the booster, can be gbtree or gblinear -booster = gbtree -# choose logistic regression loss function for binary classification -objective = binary:logistic - -# Tree Booster Parameters -# step size shrinkage -eta = 1.0 -# minimum loss reduction required to make a further partition -gamma = 1.0 -# minimum sum of instance weight (hessian) needed in a child -min_child_weight = 1 -# maximum depth of a tree -max_depth = 3 - -# Task Parameters -# the number of rounds to do boosting -num_round = 2 -# 0 means do not save any model except the final round model -save_period = 0 -# The path of training data -data = "s3://mybucket/xgb-demo/train" -# The validation data (eval[test]=...), used to monitor training, is passed on the command line by run_aws.sh -# evaluate on training data as well each round -eval_train = 1 - diff --git a/demo/CLI/distributed-training/plot_model.ipynb b/demo/CLI/distributed-training/plot_model.ipynb deleted file mode 100644 index 227f960a0b59..000000000000 --- a/demo/CLI/distributed-training/plot_model.ipynb +++ /dev/null @@ -1,107 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# XGBoost Model Analysis\n", - "\n", - "This notebook can be used to load and analyze models learnt from all XGBoost bindings, including distributed training. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import sys\n", - "import os\n", - "%matplotlib inline " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Please change ```pkg_path``` and ```model_file``` to the correct paths" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "pkg_path = '../../python-package/'\n", - "model_file = 's3://my-bucket/xgb-demo/model/0002.model'\n", - "sys.path.insert(0, pkg_path)\n", - "import xgboost as xgb" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Plot the Feature Importance" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "# load the model and plot the feature importance.\n", - "bst = xgb.Booster(model_file=model_file)\n", - "xgb.plot_importance(bst)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Plot the First Tree" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "tree_id = 0\n", - "xgb.to_graphviz(bst, tree_id)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/demo/CLI/distributed-training/run_aws.sh b/demo/CLI/distributed-training/run_aws.sh deleted file mode 100644 index d7223ea542b5..000000000000 --- a/demo/CLI/distributed-training/run_aws.sh +++ 
/dev/null @@ -1,11 +0,0 @@ -# This is the example script to run distributed XGBoost on AWS. -# Change the following line for configuration - -export BUCKET=mybucket - -# submit the job to YARN -../../../dmlc-core/tracker/dmlc-submit --cluster=yarn --num-workers=2 --worker-cores=2\ - ../../../xgboost mushroom.aws.conf nthread=2\ - data=s3://${BUCKET}/xgb-demo/train\ - eval[test]=s3://${BUCKET}/xgb-demo/test\ - model_dir=s3://${BUCKET}/xgb-demo/model diff --git a/demo/CLI/regression/README.md b/demo/CLI/regression/README.md deleted file mode 100644 index 2525f9824410..000000000000 --- a/demo/CLI/regression/README.md +++ /dev/null @@ -1,16 +0,0 @@ -Regression -==== -Using XGBoost for regression is very similar to using it for binary classification. We suggest that you go through the [binary classification demo](../binary_classification) first. If we use the logistic loss (negative log-likelihood) for regression, the training procedure in XGBoost is the same as for training a binary classifier. - -### Tutorial -The dataset we use is the [computer hardware dataset from the UCI repository](https://archive.ics.uci.edu/ml/datasets/Computer+Hardware). The demo for regression is almost the same as the [binary classification demo](../binary_classification), except for a small difference in the general parameters: -``` -# General parameter -# this is the only difference with classification, use reg:squarederror to do linear regression -# when labels are in [0,1] we can also use reg:logistic -objective = reg:squarederror -... - -``` - -The input format is the same as for binary classification, except that the label is now the target regression value. We use linear (squared-error) regression here; to use logistic regression instead (objective = reg:logistic), the labels need to be pre-scaled into [0,1]. diff --git a/demo/CLI/regression/machine.conf b/demo/CLI/regression/machine.conf deleted file mode 100644 index 42e2b1227298..000000000000 --- a/demo/CLI/regression/machine.conf +++ /dev/null @@ -1,28 +0,0 @@ -# General Parameters, see comment for each definition -# choose the tree booster, can also change to gblinear -booster = gbtree -# this is the only difference with classification, use reg:squarederror to do linear regression -# when labels are in [0,1] we can also use reg:logistic -objective = reg:squarederror - -# Tree Booster Parameters -# step size shrinkage -eta = 1.0 -# minimum loss reduction required to make a further partition -gamma = 1.0 -# minimum sum of instance weight (hessian) needed in a child -min_child_weight = 1 -# maximum depth of a tree -max_depth = 3 - -# Task parameters -# the number of rounds to do boosting -num_round = 2 -# 0 means do not save any model except the final round model -save_period = 0 -# The path of training data -data = "machine.txt.train?format=libsvm" -# The path of validation data, used to monitor training process, here [test] sets name of the validation set -eval[test] = "machine.txt.test?format=libsvm" -# The path of test data -test:data = "machine.txt.test?format=libsvm" diff --git a/demo/CLI/regression/runexp.sh b/demo/CLI/regression/runexp.sh deleted file mode 100755 index 900a80ccef2e..000000000000 --- a/demo/CLI/regression/runexp.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash -# map the data to features.
For convenience we only use 7 original attributes and encode them as features in a trivial way -python mapfeat.py -# split train and test -python mknfold.py machine.txt 1 -# training and output the models -../../xgboost machine.conf -# output predictions of test data -../../xgboost machine.conf task=pred model_in=0002.model -# print the boosters of 0002.model in dump.raw.txt -../../xgboost machine.conf task=dump model_in=0002.model name_dump=dump.raw.txt -# print the boosters of 0002.model in dump.nice.txt with feature map -../../xgboost machine.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt - -# cat the result -cat dump.nice.txt diff --git a/demo/CLI/yearpredMSD/README.md b/demo/CLI/yearpredMSD/README.md deleted file mode 100644 index 3fe35056a4b1..000000000000 --- a/demo/CLI/yearpredMSD/README.md +++ /dev/null @@ -1,9 +0,0 @@ -Demonstrating how to use XGBoost on [Year Prediction task of Million Song Dataset](https://archive.ics.uci.edu/ml/datasets/YearPredictionMSD) - -1. Run runexp.sh -```bash -./runexp.sh -``` - -You can also use the script to prepare LIBSVM format, and run the [Distributed Version](../../multi-node). -Note that though that normally you only need to use single machine for dataset at this scale, and use distributed version for larger scale dataset. diff --git a/demo/CLI/yearpredMSD/csv2libsvm.py b/demo/CLI/yearpredMSD/csv2libsvm.py deleted file mode 100755 index ead362ae2293..000000000000 --- a/demo/CLI/yearpredMSD/csv2libsvm.py +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env python3 - -import sys - -fo = open(sys.argv[2], 'w') - -for l in open(sys.argv[1]): - arr = l.split(',') - fo.write('%s' % arr[0]) - for i in range(len(arr) - 1): - fo.write(' %d:%s' % (i, arr[i+1])) -fo.close() diff --git a/demo/CLI/yearpredMSD/runexp.sh b/demo/CLI/yearpredMSD/runexp.sh deleted file mode 100755 index 4ec58025ed67..000000000000 --- a/demo/CLI/yearpredMSD/runexp.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -if [ -f YearPredictionMSD.txt ] -then - echo "use existing data to run experiment" -else - echo "getting data from uci, make sure you are connected to internet" - wget https://archive.ics.uci.edu/ml/machine-learning-databases/00203/YearPredictionMSD.txt.zip - unzip YearPredictionMSD.txt.zip -fi -echo "start making data.." 
-# map feature using indicator encoding, also produce featmap.txt -python csv2libsvm.py YearPredictionMSD.txt yearpredMSD.libsvm -head -n 463715 yearpredMSD.libsvm > yearpredMSD.libsvm.train -tail -n 51630 yearpredMSD.libsvm > yearpredMSD.libsvm.test -echo "finish making the data" -../../../xgboost yearpredMSD.conf diff --git a/demo/CLI/yearpredMSD/yearpredMSD.conf b/demo/CLI/yearpredMSD/yearpredMSD.conf deleted file mode 100644 index 36cdf39c9847..000000000000 --- a/demo/CLI/yearpredMSD/yearpredMSD.conf +++ /dev/null @@ -1,29 +0,0 @@ -# General Parameters, see comment for each definition -# choose the tree booster, can also change to gblinear -booster = gbtree -# this is the only difference with classification, use reg:squarederror to do linear classification -# when labels are in [0,1] we can also use reg:logistic -objective = reg:squarederror - -# Tree Booster Parameters -# step size shrinkage -eta = 1.0 -# minimum loss reduction required to make a further partition -gamma = 1.0 -# minimum sum of instance weight(hessian) needed in a child -min_child_weight = 1 -# maximum depth of a tree -max_depth = 5 - -base_score = 2001 -# Task parameters -# the number of round to do boosting -num_round = 100 -# 0 means do not save any model except the final round model -save_period = 0 -# The path of training data -data = "yearpredMSD.libsvm.train" -# The path of validation data, used to monitor training process, here [test] sets name of the validation set -eval[test] = "yearpredMSD.libsvm.test" -# The path of test data -#test:data = "yearpredMSD.libsvm.test" diff --git a/demo/README.md b/demo/README.md index b0c644b0c802..c85aa6b10192 100644 --- a/demo/README.md +++ b/demo/README.md @@ -10,7 +10,6 @@ Contents -------- - [Code Examples](#code-examples) - [Features Walkthrough](#features-walkthrough) - - [Basic Examples by Tasks](#basic-examples-by-tasks) - [Benchmarks](#benchmarks) - [Machine Learning Challenge Winning Solutions](#machine-learning-challenge-winning-solutions) - [Tutorials](#tutorials) @@ -22,16 +21,18 @@ Contents Code Examples ------------- + ### Features Walkthrough _Note: for the R package, see the in-package examples and vignettes instead_ +_Note: For the Python package, see [Feature Walk through](https://xgboost.readthedocs.io/en/stable/python/examples/index.html)._ + This is a list of short codes introducing different functionalities of xgboost packages. * Basic walkthrough of packages [python](guide-python/basic_walkthrough.py) [Julia](https://github.com/antinucleon/XGBoost.jl/blob/master/demo/basic_walkthrough.jl) - [PHP](https://github.com/bpachev/xgboost-php/blob/master/demo/titanic_demo.php) * Customize loss function, and evaluation metric [python](guide-python/custom_objective.py) [Julia](https://github.com/antinucleon/XGBoost.jl/blob/master/demo/custom_objective.jl) @@ -50,16 +51,6 @@ This is a list of short codes introducing different functionalities of xgboost p * Predicting leaf indices [python](guide-python/predict_leaf_indices.py) -### Basic Examples by Tasks - -Most of examples in this section are based on CLI or python version. 
-However, the parameter settings can be applied to all versions - -- [Binary classification](CLI/binary_classification) -- [Multiclass classification](multiclass_classification) -- [Regression](CLI/regression) -- [Learning to Rank](rank) - ### Benchmarks - [Starter script for Kaggle Higgs Boson](kaggle-higgs) @@ -73,6 +64,7 @@ XGBoost is extensively used by machine learning practitioners to create state of this is a list of machine learning winning solutions with XGBoost. Please send pull requests if you find ones that are missing here. +- Gábor Melis, 1st place winner of [Kaggle Higgs competition](https://github.com/ghl3/higgs-kaggle) conducted between May and September 2014. Link to [discussion](http://no2147483647.wordpress.com/2014/09/17/winning-solution-of-kaggle-higgs-competition-what-a-single-model-can-do/), [code](https://github.com/phunterlau/kaggle_higgs) and [news article](https://atlas.cern/updates/news/machine-learning-wins-higgs-challenge) - Bishwarup Bhattacharjee, 1st place winner of [Allstate Claims Severity](https://www.kaggle.com/competitions/allstate-claims-severity/overview) conducted on December 2016. Link to [discussion](https://www.kaggle.com/competitions/allstate-claims-severity/discussion/26416) - Benedikt Schifferer, Gilberto Titericz, Chris Deotte, Christof Henkel, Kazuki Onodera, Jiwei Liu, Bojan Tunguz, Even Oldridge, Gabriel De Souza Pereira Moreira and Ahmet Erdem, 1st place winner of [Twitter RecSys Challenge 2020](https://recsys-twitter.com/) conducted from June,20-August,20. [GPU Accelerated Feature Engineering and Training for Recommender Systems](https://medium.com/rapids-ai/winning-solution-of-recsys2020-challenge-gpu-accelerated-feature-engineering-and-training-for-cd67c5a87b1f) - Eugene Khvedchenya,Jessica Fridrich, Jan Butora, Yassine Yousfi 1st place winner in [ALASKA2 Image Steganalysis](https://www.kaggle.com/c/alaska2-image-steganalysis/overview). Link to [discussion](https://www.kaggle.com/c/alaska2-image-steganalysis/discussion/168546) diff --git a/demo/data/regression/README.md b/demo/data/regression/README.md new file mode 100644 index 000000000000..b7553623b112 --- /dev/null +++ b/demo/data/regression/README.md @@ -0,0 +1,4 @@ +Regression +========== + +The dataset is the [computer hardware dataset from UCI repository](https://archive.ics.uci.edu/ml/datasets/Computer+Hardware). diff --git a/demo/CLI/regression/machine.data b/demo/data/regression/machine.data similarity index 100% rename from demo/CLI/regression/machine.data rename to demo/data/regression/machine.data diff --git a/demo/CLI/regression/machine.names b/demo/data/regression/machine.names similarity index 100% rename from demo/CLI/regression/machine.names rename to demo/data/regression/machine.names diff --git a/demo/CLI/regression/mapfeat.py b/demo/data/regression/mapfeat.py similarity index 100% rename from demo/CLI/regression/mapfeat.py rename to demo/data/regression/mapfeat.py diff --git a/demo/CLI/regression/mknfold.py b/demo/data/regression/mknfold.py similarity index 100% rename from demo/CLI/regression/mknfold.py rename to demo/data/regression/mknfold.py diff --git a/demo/gpu_acceleration/README.rst b/demo/gpu_acceleration/README.rst deleted file mode 100644 index 77bd221d1807..000000000000 --- a/demo/gpu_acceleration/README.rst +++ /dev/null @@ -1,8 +0,0 @@ -:orphan: - -GPU Acceleration Demo -===================== - -This is a collection of demonstration scripts to showcase the basic usage of GPU. Please -see :doc:`/gpu/index` for more info. 
There are other demonstrations for distributed GPU -training using dask or spark. diff --git a/demo/guide-python/cat_pipeline.py b/demo/guide-python/cat_pipeline.py index 72e786edd19b..e4ec2a5cdcae 100644 --- a/demo/guide-python/cat_pipeline.py +++ b/demo/guide-python/cat_pipeline.py @@ -6,6 +6,11 @@ training and inference. There are many ways to attain the same goal, this script can be used as a starting point. +.. versionchanged:: 3.1 + + Starting with 3.1, users don't need this in most cases. See :ref:`cat-recode` + for more info. + See Also -------- - :doc:`Tutorial ` diff --git a/demo/gpu_acceleration/cover_type.py b/demo/guide-python/cover_type.py similarity index 96% rename from demo/gpu_acceleration/cover_type.py rename to demo/guide-python/cover_type.py index a582aaad3c60..20d2a81d0b45 100644 --- a/demo/gpu_acceleration/cover_type.py +++ b/demo/guide-python/cover_type.py @@ -45,5 +45,6 @@ # Repeat for CPU algorithm clf = xgb.XGBClassifier(device="cpu", n_estimators=num_round) start = time.time() +clf.fit(X_train, y_train, eval_set=[(X_test, y_test)]) cpu_res = clf.evals_result() print("CPU Training Time: %s seconds" % (str(time.time() - start))) diff --git a/demo/guide-python/cross_validation.py b/demo/guide-python/cross_validation.py index a33a16c36f04..27736aa6f406 100644 --- a/demo/guide-python/cross_validation.py +++ b/demo/guide-python/cross_validation.py @@ -4,6 +4,7 @@ """ import os +from typing import Any, Dict, Tuple import numpy as np @@ -54,7 +55,9 @@ # used to return the preprocessed training, test data, and parameter # we can use this to do weight rescale, etc. # as an example, we try to set scale_pos_weight -def fpreproc(dtrain, dtest, param): +def fpreproc( + dtrain: xgb.DMatrix, dtest: xgb.DMatrix, param: Any +) -> Tuple[xgb.DMatrix, xgb.DMatrix, Dict[str, Any]]: label = dtrain.get_label() ratio = float(np.sum(label == 0)) / np.sum(label == 1) param["scale_pos_weight"] = ratio @@ -74,7 +77,7 @@ def fpreproc(dtrain, dtest, param): print("running cross validation, with customized loss function") -def logregobj(preds, dtrain): +def logregobj(preds: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[np.ndarray, np.ndarray]: labels = dtrain.get_label() preds = 1.0 / (1.0 + np.exp(-preds)) grad = preds - labels @@ -82,7 +85,7 @@ def logregobj(preds, dtrain): return grad, hess -def evalerror(preds, dtrain): +def evalerror(preds: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]: labels = dtrain.get_label() preds = 1.0 / (1.0 + np.exp(-preds)) return "error", float(sum(labels != (preds > 0.0))) / len(labels) diff --git a/demo/guide-python/custom_rmsle.py b/demo/guide-python/custom_rmsle.py index b4a7d94ec8a8..c958b298d1e1 100644 --- a/demo/guide-python/custom_rmsle.py +++ b/demo/guide-python/custom_rmsle.py @@ -18,7 +18,6 @@ from time import time from typing import Dict, List, Tuple -import matplotlib import numpy as np from matplotlib import pyplot as plt @@ -136,7 +135,7 @@ def squared_log(predt: np.ndarray, def rmsle(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]: ''' Root mean squared log error metric.
- :math:`\sqrt{\frac{1}{N}[log(pred + 1) - log(label + 1)]^2}` + :math:`\\sqrt{\frac{1}{N}[log(pred + 1) - log(label + 1)]^2}` ''' y = dtrain.get_label() predt[predt < -1] = -1 + 1e-6 @@ -156,11 +155,16 @@ def rmsle(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]: return results -def plot_history(rmse_evals, rmsle_evals, py_rmsle_evals): +def plot_history( + rmse_evals: Dict[str, Dict], + rmsle_evals: Dict[str, Dict], + py_rmsle_evals: Dict[str, Dict] +) -> None: fig, axs = plt.subplots(3, 1) - ax0: matplotlib.axes.Axes = axs[0] - ax1: matplotlib.axes.Axes = axs[1] - ax2: matplotlib.axes.Axes = axs[2] + assert isinstance(axs, np.ndarray) + ax0 = axs[0] + ax1 = axs[1] + ax2 = axs[2] x = np.arange(0, kBoostRound, 1) @@ -177,7 +181,7 @@ def plot_history(rmse_evals, rmsle_evals, py_rmsle_evals): ax2.legend() -def main(args): +def main(args: argparse.Namespace) -> None: dtrain, dtest = generate_data() rmse_evals = native_rmse(dtrain, dtest) rmsle_evals = native_rmsle(dtrain, dtest) diff --git a/demo/guide-python/custom_softmax.py b/demo/guide-python/custom_softmax.py index 2d2ebae2041b..207b38d01f37 100644 --- a/demo/guide-python/custom_softmax.py +++ b/demo/guide-python/custom_softmax.py @@ -1,4 +1,4 @@ -''' +""" Demo for creating customized multi-class objective function =========================================================== @@ -9,9 +9,10 @@ See :doc:`/tutorials/custom_metric_obj` and :doc:`/tutorials/advanced_custom_obj` for detailed tutorial and notes. -''' +""" import argparse +from typing import Dict, Tuple import numpy as np from matplotlib import pyplot as plt @@ -22,9 +23,9 @@ kRows = 100 kCols = 10 -kClasses = 4 # number of classes +kClasses = 4 # number of classes -kRounds = 10 # number of boosting rounds. +kRounds = 10 # number of boosting rounds. # Generate some random data for demo. X = np.random.randn(kRows, kCols) @@ -33,19 +34,19 @@ m = xgb.DMatrix(X, y) -def softmax(x): - '''Softmax function with x as input vector.''' +def softmax(x: np.ndarray) -> np.ndarray: + """Softmax function with x as input vector.""" e = np.exp(x) return e / np.sum(e) -def softprob_obj(predt: np.ndarray, data: xgb.DMatrix): - '''Loss function. Computing the gradient and upper bound on the +def softprob_obj(predt: np.ndarray, data: xgb.DMatrix) -> Tuple[np.ndarray, np.ndarray]: + """Loss function. Computing the gradient and upper bound on the Hessian with a diagonal structure for XGBoost (note that this is not the true Hessian). Reimplements the `multi:softprob` inside XGBoost. - ''' + """ labels = data.get_label() if data.get_weight().size == 0: # Use 1 as weight if we don't have custom weight. @@ -83,11 +84,11 @@ def softprob_obj(predt: np.ndarray, data: xgb.DMatrix): return grad, hess -def predict(booster: xgb.Booster, X): - '''A customized prediction function that converts raw prediction to +def predict(booster: xgb.Booster, X: xgb.DMatrix) -> np.ndarray: + """A customized prediction function that converts raw prediction to target class. - ''' + """ # Output margin means we want to obtain the raw prediction obtained from # tree leaf weight. predt = booster.predict(X, output_margin=True) @@ -101,14 +102,14 @@ def predict(booster: xgb.Booster, X): return out -def merror(predt: np.ndarray, dtrain: xgb.DMatrix): +def merror(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, np.float64]: y = dtrain.get_label() - # Like custom objective, the predt is untransformed leaf weight when custom objective - # is provided. 
+ # Like custom objective, the predt is untransformed leaf weight when custom + # objective is provided. - # With the use of `custom_metric` parameter in train function, custom metric receives - # raw input only when custom objective is also being used. Otherwise custom metric - # will receive transformed prediction. + # With the use of `custom_metric` parameter in train function, custom metric + # receives raw input only when custom objective is also being used. Otherwise + # custom metric will receive transformed prediction. assert predt.shape == (kRows, kClasses) out = np.zeros(kRows) for r in range(predt.shape[0]): @@ -119,68 +120,91 @@ def merror(predt: np.ndarray, dtrain: xgb.DMatrix): errors = np.zeros(kRows) errors[y != out] = 1.0 - return 'PyMError', np.sum(errors) / kRows + return "PyMError", np.sum(errors) / kRows -def plot_history(custom_results, native_results): - fig, axs = plt.subplots(2, 1) +def plot_history( + custom_results: Dict[str, Dict], native_results: Dict[str, Dict] +) -> None: + axs: np.ndarray + fig, axs = plt.subplots(2, 1) # type: ignore ax0 = axs[0] ax1 = axs[1] - pymerror = custom_results['train']['PyMError'] - merror = native_results['train']['merror'] + pymerror = custom_results["train"]["PyMError"] + merror = native_results["train"]["merror"] x = np.arange(0, kRounds, 1) - ax0.plot(x, pymerror, label='Custom objective') + ax0.plot(x, pymerror, label="Custom objective") ax0.legend() - ax1.plot(x, merror, label='multi:softmax') + ax1.plot(x, merror, label="multi:softmax") ax1.legend() plt.show() -def main(args): - custom_results = {} +def main(args: argparse.Namespace) -> None: + # Since 3.1, XGBoost can estimate the base_score automatically for built-in + # multi-class objectives. + # + # We explicitly specify it here to disable the automatic estimation to have a proper + # comparison between the custom implementation and the built-in implementation. + intercept = np.full(shape=(kClasses,), fill_value=1 / kClasses) + + custom_results: Dict[str, Dict] = {} # Use our custom objective function - booster_custom = xgb.train({'num_class': kClasses, - 'disable_default_eval_metric': True}, - m, - num_boost_round=kRounds, - obj=softprob_obj, - custom_metric=merror, - evals_result=custom_results, - evals=[(m, 'train')]) + booster_custom = xgb.train( + { + "num_class": kClasses, + "base_score": intercept, + "disable_default_eval_metric": True, + }, + m, + num_boost_round=kRounds, + obj=softprob_obj, + custom_metric=merror, + evals_result=custom_results, + evals=[(m, "train")], + ) predt_custom = predict(booster_custom, m) - native_results = {} + native_results: Dict[str, Dict] = {} # Use the same objective function defined in XGBoost. - booster_native = xgb.train({'num_class': kClasses, - "objective": "multi:softmax", - 'eval_metric': 'merror'}, - m, - num_boost_round=kRounds, - evals_result=native_results, - evals=[(m, 'train')]) + booster_native = xgb.train( + { + "num_class": kClasses, + "base_score": intercept, + "objective": "multi:softmax", + "eval_metric": "merror", + }, + m, + num_boost_round=kRounds, + evals_result=native_results, + evals=[(m, "train")], + ) predt_native = booster_native.predict(m) # We are reimplementing the loss function in XGBoost, so it should # be the same for normal cases. 
assert np.all(predt_custom == predt_native) - np.testing.assert_allclose(custom_results['train']['PyMError'], - native_results['train']['merror']) + np.testing.assert_allclose( + custom_results["train"]["PyMError"], native_results["train"]["merror"] + ) if args.plot != 0: plot_history(custom_results, native_results) -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser( - description='Arguments for custom softmax objective function demo.') + description="Arguments for custom softmax objective function demo." + ) parser.add_argument( - '--plot', + "--plot", type=int, default=1, - help='Set to 0 to disable plotting the evaluation history.') + help="Set to 0 to disable plotting the evaluation history.", + ) args = parser.parse_args() main(args) diff --git a/demo/guide-python/distributed_extmem_basic.py b/demo/guide-python/distributed_extmem_basic.py index 1b6a8b2c1b6f..35919daff5ef 100644 --- a/demo/guide-python/distributed_extmem_basic.py +++ b/demo/guide-python/distributed_extmem_basic.py @@ -13,8 +13,11 @@ If `device` is `cuda`, following are also needed: - cupy -- python-cuda - rmm +- cuda-python + +Not shown in this example, but you should pay attention to NUMA configuration as +discussed in the tutorial. """ @@ -36,6 +39,16 @@ from xgboost.tracker import RabitTracker +def device_mem_total() -> int: + """The total number of bytes of memory this GPU has.""" + import cuda.bindings.runtime as cudart + + status, free, total = cudart.cudaMemGetInfo() + if status != cudart.cudaError_t.cudaSuccess: + raise RuntimeError(cudart.cudaGetErrorString(status)) + return total + + def make_batches( n_samples_per_batch: int, n_features: int, n_batches: int, tmpdir: str, rank: int ) -> List[Tuple[str, str]]: @@ -108,16 +121,13 @@ def setup_rmm() -> None: """ import rmm - from cuda import cudart from rmm.allocators.cupy import rmm_cupy_allocator from rmm.mr import ArenaMemoryResource if not xgboost.build_info()["USE_RMM"]: return - status, free, total = cudart.cudaMemGetInfo() - if status != cudart.cudaError_t.cudaSuccess: - raise RuntimeError(cudart.cudaGetErrorString(status)) + total = device_mem_total() mr = rmm.mr.CudaMemoryResource() mr = ArenaMemoryResource(mr, arena_size=int(total * 0.9)) diff --git a/demo/guide-python/evals_result.py b/demo/guide-python/evals_result.py index 7b9da96da52a..2ea853a090b1 100644 --- a/demo/guide-python/evals_result.py +++ b/demo/guide-python/evals_result.py @@ -3,6 +3,7 @@ ====================================================== """ import os +from typing import Any, Dict import xgboost as xgb @@ -24,7 +25,7 @@ num_round = 2 watchlist = [(dtest, "eval"), (dtrain, "train")] -evals_result = {} +evals_result: Dict[str, Any] = {} bst = xgb.train(param, dtrain, num_round, watchlist, evals_result=evals_result) print("Access logloss metric directly from evals_result:") diff --git a/demo/guide-python/external_memory.py b/demo/guide-python/external_memory.py index 207e2ce68911..60fd1efead7f 100644 --- a/demo/guide-python/external_memory.py +++ b/demo/guide-python/external_memory.py @@ -23,18 +23,21 @@ - cupy - rmm -- python-cuda +- cuda-python .. seealso:: :ref:`sphx_glr_python_examples_distributed_extmem_basic.py` +Not shown in this example, but you should pay attention to NUMA configuration as +discussed in the tutorial. 
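Both external-memory demos now share the same RMM setup: query the device's total memory via cuda-python, then reserve roughly 90% of it in an arena. A minimal, self-contained sketch of that pattern, with function names mirroring the demos; treat it as an illustration under those assumptions rather than the shipped scripts:

```python
import rmm
import xgboost
from rmm.allocators.cupy import rmm_cupy_allocator
from rmm.mr import ArenaMemoryResource


def device_mem_total() -> int:
    """Total bytes of memory on the current GPU, queried via cuda-python."""
    import cuda.bindings.runtime as cudart

    status, _free, total = cudart.cudaMemGetInfo()
    if status != cudart.cudaError_t.cudaSuccess:
        raise RuntimeError(cudart.cudaGetErrorString(status))
    return total


def setup_rmm() -> None:
    """Reserve ~90% of device memory in an RMM arena, as the demos do."""
    import cupy as cp

    if not xgboost.build_info()["USE_RMM"]:
        return

    # One arena backed by the plain CUDA resource; 0.9 follows the demos.
    mr = ArenaMemoryResource(
        rmm.mr.CudaMemoryResource(), arena_size=int(device_mem_total() * 0.9)
    )
    rmm.mr.set_current_device_resource(mr)
    # Route cupy through the same pool.
    cp.cuda.set_allocator(rmm_cupy_allocator)
```

Pairing this with `xgboost.config_context(use_rmm=True)` keeps XGBoost's own device allocations in the same pool, so the two allocators don't compete for memory.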
+ """ import argparse import os import tempfile -from typing import Callable, List, Tuple +from typing import Callable, List, Literal, Tuple import numpy as np from sklearn.datasets import make_regression @@ -42,6 +45,16 @@ import xgboost +def device_mem_total() -> int: + """The total number of bytes of memory this GPU has.""" + import cuda.bindings.runtime as cudart + + status, free, total = cudart.cudaMemGetInfo() + if status != cudart.cudaError_t.cudaSuccess: + raise RuntimeError(cudart.cudaGetErrorString(status)) + return total + + def make_batches( n_samples_per_batch: int, n_features: int, @@ -63,7 +76,9 @@ def make_batches( class Iterator(xgboost.DataIter): """A custom iterator for loading files in batches.""" - def __init__(self, device: str, file_paths: List[Tuple[str, str]]) -> None: + def __init__( + self, device: Literal["cpu", "cuda"], file_paths: List[Tuple[str, str]] + ) -> None: self.device = device self._file_paths = file_paths @@ -110,7 +125,7 @@ def reset(self) -> None: def hist_train(it: Iterator) -> None: """The hist tree method can use a special data structure `ExtMemQuantileDMatrix` for - faster initialization and lower memory usage. + faster initialization and lower memory usage (recommended). .. versionadded:: 3.0.0 @@ -128,7 +143,7 @@ def hist_train(it: Iterator) -> None: def approx_train(it: Iterator) -> None: - """The approx tree method uses the basic `DMatrix`.""" + """The approx tree method uses the basic `DMatrix` (not recommended).""" # For non-data arguments, specify it here once instead of passing them by the `next` # method. @@ -167,16 +182,13 @@ def setup_rmm() -> None: """ import rmm - from cuda import cudart from rmm.allocators.cupy import rmm_cupy_allocator from rmm.mr import ArenaMemoryResource if not xgboost.build_info()["USE_RMM"]: return - status, free, total = cudart.cudaMemGetInfo() - if status != cudart.cudaError_t.cudaSuccess: - raise RuntimeError(cudart.cudaGetErrorString(status)) + total = device_mem_total() mr = rmm.mr.CudaMemoryResource() mr = ArenaMemoryResource(mr, arena_size=int(total * 0.9)) diff --git a/demo/gpu_acceleration/tree_shap.py b/demo/guide-python/gpu_tree_shap.py similarity index 66% rename from demo/gpu_acceleration/tree_shap.py rename to demo/guide-python/gpu_tree_shap.py index d591307e0c1b..f0e772ff3cc4 100644 --- a/demo/gpu_acceleration/tree_shap.py +++ b/demo/guide-python/gpu_tree_shap.py @@ -5,16 +5,24 @@ Demonstrates using GPU acceleration to compute SHAP values for feature importance. 
""" +from urllib.error import HTTPError + import shap -from sklearn.datasets import fetch_california_housing +from sklearn.datasets import fetch_california_housing, make_regression import xgboost as xgb # Fetch dataset using sklearn -data = fetch_california_housing() -print(data.DESCR) -X = data.data -y = data.target +try: + _data = fetch_california_housing(return_X_y=True) + X = _data.data + y = _data.target + feature_names = _data.feature_names + print(_data.DESCR) +except HTTPError: + # Use a synthetic dataset instead if we couldn't + X, y = make_regression(n_samples=20640, n_features=8, random_state=1234) + feature_names = [f"f{i}" for i in range(8)] num_round = 500 @@ -26,7 +34,7 @@ } # GPU accelerated training -dtrain = xgb.DMatrix(X, label=y, feature_names=data.feature_names) +dtrain = xgb.DMatrix(X, label=y, feature_names=feature_names) model = xgb.train(param, dtrain, num_round) # Compute shap values using GPU with xgboost @@ -47,9 +55,9 @@ explainer.expected_value, shap_values[0, :], X[0, :], - feature_names=data.feature_names, + feature_names=feature_names, matplotlib=True, ) # Show a summary of feature importance -shap.summary_plot(shap_values, X, plot_type="bar", feature_names=data.feature_names) +shap.summary_plot(shap_values, X, plot_type="bar", feature_names=feature_names) diff --git a/demo/guide-python/model_parser.py b/demo/guide-python/model_parser.py index 39a459613409..dd4961bda7f5 100644 --- a/demo/guide-python/model_parser.py +++ b/demo/guide-python/model_parser.py @@ -154,7 +154,9 @@ def __init__(self, model: dict) -> None: self.learner_model_shape: ParamT = model["learner"]["learner_model_param"] self.num_output_group = int(self.learner_model_shape["num_class"]) self.num_feature = int(self.learner_model_shape["num_feature"]) - self.base_score = float(self.learner_model_shape["base_score"]) + self.base_score: List[float] = json.loads( + self.learner_model_shape["base_score"] + ) # A field encoding which output group a tree belongs self.tree_info = model["learner"]["gradient_booster"]["model"]["tree_info"] diff --git a/demo/guide-python/multioutput_regression.py b/demo/guide-python/multioutput_regression.py index cc64e4e09680..f641e4c9b784 100644 --- a/demo/guide-python/multioutput_regression.py +++ b/demo/guide-python/multioutput_regression.py @@ -16,23 +16,22 @@ """ import argparse -from typing import Dict, List, Tuple +from typing import Dict, List, Optional, Tuple +import matplotlib import numpy as np from matplotlib import pyplot as plt import xgboost as xgb -def plot_predt(y: np.ndarray, y_predt: np.ndarray, name: str) -> None: +def plot_predt( + y: np.ndarray, y_predt: np.ndarray, name: str, ax: matplotlib.axes.Axes +) -> None: s = 25 - plt.scatter(y[:, 0], y[:, 1], c="navy", s=s, edgecolor="black", label="data") - plt.scatter( - y_predt[:, 0], y_predt[:, 1], c="cornflowerblue", s=s, edgecolor="black" - ) - plt.xlim([-1, 2]) - plt.ylim([-1, 2]) - plt.show() + ax.scatter(y[:, 0], y[:, 1], c="navy", s=s, edgecolor="black", label=name) + ax.scatter(y_predt[:, 0], y_predt[:, 1], c="cornflowerblue", s=s, edgecolor="black") + ax.legend() def gen_circle() -> Tuple[np.ndarray, np.ndarray]: @@ -46,7 +45,7 @@ def gen_circle() -> Tuple[np.ndarray, np.ndarray]: return X, y -def rmse_model(plot_result: bool, strategy: str) -> None: +def rmse_model(strategy: str, ax: Optional[matplotlib.axes.Axes]) -> None: """Draw a circle with 2-dim coordinate as target variables.""" X, y = gen_circle() # Train a regressor on it @@ -61,11 +60,11 @@ def rmse_model(plot_result: bool, 
strategy: str) -> None: reg.fit(X, y, eval_set=[(X, y)]) y_predt = reg.predict(X) - if plot_result: - plot_predt(y, y_predt, "multi") + if ax: + plot_predt(y, y_predt, f"RMSE-{strategy}", ax) -def custom_rmse_model(plot_result: bool, strategy: str) -> None: +def custom_rmse_model(strategy: str, ax: Optional[matplotlib.axes.Axes]) -> None: """Train using Python implementation of Squared Error.""" def gradient(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray: @@ -87,7 +86,7 @@ def squared_log( def rmse(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]: y = dtrain.get_label().reshape(predt.shape) - v = np.sqrt(np.sum(np.power(y - predt, 2))) + v = np.sqrt(np.mean(np.power(y - predt, 2))) return "PyRMSE", v X, y = gen_circle() @@ -111,25 +110,36 @@ def rmse(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]: ) y_predt = booster.inplace_predict(X) - if plot_result: - plot_predt(y, y_predt, "multi") + if ax: + plot_predt(y, y_predt, f"PyRMSE-{strategy}", ax) + + np.testing.assert_allclose( + results["Train"]["rmse"], results["Train"]["PyRMSE"], rtol=1e-2 + ) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--plot", choices=[0, 1], type=int, default=1) args = parser.parse_args() + if args.plot == 1: + _, axs = plt.subplots(2, 2) + else: + axs = np.full(shape=(2, 2), fill_value=None) + assert isinstance(axs, np.ndarray) # Train with builtin RMSE objective # - One model per output. - rmse_model(args.plot == 1, "one_output_per_tree") + rmse_model("one_output_per_tree", axs[0, 0]) # - One model for all outputs, this is still working in progress, many features are # missing. - rmse_model(args.plot == 1, "multi_output_tree") + rmse_model("multi_output_tree", axs[0, 1]) # Train with custom objective. # - One model per output. - custom_rmse_model(args.plot == 1, "one_output_per_tree") + custom_rmse_model("one_output_per_tree", axs[1, 0]) # - One model for all outputs, this is still working in progress, many features are # missing. 
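The refactored demo drives both vector-leaf strategies through shared plotting axes; a compact sketch of the same comparison via the sklearn wrapper (synthetic two-target data, illustrative sizes):

```python
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(1994)
X = rng.normal(size=(256, 4))
# Two regression targets per sample.
y = np.stack([np.sin(X[:, 0]), np.cos(X[:, 1])], axis=1)

for strategy in ("one_output_per_tree", "multi_output_tree"):
    reg = xgb.XGBRegressor(
        tree_method="hist", multi_strategy=strategy, n_estimators=16
    )
    reg.fit(X, y, eval_set=[(X, y)])
    # Both strategies predict one column per target.
    assert reg.predict(X).shape == (256, 2)
```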
- custom_rmse_model(args.plot == 1, "multi_output_tree") + custom_rmse_model("multi_output_tree", axs[1, 1]) + if args.plot == 1: + plt.show() diff --git a/demo/guide-python/predict_first_ntree.py b/demo/guide-python/predict_first_ntree.py index 78137b4e1103..312522fc5b6a 100644 --- a/demo/guide-python/predict_first_ntree.py +++ b/demo/guide-python/predict_first_ntree.py @@ -14,7 +14,7 @@ test = os.path.join(CURRENT_DIR, "../data/agaricus.txt.test") -def native_interface(): +def native_interface() -> None: # load data in do training dtrain = xgb.DMatrix(train + "?format=libsvm") dtest = xgb.DMatrix(test + "?format=libsvm") @@ -34,7 +34,7 @@ def native_interface(): print("error of ypred2=%f" % (np.sum((ypred2 > 0.5) != label) / float(len(label)))) -def sklearn_interface(): +def sklearn_interface() -> None: X_train, y_train = load_svmlight_file(train) X_test, y_test = load_svmlight_file(test) clf = xgb.XGBClassifier(n_estimators=3, max_depth=2, eta=1) diff --git a/demo/guide-python/sklearn_examples.py b/demo/guide-python/sklearn_examples.py index 42baf6883f85..4e0392988cb3 100644 --- a/demo/guide-python/sklearn_examples.py +++ b/demo/guide-python/sklearn_examples.py @@ -11,9 +11,15 @@ """ import pickle +from urllib.error import HTTPError import numpy as np -from sklearn.datasets import fetch_california_housing, load_digits, load_iris +from sklearn.datasets import ( + fetch_california_housing, + load_digits, + load_iris, + make_regression, +) from sklearn.metrics import confusion_matrix, mean_squared_error from sklearn.model_selection import GridSearchCV, KFold, train_test_split @@ -44,7 +50,13 @@ print(confusion_matrix(actuals, predictions)) print("California Housing: regression") -X, y = fetch_california_housing(return_X_y=True) + +try: + X, y = fetch_california_housing(return_X_y=True) +except HTTPError: + # Use a synthetic dataset instead if we couldn't + X, y = make_regression(n_samples=20640, n_features=8, random_state=1234) + kf = KFold(n_splits=2, shuffle=True, random_state=rng) for train_index, test_index in kf.split(X): xgb_model = xgb.XGBRegressor(n_jobs=1).fit(X[train_index], y[train_index]) diff --git a/demo/guide-python/sklearn_parallel.py b/demo/guide-python/sklearn_parallel.py index db5303ab7118..2f62d2b48ca3 100644 --- a/demo/guide-python/sklearn_parallel.py +++ b/demo/guide-python/sklearn_parallel.py @@ -4,15 +4,20 @@ """ import multiprocessing +from urllib.error import HTTPError -from sklearn.datasets import fetch_california_housing +from sklearn.datasets import fetch_california_housing, make_regression from sklearn.model_selection import GridSearchCV import xgboost as xgb if __name__ == "__main__": print("Parallel Parameter optimization") - X, y = fetch_california_housing(return_X_y=True) + try: + X, y = fetch_california_housing(return_X_y=True) + except HTTPError: + # Use a synthetic dataset instead if we couldn't + X, y = make_regression(n_samples=20640, n_features=8, random_state=1234) # Make sure the number of threads is balanced. 
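The "balanced" comment above is the whole point of sklearn_parallel.py: split the available cores between XGBoost's internal threads and the search's concurrent fits so neither oversubscribes the machine. A hedged sketch of that split (the two-way division is illustrative):

```python
import multiprocessing

from sklearn.datasets import make_regression
from sklearn.model_selection import GridSearchCV

import xgboost as xgb

if __name__ == "__main__":
    X, y = make_regression(n_samples=4096, n_features=8, random_state=1234)
    n_cpus = multiprocessing.cpu_count()
    # Each fit gets half of the cores; the search runs two fits at a time.
    reg = xgb.XGBRegressor(n_jobs=max(n_cpus // 2, 1), tree_method="hist")
    search = GridSearchCV(
        reg,
        {"max_depth": [2, 4, 6], "n_estimators": [50, 100]},
        n_jobs=2,
        verbose=1,
    )
    search.fit(X, y)
    print(search.best_params_, search.best_score_)
```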
xgb_model = xgb.XGBRegressor( n_jobs=multiprocessing.cpu_count() // 2, tree_method="hist" diff --git a/demo/guide-python/spark_estimator_examples.py b/demo/guide-python/spark_estimator_examples.py index ac36065bc300..2437d2fd59a2 100644 --- a/demo/guide-python/spark_estimator_examples.py +++ b/demo/guide-python/spark_estimator_examples.py @@ -5,10 +5,11 @@ @author: Weichen Xu """ +import numpy as np import sklearn.datasets from pyspark.ml.evaluation import MulticlassClassificationEvaluator, RegressionEvaluator from pyspark.ml.linalg import Vectors -from pyspark.sql import SparkSession +from pyspark.sql import DataFrame, SparkSession from pyspark.sql.functions import rand from sklearn.model_selection import train_test_split @@ -17,7 +18,7 @@ spark = SparkSession.builder.master("local[*]").getOrCreate() -def create_spark_df(X, y): +def create_spark_df(X: np.ndarray, y: np.ndarray) -> DataFrame: return spark.createDataFrame( spark.sparkContext.parallelize( [(Vectors.dense(features), float(label)) for features, label in zip(X, y)] diff --git a/demo/guide-python/update_process.py b/demo/guide-python/update_process.py index 17bbbc39c82c..2ef6e7fc31c9 100644 --- a/demo/guide-python/update_process.py +++ b/demo/guide-python/update_process.py @@ -7,16 +7,22 @@ """ +from urllib.error import HTTPError + import numpy as np -from sklearn.datasets import fetch_california_housing +from sklearn.datasets import fetch_california_housing, make_regression import xgboost as xgb -def main(): +def main() -> None: n_rounds = 32 - X, y = fetch_california_housing(return_X_y=True) + try: + X, y = fetch_california_housing(return_X_y=True) + except HTTPError: + # Use a synthetic dataset instead if we couldn't + X, y = make_regression(n_samples=20640, n_features=8, random_state=1234) # Train a model first X_train = X[: X.shape[0] // 2] @@ -50,7 +56,7 @@ def main(): # Refresh the model without changing the leaf value, but tree statistic including # cover and weight are refreshed. - refresh_result: xgb.callback.EvaluationMonitor.EvalsLog = {} + refresh_result = {} refreshed = xgb.train( {"process_type": "update", "updater": "refresh", "refresh_leaf": False}, Xy_refresh, diff --git a/dev/prepare_jvm_release.py b/dev/prepare_jvm_release.py index c5a72724f707..ce82d8f59777 100644 --- a/dev/prepare_jvm_release.py +++ b/dev/prepare_jvm_release.py @@ -1,3 +1,33 @@ +""" +Helper script to prepare for releasing XGBoost JVM packages to +Maven Central. + +## Prerequisite + +1. You must have the right to upload artifacts to the Maven Central repo. + If you do not, contact Hyunsu Cho (chohyu01@cs.washington.edu) so that + he can contact Sonatype on your behalf in order to add you as a + "producer" user for the ml.dmlc namespace. See + https://central.sonatype.org/pages/support/#status to learn about + the process for adding or removing users who can publish to the project. + +2. Follow instructions in + https://central.sonatype.org/publish/publish-portal-maven/#credentials + to set up the authentication token in your machine. + +3. 
Set up GPG for signing artifacts: + https://central.sonatype.org/publish/requirements/gpg/ + +## Making the release +Run this script 4 times: + +python3 dev/prepare_jvm_release.py --scala-version 2.12 --variant cpu +python3 dev/prepare_jvm_release.py --scala-version 2.12 --variant gpu +python3 dev/prepare_jvm_release.py --scala-version 2.13 --variant cpu +python3 dev/prepare_jvm_release.py --scala-version 2.13 --variant gpu + +""" + import argparse import errno import glob @@ -52,7 +82,7 @@ def cd(path): def run(command, **kwargs): print(command) - subprocess.check_call(command, shell=True, **kwargs) + subprocess.run(command, shell=True, check=True, **kwargs) def get_current_commit_hash(): @@ -74,141 +104,135 @@ def retrieve(url, filename=None): def main(): - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawTextHelpFormatter + ) parser.add_argument( "--release-version", type=str, required=True, help="Version of the release being prepared", ) + parser.add_argument( + "--scala-version", + type=str, + required=True, + help="Version of Scala to use in the JVM packages", + choices=["2.12", "2.13"], + ) + parser.add_argument( + "--variant", + type=str, + required=True, + choices=["cpu", "gpu"], + help="JVM package variant to package and publish", + ) + args = parser.parse_args() version = args.release_version + scala_version = args.scala_version + use_cuda = args.variant == "gpu" commit_hash = get_current_commit_hash() git_branch = get_current_git_branch() - print( - f"Using commit {commit_hash} of branch {git_branch}" + print(f"Using commit {commit_hash} of branch {git_branch}") + print(f"====Update pom.xml to use Scala {scala_version}====") + run( + f"{sys.executable} ops/script/change_scala_version.py " + f"--scala-version {scala_version} --purge-artifacts" ) with cd("jvm-packages/"): - print("====copying pure-Python tracker====") - for use_cuda in [True, False]: - xgboost4j = "xgboost4j-gpu" if use_cuda else "xgboost4j" - cp( - "../python-package/xgboost/tracker.py", - f"{xgboost4j}/src/main/resources", - ) - - print("====copying resources for testing====") - with cd("../demo/CLI/regression"): + print("====Copying resources for testing====") + with cd("../demo/data/regression"): run(f"{sys.executable} mapfeat.py") run(f"{sys.executable} mknfold.py machine.txt 1") - for use_cuda in [True, False]: - xgboost4j = "xgboost4j-gpu" if use_cuda else "xgboost4j" - xgboost4j_spark = "xgboost4j-spark-gpu" if use_cuda else "xgboost4j-spark" - maybe_makedirs(f"{xgboost4j}/src/test/resources") - maybe_makedirs(f"{xgboost4j_spark}/src/test/resources") - for file in glob.glob("../demo/data/agaricus.*"): - cp(file, f"{xgboost4j}/src/test/resources") - cp(file, f"{xgboost4j_spark}/src/test/resources") - for file in glob.glob("../demo/CLI/regression/machine.txt.t*"): - cp(file, f"{xgboost4j_spark}/src/test/resources") + xgboost4j_spark = "xgboost4j-spark-gpu" if use_cuda else "xgboost4j-spark" + maybe_makedirs("xgboost4j/src/test/resources") + maybe_makedirs(f"{xgboost4j_spark}/src/test/resources") + for file in glob.glob("../demo/data/agaricus.*"): + cp(file, "xgboost4j/src/test/resources") + cp(file, f"{xgboost4j_spark}/src/test/resources") + for file in glob.glob("../demo/data/regression/machine.txt.t*"): + cp(file, f"{xgboost4j_spark}/src/test/resources") print("====Creating directories to hold native binaries====") - for os_ident, arch in [ - ("linux", "x86_64"), - ("linux", "aarch64"), - ("windows", "x86_64"), - ("macos", "x86_64"), 
- ("macos", "aarch64"), - ]: + if use_cuda: + # TODO(hcho3): Add GPU build for linux aarch64 + matrix = [("linux", "x86_64")] + else: + matrix = [ + ("linux", "x86_64"), + ("linux", "aarch64"), + ("windows", "x86_64"), + ("macos", "x86_64"), + ("macos", "aarch64"), + ] + for os_ident, arch in matrix: output_dir = f"xgboost4j/src/main/resources/lib/{os_ident}/{arch}" maybe_makedirs(output_dir) - for os_ident, arch in [("linux", "x86_64")]: - output_dir = f"xgboost4j-gpu/src/main/resources/lib/{os_ident}/{arch}" - maybe_makedirs(output_dir) print("====Downloading native binaries from CI====") - nightly_bucket_prefix = ( - "https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds" - ) - maven_repo_prefix = ( - "https://s3-us-west-2.amazonaws.com/xgboost-maven-repo/release/ml/dmlc" - ) - - retrieve( - url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/xgboost4j_{commit_hash}.dll", - filename="xgboost4j/src/main/resources/lib/windows/x86_64/xgboost4j.dll", - ) - retrieve( - url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_linux_x86_64_{commit_hash}.so", - filename="xgboost4j/src/main/resources/lib/linux/x86_64/libxgboost4j.so", - ) - retrieve( - url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_linux_arm64_{commit_hash}.so", - filename="xgboost4j/src/main/resources/lib/linux/aarch64/libxgboost4j.so", - ) - retrieve( - url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_{commit_hash}.dylib", - filename="xgboost4j/src/main/resources/lib/macos/x86_64/libxgboost4j.dylib", - ) - retrieve( - url=f"{nightly_bucket_prefix}/{git_branch}/libxgboost4j/libxgboost4j_m1_{commit_hash}.dylib", - filename="xgboost4j/src/main/resources/lib/macos/aarch64/libxgboost4j.dylib", - ) - - with tempfile.TemporaryDirectory() as tempdir: - # libxgboost4j.so for Linux x86_64, GPU support - zip_path = os.path.join(tempdir, "xgboost4j-gpu_2.12.jar") - extract_dir = os.path.join(tempdir, "xgboost4j-gpu") - retrieve( - url=f"{maven_repo_prefix}/xgboost4j-gpu_2.12/{version}/" - f"xgboost4j-gpu_2.12-{version}.jar", - filename=zip_path, + if use_cuda: + url_prefix = ( + "https://s3-us-west-2.amazonaws.com/xgboost-maven-repo/release/ml/dmlc" + ) + with tempfile.TemporaryDirectory() as tempdir: + # libxgboost4j.so for Linux x86_64, GPU support + zip_path = os.path.join(tempdir, "xgboost4j-spark-gpu_2.12.jar") + extract_dir = os.path.join(tempdir, "xgboost4j-spark-gpu") + retrieve( + url=f"{url_prefix}/xgboost4j-spark-gpu_2.12/{version}/" + f"xgboost4j-spark-gpu_2.12-{version}.jar", + filename=zip_path, + ) + os.mkdir(extract_dir) + with zipfile.ZipFile(zip_path, "r") as t: + t.extractall(extract_dir) + cp( + os.path.join( + extract_dir, "lib", "linux", "x86_64", "libxgboost4j.so" + ), + "xgboost4j/src/main/resources/lib/linux/x86_64/libxgboost4j.so", + ) + run( + "mvn --no-transfer-progress install -Pgpu " + "-DskipTests -Dmaven.test.skip=true -Dskip.native.build=true" ) - os.mkdir(extract_dir) - with zipfile.ZipFile(zip_path, "r") as t: - t.extractall(extract_dir) - cp( - os.path.join(extract_dir, "lib", "linux", "x86_64", "libxgboost4j.so"), - "xgboost4j-gpu/src/main/resources/lib/linux/x86_64/libxgboost4j.so", + run( + "mvn deploy -Pgpu,release -pl xgboost4j-spark-gpu " + "-DskipTests -Dmaven.test.skip=true -Dskip.native.build=true" + ) + else: + url_prefix = "https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds" + for os_ident, arch, src_libname, dest_libname in [ + ("linux", "x86_64", "libxgboost4j_linux_x86_64.so", 
"libxgboost4j.so"), + ( + "linux", + "aarch64", + "libxgboost4j_linux_aarch64.so", + "libxgboost4j.so", + ), + ("windows", "x86_64", "xgboost4j.dll", "xgboost4j.dll"), + ("macos", "x86_64", "libxgboost4j_intel.dylib", "libxgboost4j.dylib"), + ("macos", "aarch64", "libxgboost4j_m1.dylib", "libxgboost4j.dylib"), + ]: + retrieve( + url=f"{url_prefix}/{git_branch}/{commit_hash}/{src_libname}", + filename=( + "xgboost4j/src/main/resources/lib/" + f"{os_ident}/{arch}/{dest_libname}" + ), + ) + run( + "mvn --no-transfer-progress deploy -Pdefault,release " + "-DskipTests -Dmaven.test.skip=true -Dskip.native.build=true" ) print("====Next Steps====") - print("1. Gain upload right to Maven Central repo.") - print("1-1. Sign up for a JIRA account at Sonatype: ") - print( - "1-2. File a JIRA ticket: " - "https://issues.sonatype.org/secure/CreateIssue.jspa?issuetype=21&pid=10134.%20Example:" - "https://issues.sonatype.org/browse/OSSRH-67724" - ) - print( - "2. Store the Sonatype credentials in .m2/settings.xml. See insturctions in " - "https://central.sonatype.org/publish/publish-maven/" - ) - print( - "3. Now on a Linux machine, run the following to build Scala 2.12 artifacts. " - "Make sure to use an Internet connection with fast upload speed:" - ) - print( - " # Skip native build, since we have all needed native binaries from CI\n" - " GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests -Dskip.native.build=true" - ) - print( - "4. Log into https://oss.sonatype.org/. On the left menu panel, click Staging " - "Repositories. Visit the URL https://oss.sonatype.org/content/repositories/mldmlc-xxxx " - "to inspect the staged JAR files. Finally, press Release button to publish the " - "artifacts to the Maven Central repository. The top-level metapackage should be " - "named xgboost-jvm_2.12." - ) - print( - "5. Remove the Scala 2.12 artifacts and build Scala 2.13 artifacts:\n" - " python ops/script/change_scala_version.py --scala-version 2.13 --purge-artifacts\n" - " GPG_TTY=$(tty) mvn deploy -Prelease -DskipTests -Dskip.native.build=true" - ) print( - "6. Go to https://oss.sonatype.org/ to release the Scala 2.13 artifacts. " - "The top-level metapackage should be named xgboost-jvm_2.13." + "Visit https://central.sonatype.com/publishing/deployments to verify the deployment. " + "You can either drop the deployment or publish it. Note: publishing is final." ) diff --git a/dmlc-core b/dmlc-core index 133418575498..8986b0598df7 160000 --- a/dmlc-core +++ b/dmlc-core @@ -1 +1 @@ -Subproject commit 13341857549852a9a86b1894b5ba84c6276ab381 +Subproject commit 8986b0598df709117570984571476c3614f55724 diff --git a/doc/R-package/index.rst b/doc/R-package/index.rst index 30b6f83e5232..0bdcf2dcdc76 100644 --- a/doc/R-package/index.rst +++ b/doc/R-package/index.rst @@ -17,9 +17,16 @@ You have found the XGBoost R Package! Get Started *********** -* Checkout the :doc:`Installation Guide ` contains instructions to install xgboost, and :doc:`Tutorials ` for examples on how to use XGBoost for various tasks. -* Read the latest `API documentation <../r_docs/R-package/docs/reference/index.html>`__ . This might refer to a newer version than the one on CRAN. -* Read the `CRAN documentation `_. +Since XGBoost 3.0.0, the latest R package is available on `R-universe +`__ while the one on CRAN is kept at an older +version. We will work on helping the CRAN version to catch up in the future. In the +meantime, please use R-universe packages. 
+ +* Check out the :doc:`Installation Guide ` for instructions on how to install + xgboost, and :doc:`Tutorials ` for examples on how to use XGBoost for + various tasks. +* Read the latest `API documentation <../r_docs/R-package/docs/reference/index.html>`__. +* Read the `CRAN documentation `_ (outdated). ********* Vignettes diff --git a/doc/R-package/migration_guide.rst b/doc/R-package/migration_guide.rst index 9ce8efb110b0..18e03e743a0d 100644 --- a/doc/R-package/migration_guide.rst +++ b/doc/R-package/migration_guide.rst @@ -40,7 +40,7 @@ XGBoost's R language bindings had large breaking changes between versions 1.x an - The structure of these objects has been modified - now they are represented as a simple R "ALTLIST" (a special kind of 'list' object) with additional attributes. - These objects now cannot be modified by adding more fields to them, but metadata for them can be added as attributes. - The objects distinguish between two types of attributes: - + - R-side attributes (which can be accessed and modified through R function ``attributes(model)`` and ``attributes(model)$field <- val``), which allow arbitrary objects. Many attributes are automatically added by the model building functions, such as evaluation logs (a ``data.table`` with metrics calculated per iteration), which previously were model fields. - C-level attributes, which allow only JSON-compliant data and which can be accessed and set through function ``xgb.attributes(model)``. These C-level attributes are shareable through serialized models in different XGBoost interfaces, while the R-level ones are specific to the R interface. Some attributes that are standard among language bindings of XGBoost, such as the best iteration, are kept as C attributes. - Previously, models that were just de-serialized from an on-disk format required calling method 'xgb.Booster.complete' on them to finish the full de-serialization process before being usable, or would otherwise call this method on their own automatically at the first call to 'predict'. Serialization is now handled more gracefully, and there are no additional functions/methods involved - i.e. if one saves a model to disk with ``saveRDS()`` and then reads it back with ``readRDS()``, the model will be fully loaded straight away, without needing to call additional methods on it. @@ -53,11 +53,13 @@ By default, XGBoost might recognize that some parameter has been removed or rena These behaviors will be removed in future versions, and function calls which currently return deprecation warnings will stop working in the future, so in order to make sure that code calling XGBoost will still keep working, it should be ensured that it doesn't issue deprecation warnings. Optionally, these deprecation warnings can be turned into errors (while still keeping other types of warnings as warnings) through an option "xgboost.strict_mode" - example: + .. code-block:: r options("xgboost.strict_mode" = TRUE) It can also be controlled through an environment variable `XGB_STRICT_MODE=1`, which takes precedence over the R option - e.g.: + .. code-block:: r Sys.setenv("XGB_STRICT_MODE" = "1") diff --git a/doc/changes/index.rst b/doc/changes/index.rst index 09bc215075e4..d376c490853f 100644 --- a/doc/changes/index.rst +++ b/doc/changes/index.rst @@ -8,4 +8,6 @@ For release notes prior to the 2.1 release, please see `news ` for more info. +- Optimization for nearly-dense input, see the section for :ref:`optimization + <3_0_optimization>` for more info.
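The notes above introduce `ExtMemQuantileDMatrix` by name only; a minimal CPU sketch with a custom `DataIter` follows (synthetic in-memory batches stand in for real external storage; names and sizes are illustrative):

```python
import numpy as np
import xgboost


class SyntheticBatches(xgboost.DataIter):
    """Yield a few synthetic batches, standing in for external storage."""

    def __init__(self, n_batches: int) -> None:
        self._n_batches = n_batches
        self._it = 0
        super().__init__(cache_prefix="cache")

    def next(self, input_data) -> bool:
        if self._it == self._n_batches:
            return False  # no more batches
        rng = np.random.default_rng(self._it)
        X = rng.normal(size=(4096, 16))
        y = X[:, 0] + rng.normal(scale=0.1, size=X.shape[0])
        input_data(data=X, label=y)
        self._it += 1
        return True

    def reset(self) -> None:
        self._it = 0


it = SyntheticBatches(n_batches=4)
# Batches are quantized into the external-memory cache as they stream in.
Xy = xgboost.ExtMemQuantileDMatrix(it)
booster = xgboost.train({"tree_method": "hist"}, Xy, num_boost_round=8)
```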
+ +See our latest document, :doc:`/tutorials/external_memory`, for details. The PyPI package +(``pip install``) doesn't have ``RMM`` support, which is required by the GPU external +memory implementation. To experiment, you can compile XGBoost from source or wait for the +RAPIDS conda package to be available. + +.. _3_0_networking: + +********** +Networking +********** + +Continuing the work from the previous release, we updated the network module to improve +reliability. (:pr:`10453`, :pr:`10756`, :pr:`11111`, :pr:`10914`, :pr:`10828`, :pr:`10735`, :pr:`10693`, :pr:`10676`, :pr:`10349`, +:pr:`10397`, :pr:`10566`, :pr:`10526`) + +The timeout option is now supported for NCCL using the NCCL asynchronous mode (:pr:`10850`, +:pr:`10934`, :pr:`10945`, :pr:`10930`). + +In addition, a new :py:class:`~xgboost.collective.Config` class is added for users to +specify various options including timeout, tracker port, etc., for distributed +training. Both the Dask interface and the PySpark interface support the new +configuration. (:pr:`11003`, :pr:`10281`, :pr:`10983`, :pr:`10973`) + +**** +SYCL +**** + +Continuing the work on the SYCL integration, there are significant improvements in the +feature coverage for this release, from more training parameters and more objectives to +distributed training, along with various optimizations (:pr:`10884`, :pr:`10883`). + +Starting with 3.0, the SYCL plugin is close to feature-complete; users can start working +on SYCL devices for in-core training and inference. Newly introduced features include: + +- Dask support for distributed training (:pr:`10812`) + +- Various training procedures, including split evaluation (:pr:`10605`, :pr:`10636`), grow policy + (:pr:`10690`, :pr:`10681`), and cached prediction (:pr:`10701`). + +- Updates for objective functions. (:pr:`11029`, :pr:`10931`, :pr:`11016`, :pr:`10993`, :pr:`11064`, :pr:`10325`) + +- Ongoing work for float32-only devices. (:pr:`10702`) + +Other related PRs: (:pr:`10842`, :pr:`10543`, :pr:`10806`, :pr:`10943`, :pr:`10987`, :pr:`10548`, :pr:`10922`, :pr:`10898`, :pr:`10576`) + +.. _3_0_features: + +******** +Features +******** + +This section describes new features in the XGBoost core. For language-specific features, +please visit the corresponding sections. + +- A new initialization method for objectives that are derived from GLM. The new method is + based on the mean value of the input labels. The new method changes the result of the + estimated ``base_score``. (:pr:`10298`, :pr:`11331`) + +- The :py:class:`xgboost.QuantileDMatrix` can be used with all prediction types for both + CPU and GPU. + +- In prior releases, XGBoost made a copy of the booster to release memory held by + internal tree methods. We have formalized the procedure into a new booster method + :py:meth:`~xgboost.Booster.reset` / :cpp:func:`XGBoosterReset`. (:pr:`11042`) + +- OpenMP thread setting is exposed to the XGBoost global configuration. Users can use it + to work around hardcoded OpenMP environment variables. (:pr:`11175`) + +- We improved learning to rank tasks for better hyper-parameter configuration and for + distributed training. + + + In 3.0, all three distributed interfaces, including Dask, Spark, and PySpark, support + sorting the data based on query ID. The option for the + :py:class:`~xgboost.dask.DaskXGBRanker` is enabled by default and can be opted + out.
(:pr:`11146`, :pr:`11007`, :pr:`11047`, :pr:`11012`, :pr:`10823`, :pr:`11023`) + + + Also for learning to rank, a new parameter ``lambdarank_score_normalization`` is + introduced to make one of the normalizations optional. (:pr:`11272`) + + + The ``lambdarank_normalization`` parameter now uses the number of pairs when normalizing the + ``mean`` pair strategy. Previously, the gradient was used for both ``topk`` and + ``mean``. (:pr:`11322`) + +- We have improved GPU quantile sketching to reduce memory usage. The improvement helps + the construction of the :py:class:`~xgboost.QuantileDMatrix` and the new + :py:class:`~xgboost.ExtMemQuantileDMatrix`. + + + A new multi-level sketching algorithm is employed to reduce the overall memory usage + with batched inputs. + + In addition to algorithmic changes, internal memory usage estimation and the quantile + container are also updated. (:pr:`10761`, :pr:`10843`) + + The change introduces two more parameters for the :py:class:`~xgboost.QuantileDMatrix` + and :py:class:`~xgboost.DataIter`, namely, ``max_quantile_batches`` and + ``min_cache_page_bytes``. + +- Work is ongoing to improve support for categorical features. This release + supports plotting trees with stats for categorical nodes (:pr:`11053`). In addition, some + preparation work is ongoing for auto re-coding categories. (:pr:`11094`, :pr:`11114`, + :pr:`11089`) These are feature enhancements instead of blocking issues. +- Implement weight-based feature importance for vector-leaf trees. (:pr:`10700`) +- Reduced logging in the DMatrix construction. (:pr:`11080`) + +.. _3_0_optimization: + +************ +Optimization +************ + +In addition to the external memory and quantile sketching improvements, we have a number +of optimizations and performance fixes. + +- GPU tree methods now use significantly less memory for both dense inputs and near-dense + inputs. (:pr:`10821`, :pr:`10870`) +- For near-dense inputs, GPU training is much faster for both ``hist`` (about 2x) and + ``approx``. +- Quantile regression on CPU can now handle imbalanced trees much more efficiently. (:pr:`11275`) +- Small optimization for DMatrix construction to reduce latency. Also, C users can now + reuse the :cpp:func:`ProxyDMatrix ` for multiple inference + calls. (:pr:`11273`) +- CPU prediction performance for :py:class:`~xgboost.QuantileDMatrix` has been improved + (:pr:`11139`) and is now on par with the normal ``DMatrix``. +- Fixed a performance issue for running inference using CPU with extremely sparse + :py:class:`~xgboost.QuantileDMatrix` (:pr:`11250`). +- Optimize CPU training memory allocation for improved performance. (:pr:`11112`) +- Improved RMM (RAPIDS Memory Manager) integration. Now, with the help of + :py:func:`~xgboost.config_context`, all memory allocated by XGBoost should be routed to + RMM. As a bonus, all ``thrust`` algorithms now use the async policy. (:pr:`10873`, :pr:`11173`, :pr:`10712`, + :pr:`10562`) +- When used without RMM, XGBoost is more careful with its use of the caching allocator to + avoid holding too much device memory. (:pr:`10582`) + +**************** +Breaking Changes +**************** +This section lists breaking changes that affect all packages. + +- Remove the deprecated ``DeviceQuantileDMatrix``. (:pr:`10974`, :pr:`10491`) +- Support for saving the model in the deprecated binary format has been removed. Users can still + load old models in 3.0. (:pr:`10490`) +- Support for the legacy (blocking) CUDA stream is removed. (:pr:`10607`) +- XGBoost now requires CUDA 12.0 or later.
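For the first breaking change above, migration is mechanical: `QuantileDMatrix` accepts device data directly. A small sketch (cupy arrays; shapes are illustrative):

```python
import cupy as cp
import xgboost as xgb

X = cp.random.randn(1024, 8)
y = cp.random.randn(1024)

# 2.x: Xy = xgb.DeviceQuantileDMatrix(X, y)  # removed in 3.0
Xy = xgb.QuantileDMatrix(X, label=y)  # drop-in replacement
booster = xgb.train(
    {"tree_method": "hist", "device": "cuda"}, Xy, num_boost_round=8
)
```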
+
+*********
+Bug Fixes
+*********
+- Fix the quantile error metric (pinball loss) with multiple quantiles. (:pr:`11279`)
+- Fix a potential access error when running prediction in a multi-threaded
+  environment. (:pr:`11167`)
+- Check the correct dump format for ``gblinear``. (:pr:`10831`)
+
+*************
+Documentation
+*************
+- A new tutorial for advanced usage with custom objective functions. (:pr:`10283`, :pr:`10725`)
+- The new online document site now shows documents for all packages including Python, R,
+  and JVM-based packages. (:pr:`11240`, :pr:`11216`, :pr:`11166`)
+- Lots of enhancements. (:pr:`10822`, :pr:`11137`, :pr:`11138`, :pr:`11246`, :pr:`11266`, :pr:`11253`, :pr:`10731`, :pr:`11222`,
+  :pr:`10551`, :pr:`10533`)
+- Consistent use of CMake in documents. (:pr:`10717`)
+- Add a brief description for using the ``offset`` from the GLM setting (like
+  ``Poisson``). (:pr:`10996`)
+- Clean up the document for building from source. (:pr:`11145`)
+- Various fixes. (:pr:`10412`, :pr:`10405`, :pr:`10353`, :pr:`10464`, :pr:`10587`, :pr:`10350`, :pr:`11131`, :pr:`10815`)
+- Maintenance. (:pr:`11052`, :pr:`10380`)
+
+**************
+Python Package
+**************
+
+- The ``feature_weights`` parameter in the sklearn interface is now defined as
+  a scikit-learn parameter. (:pr:`9506`)
+- Initial support for polars; categorical features are not yet supported. (:pr:`11126`, :pr:`11172`,
+  :pr:`11116`)
+- Reduce pandas dataframe overhead and overhead for various imports. (:pr:`11058`, :pr:`11068`)
+- Better xlabel in :py:func:`~xgboost.plot_importance`. (:pr:`11009`)
+- Validate the reference dataset for training. The :py:func:`~xgboost.train` function now
+  throws an error if a :py:class:`~xgboost.QuantileDMatrix` is used as a validation
+  dataset without a reference. (:pr:`11105`)
+- Fix misleading errors when feature names are missing during inference. (:pr:`10814`)
+- Add ``stacklevel`` to the Python warning callback. The change helps improve warning
+  messages for the Python package. (:pr:`10977`)
+- Remove a circular reference in DataIter. It helps reduce memory usage. (:pr:`11177`)
+- Add checks for invalid inputs for ``cv``. (:pr:`11255`)
+- Update Python project classifiers. (:pr:`10381`, :pr:`11028`)
+- Support doc links for the sklearn module. Users can now find links to documents in a
+  Jupyter notebook. (:pr:`10287`)
+
+- Dask
+
+  + Prevent the training from hanging due to aborted workers. (:pr:`10985`) This helps make
+    Dask XGBoost robust against worker failures: when a worker is killed, the training
+    fails with an exception instead of hanging.
+  + Optional support for client-side logging. (:pr:`10942`)
+  + Fix LTR with empty partitions and an NCCL error. (:pr:`11152`)
+  + Update to work with the latest Dask. (:pr:`11291`)
+  + See the :ref:`3_0_features` section for changes to ranking models.
+  + See the :ref:`3_0_networking` section for changes to the communication module.
+
+- PySpark
+
+  + Expose training and validation metrics. (:pr:`11133`)
+  + Add a barrier before initializing the communicator. (:pr:`10938`)
+  + Extend support for columnar input to CPU (GPU-only previously). (:pr:`11299`)
+  + See the :ref:`3_0_features` section for changes to ranking models.
+  + See the :ref:`3_0_networking` section for changes to the communication module.
+
+- Document updates (:pr:`11265`).
+- Maintenance.
(:pr:`11071`, :pr:`11211`, :pr:`10837`, :pr:`10754`, :pr:`10347`, :pr:`10678`, :pr:`11002`, :pr:`10692`, :pr:`11006`,
+  :pr:`10972`, :pr:`10907`, :pr:`10659`, :pr:`10358`, :pr:`11149`, :pr:`11178`, :pr:`11248`)
+
+- Breaking changes
+
+  + Remove the deprecated ``feval``. (:pr:`11051`)
+  + Remove dask from the default import. (:pr:`10935`) Users are now required to import
+    the XGBoost Dask interface explicitly:
+
+    .. code-block:: python
+
+      from xgboost import dask as dxgb
+
+    instead of:
+
+    .. code-block:: python
+
+      import xgboost as xgb
+      xgb.dask
+
+    The change helps avoid introducing dask into the default import set.
+
+  + Bump the Python requirement to 3.10. (:pr:`10434`)
+  + Drop support for datatable. (:pr:`11070`)
+
+*********
+R Package
+*********
+
+We have been reworking the R package for a few releases now. In 3.0, we will start
+publishing the new R package on R-universe before moving toward a CRAN update. The new
+package features a much more ergonomic interface, which is also more idiomatic for R
+users. In addition, a range of new features is introduced to the package. To name a
+few, the new package includes categorical feature support, ``QuantileDMatrix``, and an
+initial implementation of external memory training. To test the new package:
+
+.. code-block:: R
+
+   install.packages('xgboost', repos = c('https://dmlc.r-universe.dev/', 'https://cloud.r-project.org/'))
+
+Also, we finally have an online documentation site for the R package featuring both
+vignettes and API references (:pr:`11166`, :pr:`11257`). A good starting point for the new interface
+is the new ``xgboost()`` function. We won't list all the feature gains here, as there are
+too many! Please visit :doc:`/R-package/index` for more information. A migration guide
+(:pr:`11197`) is available there if you are coming from a previous version of the R package.
+
+- Support for the MSVC build was dropped due to incompatibility with R headers. (:pr:`10355`,
+  :pr:`11150`)
+- Maintenance. (:pr:`11259`)
+- Related PRs. (:pr:`11171`, :pr:`11231`, :pr:`11223`, :pr:`11073`, :pr:`11224`, :pr:`11076`, :pr:`11084`, :pr:`11081`,
+  :pr:`11072`, :pr:`11170`, :pr:`11123`, :pr:`11168`, :pr:`11264`, :pr:`11140`, :pr:`11117`, :pr:`11104`, :pr:`11095`, :pr:`11125`, :pr:`11124`,
+  :pr:`11122`, :pr:`11108`, :pr:`11102`, :pr:`11101`, :pr:`11100`, :pr:`11077`, :pr:`11099`, :pr:`11074`, :pr:`11065`, :pr:`11092`, :pr:`11090`,
+  :pr:`11096`, :pr:`11148`, :pr:`11151`, :pr:`11159`, :pr:`11204`, :pr:`11254`, :pr:`11109`, :pr:`11141`, :pr:`10798`, :pr:`10743`, :pr:`10849`,
+  :pr:`10747`, :pr:`11022`, :pr:`10989`, :pr:`11026`, :pr:`11060`, :pr:`11059`, :pr:`11041`, :pr:`11043`, :pr:`11025`, :pr:`10674`, :pr:`10727`,
+  :pr:`10745`, :pr:`10733`, :pr:`10750`, :pr:`10749`, :pr:`10744`, :pr:`10794`, :pr:`10330`, :pr:`10698`, :pr:`10687`, :pr:`10688`, :pr:`10654`,
+  :pr:`10456`, :pr:`10556`, :pr:`10465`, :pr:`10337`)
+
+************
+JVM Packages
+************
+
+The XGBoost 3.0 release features a significant update to the JVM packages, and in
+particular, the Spark package. There are breaking changes in packaging and some
+parameters; please visit the migration guide for related changes. The work brings new
+features and a more unified feature set between the CPU and GPU
+implementations. (:pr:`10639`, :pr:`10833`, :pr:`10845`, :pr:`10847`, :pr:`10635`, :pr:`10630`, :pr:`11179`, :pr:`11184`)
+
+- Automatic partitioning for distributed learning to rank. See the :ref:`features
+  <3_0_features>` section above (:pr:`11023`).
+
+- Resolve a Spark compatibility issue (:pr:`10917`)
+- Support missing values when constructing a DMatrix with an iterator (:pr:`10628`)
+- Fix a transform performance issue (:pr:`10925`)
+- Honor the ``skip.native.build`` option in ``xgboost4j-gpu`` (:pr:`10496`)
+- Support the array features type for CPU (:pr:`10937`)
+- Change the default missing value to ``NaN`` for better alignment (:pr:`11225`)
+- Don't cast to float if it's already float (:pr:`10386`)
+- Maintenance. (:pr:`10982`, :pr:`10979`, :pr:`10978`, :pr:`10673`, :pr:`10660`, :pr:`10835`, :pr:`10836`, :pr:`10857`, :pr:`10618`,
+  :pr:`10627`)
+
+***********
+Maintenance
+***********
+
+Code maintenance includes both refactoring (:pr:`10531`, :pr:`10573`, :pr:`11069`), cleanups (:pr:`11129`,
+:pr:`10878`, :pr:`11244`, :pr:`10401`, :pr:`10502`, :pr:`11107`, :pr:`11097`, :pr:`11130`, :pr:`10758`, :pr:`10923`, :pr:`10541`, :pr:`10990`),
+and improvements for tests (:pr:`10611`, :pr:`10658`, :pr:`10583`, :pr:`11245`, :pr:`10708`), along with fixing
+various warnings in compilers and test dependencies (:pr:`10757`, :pr:`10641`, :pr:`11062`,
+:pr:`11226`). There are also miscellaneous updates, including some dev scripts and profiling
+annotations (:pr:`10485`, :pr:`10657`, :pr:`10854`, :pr:`10718`, :pr:`11158`, :pr:`10697`, :pr:`11276`).
+
+Lastly, dependency updates (:pr:`10362`, :pr:`10363`, :pr:`10360`, :pr:`10373`, :pr:`10377`, :pr:`10368`, :pr:`10369`,
+:pr:`10366`, :pr:`11032`, :pr:`11037`, :pr:`11036`, :pr:`11035`, :pr:`11034`, :pr:`10518`, :pr:`10536`, :pr:`10586`, :pr:`10585`, :pr:`10458`,
+:pr:`10547`, :pr:`10429`, :pr:`10517`, :pr:`10497`, :pr:`10588`, :pr:`10975`, :pr:`10971`, :pr:`10970`, :pr:`10949`, :pr:`10947`, :pr:`10863`,
+:pr:`10953`, :pr:`10954`, :pr:`10951`, :pr:`10590`, :pr:`10600`, :pr:`10599`, :pr:`10535`, :pr:`10516`, :pr:`10786`, :pr:`10859`, :pr:`10785`,
+:pr:`10779`, :pr:`10790`, :pr:`10777`, :pr:`10855`, :pr:`10848`, :pr:`10778`, :pr:`10772`, :pr:`10771`, :pr:`10862`, :pr:`10952`, :pr:`10768`,
+:pr:`10770`, :pr:`10769`, :pr:`10664`, :pr:`10663`, :pr:`10892`, :pr:`10979`, :pr:`10978`).
+
+***
+CI
+***
+
+- The CI is reworked to use `RunsOn` to integrate custom CI pipelines with GitHub
+  Actions. The migration helps us reduce the maintenance burden and makes the CI
+  configuration more accessible to others. (:pr:`11001`, :pr:`11079`, :pr:`10649`, :pr:`11196`, :pr:`11055`,
+  :pr:`10483`, :pr:`11078`, :pr:`11157`)
+
+- Other maintenance work includes various small fixes, enhancements, and tooling
+  updates. (:pr:`10877`, :pr:`10494`, :pr:`10351`, :pr:`10609`, :pr:`11192`, :pr:`11188`, :pr:`11142`, :pr:`10730`, :pr:`11066`,
+  :pr:`11063`, :pr:`10800`, :pr:`10995`, :pr:`10858`, :pr:`10685`, :pr:`10593`, :pr:`11061`)
diff --git a/doc/changes/v3.1.0.rst b/doc/changes/v3.1.0.rst
new file mode 100644
index 000000000000..227e8a369314
--- /dev/null
+++ b/doc/changes/v3.1.0.rst
@@ -0,0 +1,172 @@
+#################################
+3.1.1 Patch Release (Oct 22 2025)
+#################################
+
+- Emit the correct error when performing inplace-predict using a CPU-only version of
+  XGBoost with a GPU input. (:pr:`11761`)
+- Enhance the error message for loading the removed binary model format. (:pr:`11760`)
+- Use the correct group ID for SHAP when the intercept is a vector. (:pr:`11764`)
+
+###################
+3.1.0 (2025 Sep 22)
+###################
+
+We are delighted to share the latest 3.1.0 update for XGBoost.
+
+********************
+Categorical Re-coder
+********************
+
+This release features a major update to categorical data support by introducing a
+re-coder. The re-coder saves categories in the trained model and re-codes the data during
+inference to keep the categorical encoding consistent. Aside from primitive types like
+integers, it also supports string-based categories. The implementation works with all
+supported Python DataFrame implementations. (:pr:`11609`, :pr:`11665`, :pr:`11605`,
+:pr:`11628`, :pr:`11598`, :pr:`11591`, :pr:`11568`, :pr:`11561`, :pr:`11650`, :pr:`11621`,
+:pr:`11611`, :pr:`11313`, :pr:`11311`, :pr:`11310`, :pr:`11315`, :pr:`11303`, :pr:`11612`,
+:pr:`11098`, :pr:`11347`) See :ref:`cat-recode` for more information. (:pr:`11297`)
+
+In addition, categorical support for Polars data frames is now available (:pr:`11565`).
+
+Lastly, we removed the experimental tag for categorical feature support in this
+release. (:pr:`11690`)
+
+***************
+External Memory
+***************
+
+We continue the work on external memory support in 3.1. In this release, XGBoost features
+an adaptive cache for CUDA external memory. The improved cache can split the data between
+CPU memory and GPU memory according to the underlying hardware and data
+size. (:pr:`11556`, :pr:`11465`, :pr:`11664`, :pr:`11594`, :pr:`11469`, :pr:`11547`,
+:pr:`11339`, :pr:`11477`, :pr:`11453`, :pr:`11446`, :pr:`11458`, :pr:`11426`, :pr:`11566`,
+:pr:`11497`)
+
+Also, there is optional (opt-in) support for using ``nvcomp`` and the GB200
+decompression engine to handle sparse data (``nvcomp`` is required as a plugin) (:pr:`11451`,
+:pr:`11464`, :pr:`11460`, :pr:`11512`, :pr:`11520`). We improved the memory usage of
+quantile sketching with external memory (:pr:`11641`) and optimized the predictor for
+training (:pr:`11548`). To help ensure training performance, the latest XGBoost features
+NUMA (Non-Uniform Memory Access) node detection (:pr:`11538`, :pr:`11576`) to check for
+cross-socket data access. We are working on additional tooling to enhance NUMA node
+performance. Aside from features, we have also made various documentation
+improvements. (:pr:`11412`, :pr:`11631`)
+
+Lastly, external memory support with text file input has been removed
+(:pr:`11562`). Moving forward, we will focus on iterator inputs.
+
+
+****************************
+Multi-Target/Class Intercept
+****************************
+
+Starting with 3.1, the base-score (intercept) is estimated and stored as a vector when the
+model has multiple outputs, be it multi-target regression or multi-class
+classification. This change enhances the initial estimation for multi-output models and
+will be the starting point for future work on vector-leaf. (:pr:`11277`, :pr:`11651`,
+:pr:`11625`, :pr:`11649`, :pr:`11630`, :pr:`11647`, :pr:`11656`, :pr:`11663`)
+
+********
+Features
+********
+
+- Support leaf prediction with the ``QuantileDMatrix`` on CPU. (:pr:`11620`)
+- Improve seed with mean sampling for the first iteration. (:pr:`11639`)
+- Optionally include the git hash in the CMake build. (:pr:`11587`)
+
+****************************
+Removing Deprecated Features
+****************************
+
+This version removes some deprecated features, notably the binary IO format, along with
+features deprecated in 2.0.
+
+- The binary serialization format has been removed in 3.1. The format was formally
+  deprecated in 1.6.
(:pr:`11307`,
+  :pr:`11553`, :pr:`11552`, :pr:`11602`)
+
+- Removed old GPU-related parameters, including ``use_gpu`` (PySpark), ``gpu_id``,
+  ``gpu_hist``, and ``gpu_coord_descent``. These parameters were deprecated in 2.0. Use
+  the ``device`` parameter instead. (:pr:`11395`, :pr:`11554`, :pr:`11549`,
+  :pr:`11543`, :pr:`11539`, :pr:`11402`)
+
+- Remove deprecated C functions: ``XGDMatrixCreateFromCSREx``,
+  ``XGDMatrixCreateFromCSCEx``. (:pr:`11514`, :pr:`11513`)
+
+- XGBoost now emits a warning for text inputs. (:pr:`11590`)
+
+
+*************
+Optimizations
+*************
+
+- Optimize CPU inference with array-based tree traversal. (:pr:`11519`)
+- Specialize for GPU dense histogram. (:pr:`11443`)
+- [sycl] Improve L1 cache locality for histogram building. (:pr:`11555`)
+- [sycl] Reduce predictor memory consumption and improve L2 locality. (:pr:`11603`)
+
+*****
+Fixes
+*****
+
+- Fix statically linking C++ libraries on macOS. (:pr:`11522`)
+- Rename ``param.hh/cc`` to ``hist_param.hh/cc`` to fix the Xcode build. (:pr:`11378`)
+- [sycl] Fix the build with an updated compiler. (:pr:`11618`)
+- [sycl] Various fixes for fp32-only devices. (:pr:`11527`, :pr:`11524`)
+- Fix compilation on Android older than API 26. (:pr:`11366`)
+- Fix loading a Gamma model from 1.3. (:pr:`11377`)
+
+**************
+Python Package
+**************
+
+- Support mixing Python metrics and built-in metrics for the sklearn interface. (:pr:`11536`)
+- CUDA 13 support for PyPI with the new ``xgboost-cu13`` package. (:pr:`11677`, :pr:`11662`)
+- Remove wheels for manylinux2014. (:pr:`11673`)
+- Initial support for building variant wheels. (:pr:`11531`, :pr:`11645`, :pr:`11294`)
+- The minimum PySpark version is now set to 3.4 (:pr:`11364`). In addition, the PySpark
+  interface now checks the validation indicator column type and has a fix for ``None``
+  column input. (:pr:`11535`, :pr:`11523`)
+- [dask] Small cleanup for the predict function. (:pr:`11423`)
+
+*********
+R Package
+*********
+
+Now that most of the deprecated features have been removed in this release, we will try to
+bring the latest R package back to CRAN.
+
+- Implement Booster reset. (:pr:`11357`)
+- Improvements for documentation, including code examples on XGBoost's Sphinx
+  documentation site and notes for the R-universe release. (:pr:`11369`, :pr:`11410`,
+  :pr:`11685`, :pr:`11316`)
+
+************
+JVM Packages
+************
+
+- Support columnar inputs for the CPU pipeline (:pr:`11352`)
+- Rewrite ``LabeledPoint`` as a Java class (:pr:`11545`)
+- Various fixes and document updates. (:pr:`11525`, :pr:`11508`, :pr:`11489`, :pr:`11682`)
+
+*********
+Documents
+*********
+
+Changes for general documentation:
+
+- Update notes about GPU memory usage. (:pr:`11375`)
+- Various fixes and updates. (:pr:`11503`, :pr:`11532`, :pr:`11328`, :pr:`11344`, :pr:`11626`)
+
+
+******************
+CI and Maintenance
+******************
+
+- Code cleanups. (:pr:`11367`, :pr:`11342`, :pr:`11658`, :pr:`11528`, :pr:`11585`,
+  :pr:`11672`, :pr:`11642`, :pr:`11667`, :pr:`11495`, :pr:`11567`)
+- Various cleanup and fixes for tests.
(:pr:`11405`, :pr:`11389`, :pr:`11396`, :pr:`11456`) +- Support CMake 4.0 (:pr:`11382`) +- Various CI updates and fixes (:pr:`11318`, :pr:`11349`, :pr:`11653`, :pr:`11637`, + :pr:`11683`, :pr:`11638`, :pr:`11644`, :pr:`11306`, :pr:`11560`, :pr:`11323`, :pr:`11617`, + :pr:`11341`, :pr:`11693`) diff --git a/doc/cli.rst b/doc/cli.rst deleted file mode 100644 index aff6f30beaec..000000000000 --- a/doc/cli.rst +++ /dev/null @@ -1,5 +0,0 @@ -############################ -XGBoost Command Line version -############################ - -See `XGBoost Command Line walkthrough `_. diff --git a/doc/conf.py b/doc/conf.py index ce6a0219ccb1..b467fb37e172 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -84,7 +84,9 @@ def is_id(): elif branch.startswith("release_"): pass # release branch, like: release_2.1.0 elif branch == "stable": - branch = f"release_{xgboost.__version__}" + # Avoid patch release branch. + v = xgboost.__version__.split(".") + branch = f"release_{v[0]}.{v[1]}.0" elif is_id(): # Likely PR branch branch = f"PR-{branch}" @@ -124,10 +126,18 @@ def try_fetch_jvm_doc(branch: str) -> bool: """ try: local_jvm_docs = os.environ.get("XGBOOST_JVM_DOCS", None) + url = f"{S3_BUCKET}/{branch}/{commit}/{branch}.tar.bz2" if local_jvm_docs is not None: - filename = os.path.expanduser(local_jvm_docs) + local_jvm_docs = os.path.expanduser(local_jvm_docs) + + if local_jvm_docs is not None and os.path.exists(local_jvm_docs): + # Reuse an existing tarball. + filename = local_jvm_docs + elif local_jvm_docs is not None: + # Download to local_jvm_docs for future reuse. + filename, _ = urllib.request.urlretrieve(url, filename=local_jvm_docs) + print(f"Finished: {url} -> {filename}") else: - url = f"{S3_BUCKET}/{branch}/{commit}/{branch}.tar.bz2" filename, _ = urllib.request.urlretrieve(url) print(f"Finished: {url} -> {filename}") if not os.path.exists(TMP_DIR): @@ -161,10 +171,17 @@ def download_r_docs() -> None: def try_fetch_r_doc(branch: str) -> bool: try: local_r_docs = os.environ.get("XGBOOST_R_DOCS", None) + url = f"{S3_BUCKET}/{branch}/{commit}/r-docs-{branch}.tar.bz2" if local_r_docs is not None: - filename = os.path.expanduser(local_r_docs) + local_r_docs = os.path.expanduser(local_r_docs) + + if local_r_docs is not None and os.path.exists(local_r_docs): + # Reuse an existing tarball. + filename = local_r_docs + elif local_r_docs is not None: + filename, _ = urllib.request.urlretrieve(url, filename=local_r_docs) + print(f"Finished: {url} -> {filename}") else: - url = f"{S3_BUCKET}/{branch}/{commit}/r-docs-{branch}.tar.bz2" filename, _ = urllib.request.urlretrieve(url) print(f"Finished: {url} -> {filename}") @@ -225,7 +242,7 @@ def is_readthedocs_build(): # General information about the project. project = "xgboost" author = "%s developers" % project -copyright = "2022, %s" % author +copyright = "2025, %s" % author github_doc_root = "https://github.com/dmlc/xgboost/tree/master/doc/" # Add any Sphinx extension module names here, as strings. 
They can be @@ -238,6 +255,8 @@ def is_readthedocs_build(): "sphinx.ext.mathjax", "sphinx.ext.intersphinx", "sphinx_gallery.gen_gallery", + "sphinx_issues", + "sphinx_tabs.tabs", "breathe", "myst_parser", ] @@ -248,7 +267,6 @@ def is_readthedocs_build(): "../demo/guide-python", "../demo/dask", "../demo/aft_survival", - "../demo/gpu_acceleration", "../demo/rmm_plugin", ], # path to where to save gallery generated output @@ -256,12 +274,15 @@ def is_readthedocs_build(): "python/examples", "python/dask-examples", "python/survival-examples", - "python/gpu-examples", "python/rmm-examples", ], "matplotlib_animations": True, } +# Sphinx-issues configuration +# Path to GitHub repo {group}/{project} (note that `group` is the GitHub user or organization) +issues_github_path = "dmlc/xgboost" + autodoc_typehints = "description" graphviz_output_format = "png" diff --git a/doc/contrib/ci.rst b/doc/contrib/ci.rst index 1e01ed52fbfd..964d833e4756 100644 --- a/doc/contrib/ci.rst +++ b/doc/contrib/ci.rst @@ -15,9 +15,14 @@ project. Tips for testing **************** -==================================== +======= +R tests +======= + +------------------------------------ Running R tests with ``noLD`` option -==================================== +------------------------------------ + You can run R tests using a custom-built R with compilation flag ``--disable-long-double``. See `this page `_ for more details about noLD. This is a requirement for keeping XGBoost on CRAN (the R package index). @@ -25,6 +30,14 @@ Unlike other tests, this test must be invoked manually. Simply add a review comm ``/gha run r-nold-test`` to a pull request to kick off the test. (Ordinary comment won't work. It needs to be a review comment.) +--------------------------------- +Using container images from r-hub +--------------------------------- + +The r-hub project `provides `__ a list of container +`images `__ for reproducing CRAN environments. + + =============================== Making changes to CI containers =============================== diff --git a/doc/contrib/docs.rst b/doc/contrib/docs.rst index b0ef7d0a5fca..c850c956626d 100644 --- a/doc/contrib/docs.rst +++ b/doc/contrib/docs.rst @@ -107,8 +107,12 @@ build directory. Following is a list of environment variables used by the fetche - ``READTHEDOCS``: Read the docs flag. Build the full documentation site including R, JVM and C doc when set to ``True`` (case sensitive). - - ``XGBOOST_R_DOCS``: Local path for pre-built R document, used for development. - - ``XGBOOST_JVM_DOCS``: Local path for pre-built JVM document, used for development. + - ``XGBOOST_R_DOCS``: Local path for pre-built R document, used for development. If it + points to a file that doesn't exist, the configuration script will download the + packaged document to that path for future reuse. + - ``XGBOOST_JVM_DOCS``: Local path for pre-built JVM document, used for + development. Similar to the R docs environment variable when it points to a non-existent + file. As of writing, RTD doesn't provide any facility to be embedded as a GitHub action but we need a way to specify the dependency between the CI pipelines and the document build in diff --git a/doc/contrib/release.rst b/doc/contrib/release.rst index 4548b1ffa9a2..61735341f725 100644 --- a/doc/contrib/release.rst +++ b/doc/contrib/release.rst @@ -21,20 +21,28 @@ Making a Release 3. Commit the change, create a PR on GitHub on release branch. Port the bumped version to default branch, optionally with the postfix ``SNAPSHOT``. 4. 
Create a tag on release branch, either on GitHub or locally. 5. Make a release on GitHub tag page, which might be done with previous step if the tag is created on GitHub. -6. Submit pip, CRAN, and Maven packages. +6. Submit pip, R-universe, CRAN, and Maven packages. There are helper scripts for automating the process in ``xgboost/dev/``. + The pip package is maintained by `Hyunsu Cho `__ and `Jiaming Yuan `__. - + The CRAN package is maintained by `Tong He `_ and `Jiaming Yuan `__. + + The CRAN package and the R-universe packages are maintained by `Jiaming Yuan `__. + The Maven package is maintained by `Nan Zhu `_ and `Hyunsu Cho `_. +R Universe Packages +------------------- + +Since XGBoost 3.0.0, we host the R package on `R-Universe +`__. To make a new release, change the +``packages.json`` in `dmlc.r-universe.dev `__ +with a new release branch. + R CRAN Package -------------- -Before submitting a release, one should test the package on `R-hub `__ and `win-builder `__ first. Please note that the R-hub Windows instance doesn't have the exact same environment as the one hosted on win-builder. +Before submitting a release, one should test the package on `R-hub `__ and `win-builder `__ first. Please note that the R-hub Windows instance doesn't have the exact same environment as the one hosted on win-builder. According to the `CRAN policy `__: @@ -42,7 +50,17 @@ According to the `CRAN policy `__ and make sure the machine you are using has enough CPU cores to reveal any potential policy violation. +Read The Docs +------------- + +We might need to manually activate the new release branch for `read the docs +`__ and set it as the default branch in the console `[2] +<#references>`__. Please check the document build and make sure the correct branch is +activated and selected after making a new release. + References ---------- [1] https://stat.ethz.ch/pipermail/r-package-devel/2022q4/008610.html + +[2] https://github.com/readthedocs/readthedocs.org/issues/12073 \ No newline at end of file diff --git a/doc/gpu/index.rst b/doc/gpu/index.rst index 9603a628cb81..8366068a762c 100644 --- a/doc/gpu/index.rst +++ b/doc/gpu/index.rst @@ -4,7 +4,7 @@ XGBoost GPU Support This page contains information about GPU algorithms supported in XGBoost. -.. note:: CUDA 11.0, Compute Capability 5.0 required (See `this list `_ to look up compute capability of your GPU card.) +.. note:: CUDA 12.0, Compute Capability 5.0 required (See `this list `_ to look up compute capability of your GPU card.) ********************************************* CUDA Accelerated Tree Construction Algorithms @@ -43,7 +43,7 @@ XGBoost makes use of `GPUTreeShap `_ as shap_values = booster.predict(dtrain, pred_contribs=True) shap_interaction_values = model.predict(dtrain, pred_interactions=True) -See :ref:`sphx_glr_python_gpu-examples_tree_shap.py` for a worked example. +See :ref:`sphx_glr_python_examples_gpu_tree_shap.py` for a worked example. Multi-node Multi-GPU Training ============================= @@ -66,7 +66,15 @@ The dataset itself is stored on device in a compressed ELLPACK format. The ELLPA Working memory is allocated inside the algorithm proportional to the number of rows to keep track of gradients, tree positions and other per row statistics. Memory is allocated for histogram bins proportional to the number of bins, number of features and nodes in the tree. 
For performance reasons we keep histograms in memory from previous nodes in the tree; when a certain threshold of memory usage is passed, we stop doing this to conserve memory, at some performance loss.
 
-If you are getting out-of-memory errors on a big dataset, try the :py:class:`xgboost.QuantileDMatrix` or :doc:`external memory version </tutorials/external_memory>`. Note that when ``external memory`` is used for GPU hist, it's best to employ gradient based sampling as well. Last but not least, ``inplace_predict`` can be preferred over ``predict`` when data is already on GPU. Both ``QuantileDMatrix`` and ``inplace_predict`` are automatically enabled if you are using the scikit-learn interface.
+If you are getting out-of-memory errors on a big dataset, try the
+:py:class:`xgboost.QuantileDMatrix` first. If you have access to NVLink-C2C devices, see
+the :doc:`external memory version </tutorials/external_memory>`. In addition,
+:py:meth:`~xgboost.Booster.inplace_predict` should be preferred over ``predict`` when data
+is already on GPU. Both :py:class:`xgboost.QuantileDMatrix` and
+:py:meth:`~xgboost.Booster.inplace_predict` are automatically enabled if you are using the
+scikit-learn interface. Last but not least, using :py:class:`~xgboost.QuantileDMatrix`
+with a data iterator as input is a great way to increase memory capacity, see
+:ref:`sphx_glr_python_examples_quantile_data_iterator.py`.
 
 
 CPU-GPU Interoperability
diff --git a/doc/index.rst b/doc/index.rst
index 6f37aa464eca..964381d28a04 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -32,6 +32,5 @@ Contents
    Julia Package
    C Package
    C++ Interface
-   CLI Interface
    contrib/index
    changes/index
diff --git a/doc/install.rst b/doc/install.rst
index 9a7330c8b428..7fcea0d3b68c 100644
--- a/doc/install.rst
+++ b/doc/install.rst
@@ -104,45 +104,40 @@ Conda should be able to detect the existence of a GPU on your machine and instal
 
 .. code-block:: bash
 
-    # CPU only
-    conda install -c conda-forge py-xgboost-cpu
-    # Use NVIDIA GPU
-    conda install -c conda-forge py-xgboost-gpu
+    # CPU variant
+    conda install -c conda-forge py-xgboost=*=cpu*
+    # GPU variant
+    conda install -c conda-forge py-xgboost=*=cuda*
 
 To force the installation of the GPU variant on a machine that does not have an NVIDIA GPU, use environment variable ``CONDA_OVERRIDE_CUDA``, as described in `"Managing Virtual Packages" in the conda docs `_.
 
 .. code-block:: bash
 
-    export CONDA_OVERRIDE_CUDA="12.5"
-    conda install -c conda-forge py-xgboost-gpu
+    export CONDA_OVERRIDE_CUDA="12.8"
+    conda install -c conda-forge py-xgboost=*=cuda*
 
-Visit the `Miniconda website `_ to obtain Conda.
-
-.. note:: ``py-xgboost-gpu`` not available on Windows.
-
-   The ``py-xgboost-gpu`` is currently not available on Windows. If you are using Windows,
-   please use ``pip`` to install XGBoost with GPU support.
+You can install Conda from the following link: `Download the conda-forge Installer
+`_.
 
 R
 -
 
-* From CRAN:
+* From R Universe:
 
-  .. code-block:: R
+.. code-block:: R
 
-    install.packages("xgboost")
+   install.packages('xgboost', repos = c('https://dmlc.r-universe.dev/', 'https://cloud.r-project.org/'))
 
-  .. note:: Using all CPU cores (threads) on Mac OSX
+.. note:: Using all CPU cores (threads) on Mac OSX
 
-    If you are using Mac OSX, you should first install OpenMP library (``libomp``) by running
+   If you are using Mac OSX, you should first install OpenMP library (``libomp``) by running
 
-    .. code-block:: bash
+   .. code-block:: bash
 
       brew install libomp
 
-  and then run ``install.packages("xgboost")``.
Without OpenMP, XGBoost will only use a - single CPU core, leading to suboptimal training speed. + and then run ``install.packages("xgboost")``. Without OpenMP, XGBoost will only use a + single CPU core, leading to suboptimal training speed. * We also provide **experimental** pre-built binary with GPU support. With this binary, you will be able to use the GPU algorithm without building XGBoost from the source. @@ -158,6 +153,30 @@ R # Install XGBoost R CMD INSTALL ./xgboost_r_gpu_linux.tar.gz + +* From CRAN (outdated): + +.. warning:: + + We are working on bringing the CRAN version of XGBoost up-to-date, in the meantime, + please use packages from the R-universe. + + +.. code-block:: R + + install.packages("xgboost") + +.. note:: Using all CPU cores (threads) on Mac OSX + + If you are using Mac OSX, you should first install OpenMP library (``libomp``) by running + + .. code-block:: bash + + brew install libomp + + and then run ``install.packages("xgboost")``. Without OpenMP, XGBoost will only use a + single CPU core, leading to suboptimal training speed. + JVM --- diff --git a/doc/model.schema b/doc/model.schema deleted file mode 100644 index 103d9d9e4221..000000000000 --- a/doc/model.schema +++ /dev/null @@ -1,555 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "definitions": { - "gbtree": { - "type": "object", - "properties": { - "name": { - "const": "gbtree" - }, - "model": { - "type": "object", - "properties": { - "gbtree_model_param": { - "$ref": "#/definitions/gbtree_model_param" - }, - "trees": { - "type": "array", - "items": { - "type": "object", - "properties": { - "tree_param": { - "$ref": "#/definitions/tree_param" - }, - "id": { - "type": "integer" - }, - "loss_changes": { - "type": "array", - "items": { - "type": "number" - } - }, - "sum_hessian": { - "type": "array", - "items": { - "type": "number" - } - }, - "base_weights": { - "type": "array", - "items": { - "type": "number" - } - }, - "left_children": { - "type": "array", - "items": { - "type": "integer" - } - }, - "right_children": { - "type": "array", - "items": { - "type": "integer" - } - }, - "parents": { - "type": "array", - "items": { - "type": "integer" - } - }, - "split_indices": { - "type": "array", - "items": { - "type": "integer" - } - }, - "split_conditions": { - "type": "array", - "items": { - "type": "number" - } - }, - "split_type": { - "type": "array", - "items": { - "type": "integer" - } - }, - "default_left": { - "type": "array", - "items": { - "type": "integer" - } - }, - "categories": { - "type": "array", - "items": { - "type": "integer" - } - }, - "categories_nodes": { - "type": "array", - "items": { - "type": "integer" - } - }, - "categories_segments": { - "type": "array", - "items": { - "type": "integer" - } - }, - "categories_sizes": { - "type": "array", - "items": { - "type": "integer" - } - } - }, - "required": [ - "tree_param", - "loss_changes", - "sum_hessian", - "base_weights", - "left_children", - "right_children", - "parents", - "split_indices", - "split_conditions", - "default_left", - "categories", - "categories_nodes", - "categories_segments", - "categories_sizes" - ] - } - }, - "tree_info": { - "type": "array", - "items": { - "type": "integer" - } - } - }, - "required": [ - "gbtree_model_param", - "trees", - "tree_info" - ] - } - }, - "required": [ - "name", - "model" - ] - }, - "gbtree_model_param": { - "type": "object", - "properties": { - "num_trees": { - "type": "string" - }, - "num_parallel_tree": { - "type": "string" - } - }, - "required": [ - 
"num_trees", - "num_parallel_tree" - ] - }, - "tree_param": { - "type": "object", - "properties": { - "num_nodes": { - "type": "string" - }, - "size_leaf_vector": { - "type": "string" - }, - "num_feature": { - "type": "string" - } - }, - "required": [ - "num_nodes", - "num_feature", - "size_leaf_vector" - ] - }, - "reg_loss_param": { - "type": "object", - "properties": { - "scale_pos_weight": { - "type": "string" - } - } - }, - "pseudo_huber_param": { - "type": "object", - "properties": { - "huber_slope": { - "type": "string" - } - } - }, - "aft_loss_param": { - "type": "object", - "properties": { - "aft_loss_distribution": { - "type": "string" - }, - "aft_loss_distribution_scale": { - "type": "string" - } - } - }, - "softmax_multiclass_param": { - "type": "object", - "properties": { - "num_class": { "type": "string" } - } - }, - "lambda_rank_param": { - "type": "object", - "properties": { - "num_pairsample": { "type": "string" }, - "fix_list_weight": { "type": "string" } - } - }, - "lambdarank_param": { - "type": "object", - "properties": { - "lambdarank_num_pair_per_sample": { "type": "string" }, - "lambdarank_pair_method": { "type": "string" }, - "lambdarank_unbiased": {"type": "string" }, - "lambdarank_bias_norm": {"type": "string" }, - "ndcg_exp_gain": {"type": "string"} - } - } - }, - "type": "object", - "properties": { - "version": { - "type": "array", - "items": [ - { - "type": "number", - "minimum": 1 - }, - { - "type": "number", - "minimum": 0 - }, - { - "type": "number", - "minimum": 0 - } - ], - "minItems": 3, - "maxItems": 3 - }, - "learner": { - "type": "object", - "properties": { - "feature_names": { - "type": "array", - "items": { - "type": "string" - } - }, - "feature_types": { - "type": "array", - "items": { - "type": "string" - } - }, - "gradient_booster": { - "oneOf": [ - { - "$ref": "#/definitions/gbtree" - }, - { - "type": "object", - "properties": { - "name": { "const": "gblinear" }, - "model": { - "type": "object", - "properties": { - "weights": { - "type": "array", - "items": { - "type": "number" - } - } - } - } - } - }, - { - "type": "object", - "properties": { - "name": { "const": "dart" }, - "gbtree": { - "$ref": "#/definitions/gbtree" - }, - "weight_drop": { - "type": "array", - "items": { - "type": "number" - } - } - }, - "required": [ - "name", - "gbtree", - "weight_drop" - ] - } - ] - }, - - "objective": { - "oneOf": [ - { - "type": "object", - "properties": { - "name": { "const": "reg:squarederror" }, - "reg_loss_param": { "$ref": "#/definitions/reg_loss_param"} - }, - "required": [ - "name", - "reg_loss_param" - ] - }, - { - "type": "object", - "properties": { - "name": { "const": "reg:pseudohubererror" }, - "reg_loss_param": { "$ref": "#/definitions/reg_loss_param"} - }, - "required": [ - "name", - "reg_loss_param" - ] - }, - { - "type": "object", - "properties": { - "name": { "const": "reg:squaredlogerror" }, - "reg_loss_param": { "$ref": "#/definitions/reg_loss_param"} - }, - "required": [ - "name", - "reg_loss_param" - ] - }, - { - "type": "object", - "properties": { - "name": { "const": "reg:linear" }, - "reg_loss_param": { "$ref": "#/definitions/reg_loss_param"} - }, - "required": [ - "name", - "reg_loss_param" - ] - }, - { - "type": "object", - "properties": { - "name": { "const": "reg:logistic" }, - "reg_loss_param": { "$ref": "#/definitions/reg_loss_param"} - }, - "required": [ - "name", - "reg_loss_param" - ] - }, - { - "type": "object", - "properties": { - "name": { "const": "binary:logistic" }, - "reg_loss_param": { "$ref": 
"#/definitions/reg_loss_param"} - }, - "required": [ - "name", - "reg_loss_param" - ] - }, - { - "type": "object", - "properties": { - "name": { "const": "binary:logitraw" }, - "reg_loss_param": { "$ref": "#/definitions/reg_loss_param"} - }, - "required": [ - "name", - "reg_loss_param" - ] - }, - { - "type": "object", - "properties": { - "name": { "const": "count:poisson" }, - "poisson_regression_param": { - "type": "object", - "properties": { - "max_delta_step": { "type": "string" } - } - } - }, - "required": [ - "name", - "poisson_regression_param" - ] - }, - { - "type": "object", - "properties": { - "name": { "const": "reg:tweedie" }, - "tweedie_regression_param": { - "type": "object", - "properties": { - "tweedie_variance_power": { "type": "string" } - } - } - }, - "required": [ - "name", - "tweedie_regression_param" - ] - }, - { - "properties": { - "name": { - "const": "reg:absoluteerror" - } - }, - "type": "object" - }, - { - "properties": { - "name": { - "const": "reg:quantileerror" - }, - "quantile_loss_param": { - "type": "object", - "properties": { - "quantle_alpha": {"type": "array"} - } - } - }, - "type": "object" - }, - { - "type": "object", - "properties": { - "name": { "const": "survival:cox" } - }, - "required": [ "name" ] - }, - { - "type": "object", - "properties": { - "name": { "const": "reg:gamma" } - }, - "required": [ "name" ] - }, - - { - "type": "object", - "properties": { - "name": { "const": "multi:softprob" }, - "softmax_multiclass_param": { "$ref": "#/definitions/softmax_multiclass_param"} - }, - "required": [ - "name", - "softmax_multiclass_param" - ] - }, - { - "type": "object", - "properties": { - "name": { "const": "multi:softmax" }, - "softmax_multiclass_param": { "$ref": "#/definitions/softmax_multiclass_param"} - }, - "required": [ - "name", - "softmax_multiclass_param" - ] - }, - - { - "type": "object", - "properties": { - "name": { "const": "rank:pairwise" }, - "lambda_rank_param": { "$ref": "#/definitions/lambdarank_param"} - }, - "required": [ - "name", - "lambdarank_param" - ] - }, - { - "type": "object", - "properties": { - "name": { "const": "rank:ndcg" }, - "lambda_rank_param": { "$ref": "#/definitions/lambdarank_param"} - }, - "required": [ - "name", - "lambdarank_param" - ] - }, - { - "type": "object", - "properties": { - "name": { "const": "rank:map" }, - "lambda_rank_param": { "$ref": "#/definitions/lambda_rank_param"} - }, - "required": [ - "name", - "lambda_rank_param" - ] - }, - { - "type": "object", - "properties": { - "name": {"const": "survival:aft"}, - "aft_loss_param": { "$ref": "#/definitions/aft_loss_param"} - } - }, - { - "type": "object", - "properties": { - "name": {"const": "binary:hinge"} - } - } - ] - }, - - "learner_model_param": { - "type": "object", - "properties": { - "base_score": { "type": "string" }, - "num_class": { "type": "string" }, - "num_feature": { "type": "string" }, - "num_target": { "type": "string" } - } - } - }, - "required": [ - "gradient_booster", - "objective" - ] - } - }, - "required": [ - "version", - "learner" - ] -} diff --git a/doc/parameter.rst b/doc/parameter.rst index e9a309c24766..c5f0681aefd2 100644 --- a/doc/parameter.rst +++ b/doc/parameter.rst @@ -6,7 +6,6 @@ Before running XGBoost, we must set three types of parameters: general parameter - **General parameters** relate to which booster we are using to do boosting, commonly tree or linear model - **Booster parameters** depend on which booster you have chosen - **Learning task parameters** decide on the learning scenario. 
For example, regression tasks may use different parameters with ranking tasks. -- **Command line parameters** relate to behavior of CLI version of XGBoost. .. note:: Parameters in R package @@ -419,13 +418,18 @@ Specify the learning task and the corresponding learning objective. The objectiv * ``base_score`` - - The initial prediction score of all instances, global bias + The initial prediction score of all instances, also known as the global bias, or the intercept. + + .. versionchanged:: 3.1.0 + + XGBoost is updated to use vector-valued intercept by default. + - The parameter is automatically estimated for selected objectives before training. To - disable the estimation, specify a real number argument. - - If ``base_margin`` is supplied, ``base_score`` will not be added. - - For sufficient number of iterations, changing this value will not have too much effect. + disable the estimation, specify a real number argument, e.g. ``base_score = 0.5``. + - If ``base_margin`` is supplied, ``base_score`` will not be used. + - If we train the model with a sufficient number of iterations, changing this value does not offer significant benefit. - See :doc:`/tutorials/intercept` for more info. + See :doc:`/tutorials/intercept` for more information, including different use cases. * ``eval_metric`` [default according to objective] @@ -540,6 +544,10 @@ These are parameters specific to learning to rank task. See :doc:`Learning to Ra Whether to normalize the leaf value by lambda gradient. This can sometimes stagnate the training progress. + .. versionchanged:: 3.0.0 + + When the ``mean`` method is used, it's normalized by the ``lambdarank_num_pair_per_sample`` instead of gradient. + * ``lambdarank_score_normalization`` [default = ``true``] .. versionadded:: 3.0.0 @@ -564,65 +572,4 @@ These are parameters specific to learning to rank task. See :doc:`Learning to Ra * ``ndcg_exp_gain`` [default = ``true``] - Whether we should use exponential gain function for ``NDCG``. There are two forms of gain function for ``NDCG``, one is using relevance value directly while the other is using :math:`2^{rel} - 1` to emphasize on retrieving relevant documents. When ``ndcg_exp_gain`` is true (the default), relevance degree cannot be greater than 31. - -*********************** -Command Line Parameters -*********************** -The following parameters are only used in the console version of XGBoost. The CLI has been -deprecated and will be removed in future releases. - -* ``num_round`` - - - The number of rounds for boosting - -* ``data`` - - - The path of training data - -* ``test:data`` - - - The path of test data to do prediction - -* ``save_period`` [default=0] - - - The period to save the model. Setting ``save_period=10`` means that for every 10 rounds XGBoost will save the model. Setting it to 0 means not saving any model during the training. - -* ``task`` [default= ``train``] options: ``train``, ``pred``, ``eval``, ``dump`` - - - ``train``: training using data - - ``pred``: making prediction for test:data - - ``eval``: for evaluating statistics specified by ``eval[name]=filename`` - - ``dump``: for dump the learned model into text format - -* ``model_in`` [default=NULL] - - - Path to input model, needed for ``test``, ``eval``, ``dump`` tasks. If it is specified in training, XGBoost will continue training from the input model. - -* ``model_out`` [default=NULL] - - - Path to output model after training finishes. 
If not specified, XGBoost will output files with such names as ``0003.model`` where ``0003`` is number of boosting rounds.
-
-* ``model_dir`` [default= ``models/``]
-
-  - The output directory of the saved models during training
-
-* ``fmap``
-
-  - Feature map, used for dumping model
-
-* ``dump_format`` [default= ``text``] options: ``text``, ``json``
-
-  - Format of model dump file
-
-* ``name_dump`` [default= ``dump.txt``]
-
-  - Name of model dump file
-
-* ``name_pred`` [default= ``pred.txt``]
-
-  - Name of prediction file, used in pred mode
-
-* ``pred_margin`` [default=0]
-
-  - Predict margin instead of transformed probability
+  Whether we should use exponential gain function for ``NDCG``. There are two forms of gain function for ``NDCG``, one is using relevance value directly while the other is using :math:`2^{rel} - 1` to emphasize on retrieving relevant documents. When ``ndcg_exp_gain`` is true (the default), relevance degree cannot be greater than 31.
\ No newline at end of file
diff --git a/doc/python/data_input.rst b/doc/python/data_input.rst
new file mode 100644
index 000000000000..8343c1079e44
--- /dev/null
+++ b/doc/python/data_input.rst
@@ -0,0 +1,86 @@
+################################
+Supported Python data structures
+################################
+
+This page is a support matrix for various input types.
+
+.. _py-data:
+
+*******
+Markers
+*******
+
+- T: Supported.
+- F: Not supported.
+- NE: Invalid type for the use case. For instance, :py:class:`pandas.Series` cannot be a multi-target label.
+- NPA: Support with the help of numpy array.
+- AT: Support with the help of arrow table.
+- CPA: Support with the help of cupy array.
+- SciCSR: Support with the help of scipy sparse CSR :py:class:`scipy.sparse.csr_matrix`. The conversion to scipy CSR may or may not be possible; a type error is raised if the conversion fails.
+- FF: We can look forward to having its support in the near future if requested.
+- empty: To be filled in.
+
+************
+Table Header
+************
+- `X` means predictor matrix.
+- Meta info: label, weight, etc.
+- Multi Label: 2-dim label for multi-target.
+- Others: Anything else that we don't list here explicitly, including formats like `lil`, `dia`, `bsr`. XGBoost will try to convert it into scipy CSR.
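+
+As a quick illustration of the markers in the ``DMatrix X`` column, the following
+sketch constructs a :py:class:`~xgboost.DMatrix` from a few supported types; the
+synthetic data here is only for demonstration:
+
+.. code-block:: python
+
+    import numpy as np
+    import pandas as pd
+    from scipy import sparse
+    import xgboost as xgb
+
+    y = np.random.rand(8)
+
+    # T: a numpy array is supported directly.
+    Xy = xgb.DMatrix(np.random.rand(8, 4), label=y)
+    # T: so is a scipy sparse CSR matrix.
+    Xy = xgb.DMatrix(sparse.random(8, 4, density=0.5, format="csr"), label=y)
+    # SciCSR: a COO matrix is converted to CSR internally.
+    Xy = xgb.DMatrix(sparse.random(8, 4, density=0.5, format="coo"), label=y)
+    # NPA: a pandas DataFrame is supported with the help of a numpy array.
+    Xy = xgb.DMatrix(pd.DataFrame(np.random.rand(8, 4)), label=y)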
+ +************** +Support Matrix +************** + ++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ +| Name | DMatrix X | QuantileDMatrix X | Sklearn X | Meta Info | Inplace prediction | Multi Label | ++=========================+===========+===================+===========+===========+====================+=============+ +| numpy.ndarray | T | T | T | T | T | T | ++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ +| scipy.sparse.csr | T | T | T | NE | T | F | ++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ +| scipy.sparse.csc | T | F | T | NE | F | F | ++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ +| scipy.sparse.coo | SciCSR | F | SciCSR | NE | F | F | ++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ +| uri | T | F | F | F | NE | F | ++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ +| list | NPA | NPA | NPA | NPA | NPA | T | ++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ +| tuple | NPA | NPA | NPA | NPA | NPA | T | ++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ +| pandas.DataFrame | NPA | NPA | NPA | NPA | NPA | NPA | ++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ +| pandas.Series | NPA | NPA | NPA | NPA | NPA | NE | ++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ +| cudf.DataFrame | T | T | T | T | T | T | ++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ +| cudf.Series | T | T | T | T | FF | NE | ++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ +| cupy.ndarray | T | T | T | T | T | T | ++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ +| torch.Tensor | T | T | T | T | T | T | ++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ +| dlpack | CPA | CPA | | CPA | FF | FF | ++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ +| modin.DataFrame | NPA | FF | NPA | NPA | FF | | ++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ +| modin.Series | NPA | FF | NPA | NPA | FF | | ++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ +| pyarrow.Table | T | T | T | T | T | T | ++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ +| polars.DataFrame | AT | AT | AT | AT | AT | AT | ++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ +| polars.LazyFrame (WARN) | AT | AT | AT | AT | AT | AT | 
++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ +| polars.Series | AT | AT | AT | AT | AT | NE | ++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ +| _\_array\_\_ | NPA | F | NPA | NPA | H | | ++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ +| Others | SciCSR | F | | F | F | | ++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ + +The polars ``LazyFrame.collect`` supports many configurations, ranging from the choice of +query engine to type coercion. XGBoost simply uses the default parameter. Please run +``collect`` to obtain the ``DataFrame`` before passing it into XGBoost for finer control +over the behaviour. \ No newline at end of file diff --git a/doc/python/index.rst b/doc/python/index.rst index 28169a4efdd1..dbae48b2ec3b 100644 --- a/doc/python/index.rst +++ b/doc/python/index.rst @@ -12,9 +12,9 @@ Contents python_intro sklearn_estimator python_api + data_input callbacks examples/index dask-examples/index survival-examples/index - gpu-examples/index rmm-examples/index diff --git a/doc/python/python_api.rst b/doc/python/python_api.rst index 5398fb5d091f..595b7f067a01 100644 --- a/doc/python/python_api.rst +++ b/doc/python/python_api.rst @@ -43,6 +43,8 @@ Core Data Structure :members: :show-inheritance: +.. autoclass:: xgboost.core.Categories + Learning API ------------ .. automodule:: xgboost.training @@ -204,6 +206,14 @@ Collective .. autofunction:: xgboost.collective.init +.. autofunction:: xgboost.collective.finalize + +.. autofunction:: xgboost.collective.get_rank + +.. autofunction:: xgboost.collective.get_world_size + +.. autoclass:: xgboost.collective.CommunicatorContext + .. automodule:: xgboost.tracker .. autoclass:: xgboost.tracker.RabitTracker \ No newline at end of file diff --git a/doc/python/python_intro.rst b/doc/python/python_intro.rst index 5ded82922879..f9a458af4142 100644 --- a/doc/python/python_intro.rst +++ b/doc/python/python_intro.rst @@ -32,9 +32,9 @@ To verify your installation, run the following in Python: Data Interface -------------- -The XGBoost Python module is able to load data from many different types of data format including both CPU and GPU data structures. For a complete list of supported data types, please reference the :ref:`py-data`. For a detailed description of text input formats, please visit :doc:`/tutorials/input_format`. +The XGBoost Python module is able to load data from many different types of data format including both CPU and GPU data structures. For a comprehensive list of supported data types, please reference the :doc:`/python/data_input`. For a detailed description of text input formats, please visit :doc:`/tutorials/input_format`. -The input data is stored in a :py:class:`DMatrix ` object. For the sklearn estimator interface, a :py:class:`DMatrix` or a :py:class:`QuantileDMatrix` is created depending on the chosen algorithm and the input, see the sklearn API reference for details. We will illustrate some of the basic input types with the ``DMatrix`` here. +The input data is stored in a :py:class:`DMatrix ` object. For the sklearn estimator interface, a :py:class:`DMatrix` or a :py:class:`QuantileDMatrix` is created depending on the chosen algorithm and the input, see the sklearn API reference for details. 
We will illustrate some of the basic input types using the ``DMatrix`` here.
 
 * To load a NumPy array into :py:class:`DMatrix <xgboost.DMatrix>`:
 
@@ -59,11 +59,12 @@ The input data is stored in a :py:class:`DMatrix <xgboost.DMatrix>` object. For
     label = pandas.DataFrame(np.random.randint(2, size=4))
     dtrain = xgb.DMatrix(data, label=label)
 
-* Saving :py:class:`DMatrix <xgboost.DMatrix>` into a XGBoost binary file will make loading faster:
+* Saving :py:class:`DMatrix <xgboost.DMatrix>` into an XGBoost binary file:
 
   .. code-block:: python
 
-    dtrain = xgb.DMatrix('train.svm.txt?format=libsvm')
+    data = np.random.rand(5, 10)  # 5 entities, each contains 10 features
+    label = np.random.randint(2, size=5)  # binary target
+    dtrain = xgb.DMatrix(data, label=label)
     dtrain.save_binary('train.buffer')
 
 * Missing values can be replaced by a default value in the :py:class:`DMatrix <xgboost.DMatrix>` constructor:
 
@@ -79,116 +80,6 @@ The input data is stored in a :py:class:`DMatrix <xgboost.DMatrix>` object. For
     w = np.random.rand(5, 1)
     dtrain = xgb.DMatrix(data, label=label, missing=np.NaN, weight=w)
 
-When performing ranking tasks, the number of weights should be equal
-to number of groups.
-
-* To load a LIBSVM text file or a XGBoost binary file into :py:class:`DMatrix <xgboost.DMatrix>`:
-
-  .. code-block:: python
-
-    dtrain = xgb.DMatrix('train.svm.txt?format=libsvm')
-    dtest = xgb.DMatrix('test.svm.buffer')
-
-  The parser in XGBoost has limited functionality. When using Python interface, it's
-  recommended to use sklearn ``load_svmlight_file`` or other similar utilites than
-  XGBoost's builtin parser.
-
-* To load a CSV file into :py:class:`DMatrix <xgboost.DMatrix>`:
-
-  .. code-block:: python
-
-    # label_column specifies the index of the column containing the true label
-    dtrain = xgb.DMatrix('train.csv?format=csv&label_column=0')
-    dtest = xgb.DMatrix('test.csv?format=csv&label_column=0')
-
-  The parser in XGBoost has limited functionality. When using Python interface, it's
-  recommended to use pandas ``read_csv`` or other similar utilites than XGBoost's builtin
-  parser.
-
-.. _py-data:
-
-Supported data structures for various XGBoost functions
-=======================================================
-
-*******
-Markers
-*******
-
-- T: Supported.
-- F: Not supported.
-- NE: Invalid type for the use case. For instance, `pd.Series` can not be multi-target label.
-- NPA: Support with the help of numpy array.
-- AT: Support with the help of arrow table.
-- CPA: Support with the help of cupy array.
-- SciCSR: Support with the help of scripy sparse CSR. The conversion to scipy CSR may or may not be possible. Raise a type error if conversion fails.
-- FF: We can look forward to having its support in recent future if requested.
-- empty: To be filled in.
-
-************
-Table Header
-************
-- `X` means predictor matrix.
-- Meta info: label, weight, etc.
-- Multi Label: 2-dim label for multi-target.
-- Others: Anything else that we don't list here explicitly including formats like `lil`, `dia`, `bsr`. XGBoost will try to convert it into scipy csr.
- -************** -Support Matrix -************** - -+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ -| Name | DMatrix X | QuantileDMatrix X | Sklearn X | Meta Info | Inplace prediction | Multi Label | -+=========================+===========+===================+===========+===========+====================+=============+ -| numpy.ndarray | T | T | T | T | T | T | -+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ -| scipy.sparse.csr | T | T | T | NE | T | F | -+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ -| scipy.sparse.csc | T | F | T | NE | F | F | -+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ -| scipy.sparse.coo | SciCSR | F | SciCSR | NE | F | F | -+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ -| uri | T | F | F | F | NE | F | -+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ -| list | NPA | NPA | NPA | NPA | NPA | T | -+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ -| tuple | NPA | NPA | NPA | NPA | NPA | T | -+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ -| pandas.DataFrame | NPA | NPA | NPA | NPA | NPA | NPA | -+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ -| pandas.Series | NPA | NPA | NPA | NPA | NPA | NE | -+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ -| cudf.DataFrame | T | T | T | T | T | T | -+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ -| cudf.Series | T | T | T | T | FF | NE | -+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ -| cupy.ndarray | T | T | T | T | T | T | -+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ -| torch.Tensor | T | T | T | T | T | T | -+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ -| dlpack | CPA | CPA | | CPA | FF | FF | -+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ -| modin.DataFrame | NPA | FF | NPA | NPA | FF | | -+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ -| modin.Series | NPA | FF | NPA | NPA | FF | | -+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ -| pyarrow.Table | T | T | T | T | T | T | -+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ -| polars.DataFrame | AT | AT | AT | AT | AT | AT | -+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ -| polars.LazyFrame (WARN) | AT | AT | AT | AT | AT | AT | 
-+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
-| polars.Series           | AT        | AT                | AT        | AT        | AT                 | NE          |
-+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
-| _\_array\_\_            | NPA       | F                 | NPA       | NPA       | H                  |             |
-+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
-| Others                  | SciCSR    | F                 |           | F         | F                  |             |
-+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
-
-The polars ``LazyFrame.collect`` supports many configurations, ranging from the choice of
-query engine to type coercion. XGBoost simply uses the default parameter. Please run
-``collect`` to obtain the ``DataFrame`` before passing it into XGBoost for finer control
-over the behaviour.
-
 Setting Parameters
 ------------------
 XGBoost can use either a list of pairs or a dictionary to set :doc:`parameters </parameter>`. For instance:
@@ -227,11 +118,11 @@ Training a model requires a parameter list and data set.
     num_round = 10
     bst = xgb.train(param, dtrain, num_round, evallist)

-After training, the model can be saved.
+After training, the model can be saved into ``JSON`` or ``UBJSON``:

 .. code-block:: python

-  bst.save_model('0001.model')
+  bst.save_model('model.ubj')

 The model and its feature map can also be dumped to a text file.

@@ -247,10 +138,10 @@ A saved model can be loaded as follows:

 .. code-block:: python

   bst = xgb.Booster({'nthread': 4})  # init model
-  bst.load_model('model.bin')  # load model data
+  bst.load_model('model.ubj')  # load model data

-Methods including `update` and `boost` from `xgboost.Booster` are designed for
-internal usage only. The wrapper function `xgboost.train` does some
+Methods including `update` and `boost` from :py:class:`xgboost.Booster` are designed for
+internal usage only. The wrapper function :py:func:`xgboost.train` does some
 pre-configuration including setting up caches and some other parameters.

 Early Stopping
diff --git a/doc/requirements.txt b/doc/requirements.txt
index 9a2097035228..4dc0c2275658 100644
--- a/doc/requirements.txt
+++ b/doc/requirements.txt
@@ -11,6 +11,8 @@ scipy
 myst-parser
 ray[train]
 sphinx-gallery
+sphinx-issues
+sphinx-tabs
 dask
 pyspark
 cloudpickle
diff --git a/doc/tutorials/advanced_custom_obj.rst b/doc/tutorials/advanced_custom_obj.rst
index 5b81b47167e1..344f3acc89a8 100644
--- a/doc/tutorials/advanced_custom_obj.rst
+++ b/doc/tutorials/advanced_custom_obj.rst
@@ -94,184 +94,181 @@ Where:

 In this case, we want to optimize the negative of the log-likelihood summed across rows.
 The resulting function, gradient and Hessian could be implemented as follows:

-.. 
code-block:: python - :caption: Python - - import numpy as np - from scipy.special import loggamma, psi as digamma, polygamma - trigamma = lambda x: polygamma(1, x) - - def dirichlet_fun(pred: np.ndarray, Y: np.ndarray) -> float: - epred = np.exp(pred) - sum_epred = np.sum(epred, axis=1, keepdims=True) - return ( - loggamma(epred).sum() - - loggamma(sum_epred).sum() - - np.sum(np.log(Y) * (epred - 1)) - ) - def dirichlet_grad(pred: np.ndarray, Y: np.ndarray) -> np.ndarray: - epred = np.exp(pred) - return epred * ( - digamma(epred) - - digamma(np.sum(epred, axis=1, keepdims=True)) - - np.log(Y) - ) - def dirichlet_hess(pred: np.ndarray, Y: np.ndarray) -> np.ndarray: - epred = np.exp(pred) - grad = dirichlet_grad(pred, Y) - k = Y.shape[1] - H = np.empty((pred.shape[0], k, k)) - for row in range(pred.shape[0]): - H[row, :, :] = ( - - trigamma(epred[row].sum()) * np.outer(epred[row], epred[row]) - + np.diag(grad[row] + trigamma(epred[row]) * epred[row] ** 2) +.. tabs:: + .. code-tab:: py + + import numpy as np + from scipy.special import loggamma, psi as digamma, polygamma + trigamma = lambda x: polygamma(1, x) + + def dirichlet_fun(pred: np.ndarray, Y: np.ndarray) -> float: + epred = np.exp(pred) + sum_epred = np.sum(epred, axis=1, keepdims=True) + return ( + loggamma(epred).sum() + - loggamma(sum_epred).sum() + - np.sum(np.log(Y) * (epred - 1)) ) - return H - -.. code-block:: r - :caption: R - - softmax <- function(x) { - max.x <- max(x) - e <- exp(x - max.x) - return(e / sum(e)) - } - - dirichlet.fun <- function(pred, y) { - epred <- exp(pred) - sum_epred <- rowSums(epred) - return( - sum(lgamma(epred)) - - sum(lgamma(sum_epred)) - - sum(log(y) * (epred - 1)) - ) - } - - dirichlet.grad <- function(pred, y) { - epred <- exp(pred) - return( - epred * ( + def dirichlet_grad(pred: np.ndarray, Y: np.ndarray) -> np.ndarray: + epred = np.exp(pred) + return epred * ( digamma(epred) - - digamma(rowSums(epred)) - - log(y) - ) - ) - } - - dirichlet.hess <- function(pred, y) { - epred <- exp(pred) - grad <- dirichlet.grad(pred, y) - k <- ncol(y) - H <- array(dim = c(nrow(y), k, k)) - for (row in seq_len(nrow(y))) { - H[row, , ] <- ( - - trigamma(sum(epred[row,])) * tcrossprod(epred[row,]) - + diag(grad[row,] + trigamma(epred[row,]) * epred[row,]^2) + - digamma(np.sum(epred, axis=1, keepdims=True)) + - np.log(Y) ) + def dirichlet_hess(pred: np.ndarray, Y: np.ndarray) -> np.ndarray: + epred = np.exp(pred) + grad = dirichlet_grad(pred, Y) + k = Y.shape[1] + H = np.empty((pred.shape[0], k, k)) + for row in range(pred.shape[0]): + H[row, :, :] = ( + - trigamma(epred[row].sum()) * np.outer(epred[row], epred[row]) + + np.diag(grad[row] + trigamma(epred[row]) * epred[row] ** 2) + ) + return H + + .. code-tab:: r R + + softmax <- function(x) { + max.x <- max(x) + e <- exp(x - max.x) + return(e / sum(e)) } - return(H) - } - -Convince yourself that the implementation is correct: - -.. 
code-block:: python - :caption: Python - - from math import isclose - from scipy import stats - from scipy.optimize import check_grad - from scipy.special import softmax - - def gen_random_dirichlet(rng: np.random.Generator, m: int, k: int): - alpha = np.exp(rng.standard_normal(size=k)) - return rng.dirichlet(alpha, size=m) - - def test_dirichlet_fun_grad_hess(): - k = 3 - m = 10 - rng = np.random.default_rng(seed=123) - Y = gen_random_dirichlet(rng, m, k) - x0 = rng.standard_normal(size=k) - for row in range(Y.shape[0]): - fun_row = dirichlet_fun(x0.reshape((1,-1)), Y[[row]]) - ref_logpdf = stats.dirichlet.logpdf( - Y[row] / Y[row].sum(), # <- avoid roundoff error - np.exp(x0), + dirichlet.fun <- function(pred, y) { + epred <- exp(pred) + sum_epred <- rowSums(epred) + return( + sum(lgamma(epred)) + - sum(lgamma(sum_epred)) + - sum(log(y) * (epred - 1)) ) - assert isclose(fun_row, -ref_logpdf) + } - gdiff = check_grad( - lambda pred: dirichlet_fun(pred.reshape((1,-1)), Y[[row]]), - lambda pred: dirichlet_grad(pred.reshape((1,-1)), Y[[row]]), - x0 + dirichlet.grad <- function(pred, y) { + epred <- exp(pred) + return( + epred * ( + digamma(epred) + - digamma(rowSums(epred)) + - log(y) + ) ) - assert gdiff <= 1e-6 - - H_numeric = np.empty((k,k)) - eps = 1e-7 - for ii in range(k): - x0_plus_eps = x0.reshape((1,-1)).copy() - x0_plus_eps[0,ii] += eps - for jj in range(k): - H_numeric[ii, jj] = ( - dirichlet_grad(x0_plus_eps, Y[[row]])[0][jj] - - dirichlet_grad(x0.reshape((1,-1)), Y[[row]])[0][jj] - ) / eps - H = dirichlet_hess(x0.reshape((1,-1)), Y[[row]])[0] - np.testing.assert_almost_equal(H, H_numeric, decimal=6) - test_dirichlet_fun_grad_hess() - - -.. code-block:: r - :caption: R - - library(DirichletReg) - library(testthat) - - test_that("dirichlet formulae", { - k <- 3L - m <- 10L - set.seed(123) - alpha <- exp(rnorm(k)) - y <- rdirichlet(m, alpha) - x0 <- rnorm(k) - - for (row in seq_len(m)) { - logpdf <- dirichlet.fun(matrix(x0, nrow=1), y[row,,drop=F]) - ref_logpdf <- ddirichlet(y[row,,drop=F], exp(x0), log = T) - expect_equal(logpdf, -ref_logpdf) - - eps <- 1e-7 - grad_num <- numeric(k) - for (col in seq_len(k)) { - xplus <- x0 - xplus[col] <- x0[col] + eps - grad_num[col] <- ( - dirichlet.fun(matrix(xplus, nrow=1), y[row,,drop=F]) - - dirichlet.fun(matrix(x0, nrow=1), y[row,,drop=F]) - ) / eps + } + + dirichlet.hess <- function(pred, y) { + epred <- exp(pred) + grad <- dirichlet.grad(pred, y) + k <- ncol(y) + H <- array(dim = c(nrow(y), k, k)) + for (row in seq_len(nrow(y))) { + H[row, , ] <- ( + - trigamma(sum(epred[row,])) * tcrossprod(epred[row,]) + + diag(grad[row,] + trigamma(epred[row,]) * epred[row,]^2) + ) } - - grad <- dirichlet.grad(matrix(x0, nrow=1), y[row,,drop=F]) - expect_equal(grad |> as.vector(), grad_num, tolerance=1e-6) - - H_numeric <- array(dim=c(k, k)) - for (ii in seq_len(k)) { - xplus <- x0 - xplus[ii] <- x0[ii] + eps - for (jj in seq_len(k)) { - H_numeric[ii, jj] <- ( - dirichlet.grad(matrix(xplus, nrow=1), y[row,,drop=F])[1, jj] - - grad[1L, jj] + return(H) + } + + +Convince yourself that the implementation is correct: + +.. tabs:: + .. 
code-tab:: py + + from math import isclose + from scipy import stats + from scipy.optimize import check_grad + from scipy.special import softmax + + def gen_random_dirichlet(rng: np.random.Generator, m: int, k: int): + alpha = np.exp(rng.standard_normal(size=k)) + return rng.dirichlet(alpha, size=m) + + def test_dirichlet_fun_grad_hess(): + k = 3 + m = 10 + rng = np.random.default_rng(seed=123) + Y = gen_random_dirichlet(rng, m, k) + x0 = rng.standard_normal(size=k) + for row in range(Y.shape[0]): + fun_row = dirichlet_fun(x0.reshape((1,-1)), Y[[row]]) + ref_logpdf = stats.dirichlet.logpdf( + Y[row] / Y[row].sum(), # <- avoid roundoff error + np.exp(x0), + ) + assert isclose(fun_row, -ref_logpdf) + + gdiff = check_grad( + lambda pred: dirichlet_fun(pred.reshape((1,-1)), Y[[row]]), + lambda pred: dirichlet_grad(pred.reshape((1,-1)), Y[[row]]), + x0 + ) + assert gdiff <= 1e-6 + + H_numeric = np.empty((k,k)) + eps = 1e-7 + for ii in range(k): + x0_plus_eps = x0.reshape((1,-1)).copy() + x0_plus_eps[0,ii] += eps + for jj in range(k): + H_numeric[ii, jj] = ( + dirichlet_grad(x0_plus_eps, Y[[row]])[0][jj] + - dirichlet_grad(x0.reshape((1,-1)), Y[[row]])[0][jj] + ) / eps + H = dirichlet_hess(x0.reshape((1,-1)), Y[[row]])[0] + np.testing.assert_almost_equal(H, H_numeric, decimal=6) + test_dirichlet_fun_grad_hess() + + .. code-tab:: r R + + library(DirichletReg) + library(testthat) + + test_that("dirichlet formulae", { + k <- 3L + m <- 10L + set.seed(123) + alpha <- exp(rnorm(k)) + y <- rdirichlet(m, alpha) + x0 <- rnorm(k) + + for (row in seq_len(m)) { + logpdf <- dirichlet.fun(matrix(x0, nrow=1), y[row,,drop=F]) + ref_logpdf <- ddirichlet(y[row,,drop=F], exp(x0), log = T) + expect_equal(logpdf, -ref_logpdf) + + eps <- 1e-7 + grad_num <- numeric(k) + for (col in seq_len(k)) { + xplus <- x0 + xplus[col] <- x0[col] + eps + grad_num[col] <- ( + dirichlet.fun(matrix(xplus, nrow=1), y[row,,drop=F]) + - dirichlet.fun(matrix(x0, nrow=1), y[row,,drop=F]) ) / eps } + + grad <- dirichlet.grad(matrix(x0, nrow=1), y[row,,drop=F]) + expect_equal(grad |> as.vector(), grad_num, tolerance=1e-6) + + H_numeric <- array(dim=c(k, k)) + for (ii in seq_len(k)) { + xplus <- x0 + xplus[ii] <- x0[ii] + eps + for (jj in seq_len(k)) { + H_numeric[ii, jj] <- ( + dirichlet.grad(matrix(xplus, nrow=1), y[row,,drop=F])[1, jj] + - grad[1L, jj] + ) / eps + } + } + + H <- dirichlet.hess(matrix(xplus, nrow=1), y[row,,drop=F]) + expect_equal(H[1,,], H_numeric, tolerance=1e-6) } - - H <- dirichlet.hess(matrix(xplus, nrow=1), y[row,,drop=F]) - expect_equal(H[1,,], H_numeric, tolerance=1e-6) - } - }) + }) ****************************************** Dirichlet Regression as Objective Function @@ -302,61 +299,60 @@ the expected and true Hessian for Dirichlet will match, which is a nice property for optimization (i.e. the Hessian will be positive at a stationary point, which means it will be a minimum rather than a maximum or saddle point). -.. 
code-block:: python - :caption: Python - - def dirichlet_expected_hess(pred: np.ndarray) -> np.ndarray: - epred = np.exp(pred) - k = pred.shape[1] - Ehess = np.empty((pred.shape[0], k, k)) - for row in range(pred.shape[0]): - Ehess[row, :, :] = ( - - trigamma(epred[row].sum()) * np.outer(epred[row], epred[row]) - + np.diag(trigamma(epred[row]) * epred[row] ** 2) - ) - return Ehess - def test_dirichlet_expected_hess(): - k = 3 - rng = np.random.default_rng(seed=123) - x0 = rng.standard_normal(size=k) - y_sample = rng.dirichlet(np.exp(x0), size=5_000_000) - x_broadcast = np.broadcast_to(x0, (y_sample.shape[0], k)) - g_sample = dirichlet_grad(x_broadcast, y_sample) - ref = (g_sample.T @ g_sample) / y_sample.shape[0] - Ehess = dirichlet_expected_hess(x0.reshape((1,-1)))[0] - np.testing.assert_almost_equal(Ehess, ref, decimal=2) - test_dirichlet_expected_hess() - -.. code-block:: r - :caption: R - - dirichlet.expected.hess <- function(pred) { - epred <- exp(pred) - k <- ncol(pred) - H <- array(dim = c(nrow(pred), k, k)) - for (row in seq_len(nrow(pred))) { - H[row, , ] <- ( - - trigamma(sum(epred[row,])) * tcrossprod(epred[row,]) - + diag(trigamma(epred[row,]) * epred[row,]^2) - ) +.. tabs:: + .. code-tab:: py + + def dirichlet_expected_hess(pred: np.ndarray) -> np.ndarray: + epred = np.exp(pred) + k = pred.shape[1] + Ehess = np.empty((pred.shape[0], k, k)) + for row in range(pred.shape[0]): + Ehess[row, :, :] = ( + - trigamma(epred[row].sum()) * np.outer(epred[row], epred[row]) + + np.diag(trigamma(epred[row]) * epred[row] ** 2) + ) + return Ehess + def test_dirichlet_expected_hess(): + k = 3 + rng = np.random.default_rng(seed=123) + x0 = rng.standard_normal(size=k) + y_sample = rng.dirichlet(np.exp(x0), size=5_000_000) + x_broadcast = np.broadcast_to(x0, (y_sample.shape[0], k)) + g_sample = dirichlet_grad(x_broadcast, y_sample) + ref = (g_sample.T @ g_sample) / y_sample.shape[0] + Ehess = dirichlet_expected_hess(x0.reshape((1,-1)))[0] + np.testing.assert_almost_equal(Ehess, ref, decimal=2) + test_dirichlet_expected_hess() + + .. 
code-tab:: r R + + dirichlet.expected.hess <- function(pred) { + epred <- exp(pred) + k <- ncol(pred) + H <- array(dim = c(nrow(pred), k, k)) + for (row in seq_len(nrow(pred))) { + H[row, , ] <- ( + - trigamma(sum(epred[row,])) * tcrossprod(epred[row,]) + + diag(trigamma(epred[row,]) * epred[row,]^2) + ) + } + return(H) } - return(H) - } - - test_that("expected hess", { - k <- 3L - set.seed(123) - x0 <- rnorm(k) - alpha <- exp(x0) - n.samples <- 5e6 - y.samples <- rdirichlet(n.samples, alpha) - - x.broadcast <- rep(x0, n.samples) |> matrix(ncol=k, byrow=T) - grad.samples <- dirichlet.grad(x.broadcast, y.samples) - ref <- crossprod(grad.samples) / n.samples - Ehess <- dirichlet.expected.hess(matrix(x0, nrow=1)) - expect_equal(Ehess[1,,], ref, tolerance=1e-2) - }) + + test_that("expected hess", { + k <- 3L + set.seed(123) + x0 <- rnorm(k) + alpha <- exp(x0) + n.samples <- 5e6 + y.samples <- rdirichlet(n.samples, alpha) + + x.broadcast <- rep(x0, n.samples) |> matrix(ncol=k, byrow=T) + grad.samples <- dirichlet.grad(x.broadcast, y.samples) + ref <- crossprod(grad.samples) / n.samples + Ehess <- dirichlet.expected.hess(matrix(x0, nrow=1)) + expect_equal(Ehess[1,,], ref, tolerance=1e-2) + }) But note that this is still not usable for XGBoost, since the expected Hessian, just like the true Hessian, has shape ``[nrows, k, k]``, while @@ -376,29 +372,28 @@ of a diagonally dominant matrix: That is: take the absolute value of the expected Hessian for each row of the data, and sum by rows of the ``[k, k]``-shaped Hessian for that row in the data: -.. code-block:: python - :caption: Python - - def dirichlet_diag_upper_bound_expected_hess( - pred: np.ndarray, Y: np.ndarray - ) -> np.ndarray: - Ehess = dirichlet_expected_hess(pred) - diag_bound_Ehess = np.empty((pred.shape[0], Y.shape[1])) - for row in range(pred.shape[0]): - diag_bound_Ehess[row, :] = np.abs(Ehess[row, :, :]).sum(axis=1) - return diag_bound_Ehess - -.. code-block:: r - :caption: R - - dirichlet.diag.upper.bound.expected.hess <- function(pred, y) { - Ehess <- dirichlet.expected.hess(pred) - diag.bound.Ehess <- array(dim=dim(pred)) - for (row in seq_len(nrow(pred))) { - diag.bound.Ehess[row,] <- abs(Ehess[row,,]) |> rowSums() +.. tabs:: + .. code-tab:: py + + def dirichlet_diag_upper_bound_expected_hess( + pred: np.ndarray, Y: np.ndarray + ) -> np.ndarray: + Ehess = dirichlet_expected_hess(pred) + diag_bound_Ehess = np.empty((pred.shape[0], Y.shape[1])) + for row in range(pred.shape[0]): + diag_bound_Ehess[row, :] = np.abs(Ehess[row, :, :]).sum(axis=1) + return diag_bound_Ehess + + .. code-tab:: r R + + dirichlet.diag.upper.bound.expected.hess <- function(pred, y) { + Ehess <- dirichlet.expected.hess(pred) + diag.bound.Ehess <- array(dim=dim(pred)) + for (row in seq_len(nrow(pred))) { + diag.bound.Ehess[row,] <- abs(Ehess[row,,]) |> rowSums() + } + return(diag.bound.Ehess) } - return(diag.bound.Ehess) - } (*note: the calculation can be made more efficiently than what is shown here by not calculating the full matrix, and in R, by making the rows be the last @@ -407,60 +402,58 @@ dimension and transposing after the fact*) With all these pieces in place, one can now frame this model into the format required for XGBoost's custom objectives: -.. code-block:: python - :caption: Python +.. tabs:: + .. 
code-tab:: py - import xgboost as xgb - from typing import Tuple + import xgboost as xgb + from typing import Tuple - def dirichlet_xgb_objective( - pred: np.ndarray, dtrain: xgb.DMatrix - ) -> Tuple[np.ndarray, np.ndarray]: - Y = dtrain.get_label().reshape(pred.shape) - return ( - dirichlet_grad(pred, Y), - dirichlet_diag_upper_bound_expected_hess(pred, Y), - ) + def dirichlet_xgb_objective( + pred: np.ndarray, dtrain: xgb.DMatrix + ) -> Tuple[np.ndarray, np.ndarray]: + Y = dtrain.get_label().reshape(pred.shape) + return ( + dirichlet_grad(pred, Y), + dirichlet_diag_upper_bound_expected_hess(pred, Y), + ) + + .. code-tab:: r R -.. code-block:: r - :caption: R - - library(xgboost) - - dirichlet.xgb.objective <- function(pred, dtrain) { - y <- getinfo(dtrain, "label") - return( - list( - grad = dirichlet.grad(pred, y), - hess = dirichlet.diag.upper.bound.expected.hess(pred, y) + library(xgboost) + + dirichlet.xgb.objective <- function(pred, dtrain) { + y <- getinfo(dtrain, "label") + return( + list( + grad = dirichlet.grad(pred, y), + hess = dirichlet.diag.upper.bound.expected.hess(pred, y) + ) ) - ) - } + } And for an evaluation metric monitoring based on the Dirichlet log-likelihood: -.. code-block:: python - :caption: Python - - def dirichlet_eval_metric( - pred: np.ndarray, dtrain: xgb.DMatrix - ) -> Tuple[str, float]: - Y = dtrain.get_label().reshape(pred.shape) - return "dirichlet_ll", dirichlet_fun(pred, Y) - -.. code-block:: r - :caption: R - - dirichlet.eval.metric <- function(pred, dtrain) { - y <- getinfo(dtrain, "label") - ll <- dirichlet.fun(pred, y) - return( - list( - metric = "dirichlet_ll", - value = ll +.. tabs:: + .. code-tab:: py + + def dirichlet_eval_metric( + pred: np.ndarray, dtrain: xgb.DMatrix + ) -> Tuple[str, float]: + Y = dtrain.get_label().reshape(pred.shape) + return "dirichlet_ll", dirichlet_fun(pred, Y) + + .. code-tab:: r R + + dirichlet.eval.metric <- function(pred, dtrain) { + y <- getinfo(dtrain, "label") + ll <- dirichlet.fun(pred, y) + return( + list( + metric = "dirichlet_ll", + value = ll + ) ) - ) - } + } ***************** Practical Example @@ -478,88 +471,86 @@ lake (sand, silt, clay). The data: -.. code-block:: python - :caption: Python - - # depth - X = np.array([ - 10.4,11.7,12.8,13,15.7,16.3,18,18.7,20.7,22.1, - 22.4,24.4,25.8,32.5,33.6,36.8,37.8,36.9,42.2,47, - 47.1,48.4,49.4,49.5,59.2,60.1,61.7,62.4,69.3,73.6, - 74.4,78.5,82.9,87.7,88.1,90.4,90.6,97.7,103.7, - ]).reshape((-1,1)) - # sand, silt, clay - Y = np.array([ - [0.775,0.195,0.03], [0.719,0.249,0.032], [0.507,0.361,0.132], - [0.522,0.409,0.066], [0.7,0.265,0.035], [0.665,0.322,0.013], - [0.431,0.553,0.016], [0.534,0.368,0.098], [0.155,0.544,0.301], - [0.317,0.415,0.268], [0.657,0.278,0.065], [0.704,0.29,0.006], - [0.174,0.536,0.29], [0.106,0.698,0.196], [0.382,0.431,0.187], - [0.108,0.527,0.365], [0.184,0.507,0.309], [0.046,0.474,0.48], - [0.156,0.504,0.34], [0.319,0.451,0.23], [0.095,0.535,0.37], - [0.171,0.48,0.349], [0.105,0.554,0.341], [0.048,0.547,0.41], - [0.026,0.452,0.522], [0.114,0.527,0.359], [0.067,0.469,0.464], - [0.069,0.497,0.434], [0.04,0.449,0.511], [0.074,0.516,0.409], - [0.048,0.495,0.457], [0.045,0.485,0.47], [0.066,0.521,0.413], - [0.067,0.473,0.459], [0.074,0.456,0.469], [0.06,0.489,0.451], - [0.063,0.538,0.399], [0.025,0.48,0.495], [0.02,0.478,0.502], - ]) - -.. code-block:: r - :caption: R - - data("ArcticLake", package="DirichletReg") - x <- ArcticLake[, c("depth"), drop=F] - y <- ArcticLake[, c("sand", "silt", "clay")] |> as.matrix() +.. tabs:: + .. 
code-tab:: py + + # depth + X = np.array([ + 10.4,11.7,12.8,13,15.7,16.3,18,18.7,20.7,22.1, + 22.4,24.4,25.8,32.5,33.6,36.8,37.8,36.9,42.2,47, + 47.1,48.4,49.4,49.5,59.2,60.1,61.7,62.4,69.3,73.6, + 74.4,78.5,82.9,87.7,88.1,90.4,90.6,97.7,103.7, + ]).reshape((-1,1)) + # sand, silt, clay + Y = np.array([ + [0.775,0.195,0.03], [0.719,0.249,0.032], [0.507,0.361,0.132], + [0.522,0.409,0.066], [0.7,0.265,0.035], [0.665,0.322,0.013], + [0.431,0.553,0.016], [0.534,0.368,0.098], [0.155,0.544,0.301], + [0.317,0.415,0.268], [0.657,0.278,0.065], [0.704,0.29,0.006], + [0.174,0.536,0.29], [0.106,0.698,0.196], [0.382,0.431,0.187], + [0.108,0.527,0.365], [0.184,0.507,0.309], [0.046,0.474,0.48], + [0.156,0.504,0.34], [0.319,0.451,0.23], [0.095,0.535,0.37], + [0.171,0.48,0.349], [0.105,0.554,0.341], [0.048,0.547,0.41], + [0.026,0.452,0.522], [0.114,0.527,0.359], [0.067,0.469,0.464], + [0.069,0.497,0.434], [0.04,0.449,0.511], [0.074,0.516,0.409], + [0.048,0.495,0.457], [0.045,0.485,0.47], [0.066,0.521,0.413], + [0.067,0.473,0.459], [0.074,0.456,0.469], [0.06,0.489,0.451], + [0.063,0.538,0.399], [0.025,0.48,0.495], [0.02,0.478,0.502], + ]) + + .. code-tab:: r R + + data("ArcticLake", package="DirichletReg") + x <- ArcticLake[, c("depth"), drop=F] + y <- ArcticLake[, c("sand", "silt", "clay")] |> as.matrix() Fitting an XGBoost model and making predictions: -.. code-block:: python - :caption: Python - - from typing import Dict, List - - dtrain = xgb.DMatrix(X, label=Y) - results: Dict[str, Dict[str, List[float]]] = {} - booster = xgb.train( - params={ - "tree_method": "hist", - "num_target": Y.shape[1], - "base_score": 0, - "disable_default_eval_metric": True, - "max_depth": 3, - "seed": 123, - }, - dtrain=dtrain, - num_boost_round=10, - obj=dirichlet_xgb_objective, - evals=[(dtrain, "Train")], - evals_result=results, - custom_metric=dirichlet_eval_metric, - ) - yhat = softmax(booster.inplace_predict(X), axis=1) - -.. code-block:: r - :caption: R - - dtrain <- xgb.DMatrix(x, y) - booster <- xgb.train( - params = list( - tree_method="hist", - num_target=ncol(y), - base_score=0, - disable_default_eval_metric=TRUE, - max_depth=3, - seed=123 - ), - data = dtrain, - nrounds = 10, - obj = dirichlet.xgb.objective, - evals = list(Train=dtrain), - eval_metric = dirichlet.eval.metric - ) - raw.pred <- predict(booster, x, reshape=TRUE) - yhat <- apply(raw.pred, 1, softmax) |> t() +.. tabs:: + .. code-tab:: py + + from typing import Dict, List + + dtrain = xgb.DMatrix(X, label=Y) + results: Dict[str, Dict[str, List[float]]] = {} + booster = xgb.train( + params={ + "tree_method": "hist", + "num_target": Y.shape[1], + "base_score": 0, + "disable_default_eval_metric": True, + "max_depth": 3, + "seed": 123, + }, + dtrain=dtrain, + num_boost_round=10, + obj=dirichlet_xgb_objective, + evals=[(dtrain, "Train")], + evals_result=results, + custom_metric=dirichlet_eval_metric, + ) + yhat = softmax(booster.inplace_predict(X), axis=1) + + .. 
code-tab:: r R + + dtrain <- xgb.DMatrix(x, y) + booster <- xgb.train( + params = list( + tree_method="hist", + num_target=ncol(y), + base_score=0, + disable_default_eval_metric=TRUE, + max_depth=3, + seed=123 + ), + data = dtrain, + nrounds = 10, + obj = dirichlet.xgb.objective, + evals = list(Train=dtrain), + eval_metric = dirichlet.eval.metric + ) + raw.pred <- predict(booster, x, reshape=TRUE) + yhat <- apply(raw.pred, 1, softmax) |> t() Should produce an evaluation log as follows (note: the function is decreasing as @@ -604,102 +595,100 @@ For simplicity, this example will nevertheless reuse the same likelihood and gradient functions that were defined earlier alongside with SciPy's / R's L-BFGS solver to obtain the optimal vector-valued intercept: -.. code-block:: python - :caption: Python +.. tabs:: + .. code-tab:: py + + from scipy.optimize import minimize + + def get_optimal_intercepts(Y: np.ndarray) -> np.ndarray: + k = Y.shape[1] + res = minimize( + fun=lambda pred: dirichlet_fun( + np.broadcast_to(pred, (Y.shape[0], k)), + Y + ), + x0=np.zeros(k), + jac=lambda pred: dirichlet_grad( + np.broadcast_to(pred, (Y.shape[0], k)), + Y + ).sum(axis=0) + ) + return res["x"] + intercepts = get_optimal_intercepts(Y) + + .. code-tab:: r R + + get.optimal.intercepts <- function(y) { + k <- ncol(y) + broadcast.vec <- function(x) rep(x, nrow(y)) |> matrix(ncol=k, byrow=T) + res <- optim( + par = numeric(k), + fn = function(x) dirichlet.fun(broadcast.vec(x), y), + gr = function(x) dirichlet.grad(broadcast.vec(x), y) |> colSums(), + method = "L-BFGS-B" + ) + return(res$par) + } + intercepts <- get.optimal.intercepts(y) - from scipy.optimize import minimize - def get_optimal_intercepts(Y: np.ndarray) -> np.ndarray: - k = Y.shape[1] - res = minimize( - fun=lambda pred: dirichlet_fun( - np.broadcast_to(pred, (Y.shape[0], k)), - Y - ), - x0=np.zeros(k), - jac=lambda pred: dirichlet_grad( - np.broadcast_to(pred, (Y.shape[0], k)), - Y - ).sum(axis=0) +Now fitting a model again, this time with the intercept: + +.. tabs:: + .. code-tab:: py + + base_margin = np.broadcast_to(intercepts, Y.shape) + dtrain_w_intercept = xgb.DMatrix(X, label=Y, base_margin=base_margin) + results: Dict[str, Dict[str, List[float]]] = {} + booster = xgb.train( + params={ + "tree_method": "hist", + "num_target": Y.shape[1], + "base_score": 0, + "disable_default_eval_metric": True, + "max_depth": 3, + "seed": 123, + }, + dtrain=dtrain_w_intercept, + num_boost_round=10, + obj=dirichlet_xgb_objective, + evals=[(dtrain, "Train")], + evals_result=results, + custom_metric=dirichlet_eval_metric, ) - return res["x"] - intercepts = get_optimal_intercepts(Y) - -.. code-block:: r - :caption: R - - get.optimal.intercepts <- function(y) { - k <- ncol(y) - broadcast.vec <- function(x) rep(x, nrow(y)) |> matrix(ncol=k, byrow=T) - res <- optim( - par = numeric(k), - fn = function(x) dirichlet.fun(broadcast.vec(x), y), - gr = function(x) dirichlet.grad(broadcast.vec(x), y) |> colSums(), - method = "L-BFGS-B" + yhat = softmax( + booster.predict( + xgb.DMatrix(X, base_margin=base_margin) + ), + axis=1 ) - return(res$par) - } - intercepts <- get.optimal.intercepts(y) - -Now fitting a model again, this time with the intercept: - -.. 
code-block:: python - :caption: Python - - base_margin = np.broadcast_to(intercepts, Y.shape) - dtrain_w_intercept = xgb.DMatrix(X, label=Y, base_margin=base_margin) - results: Dict[str, Dict[str, List[float]]] = {} - booster = xgb.train( - params={ - "tree_method": "hist", - "num_target": Y.shape[1], - "base_score": 0, - "disable_default_eval_metric": True, - "max_depth": 3, - "seed": 123, - }, - dtrain=dtrain_w_intercept, - num_boost_round=10, - obj=dirichlet_xgb_objective, - evals=[(dtrain, "Train")], - evals_result=results, - custom_metric=dirichlet_eval_metric, - ) - yhat = softmax( - booster.predict( - xgb.DMatrix(X, base_margin=base_margin) - ), - axis=1 - ) - -.. code-block:: r - :caption: R - - base.margin <- rep(intercepts, nrow(y)) |> matrix(nrow=nrow(y), byrow=T) - dtrain <- xgb.DMatrix(x, y, base_margin=base.margin) - booster <- xgb.train( - params = list( - tree_method="hist", - num_target=ncol(y), - base_score=0, - disable_default_eval_metric=TRUE, - max_depth=3, - seed=123 - ), - data = dtrain, - nrounds = 10, - obj = dirichlet.xgb.objective, - evals = list(Train=dtrain), - eval_metric = dirichlet.eval.metric - ) - raw.pred <- predict( - booster, - x, - base_margin=base.margin, - reshape=TRUE - ) - yhat <- apply(raw.pred, 1, softmax) |> t() + .. code-tab:: r R + + base.margin <- rep(intercepts, nrow(y)) |> matrix(nrow=nrow(y), byrow=T) + dtrain <- xgb.DMatrix(x, y, base_margin=base.margin) + booster <- xgb.train( + params = list( + tree_method="hist", + num_target=ncol(y), + base_score=0, + disable_default_eval_metric=TRUE, + max_depth=3, + seed=123 + ), + data = dtrain, + nrounds = 10, + obj = dirichlet.xgb.objective, + evals = list(Train=dtrain), + eval_metric = dirichlet.eval.metric + ) + raw.pred <- predict( + booster, + x, + base_margin=base.margin, + reshape=TRUE + ) + yhat <- apply(raw.pred, 1, softmax) |> t() .. code-block:: none diff --git a/doc/tutorials/categorical.rst b/doc/tutorials/categorical.rst index f0409aaa393f..0d64ac562b28 100644 --- a/doc/tutorials/categorical.rst +++ b/doc/tutorials/categorical.rst @@ -2,23 +2,21 @@ Categorical Data ################ -.. note:: +**Contents** - As of XGBoost 1.6, the feature is experimental and has limited features. Only the - Python package is fully supported. +.. contents:: + :backlinks: none + :local: -.. versionadded:: 3.0 - - Support for the R package using ``factor``. +Since version 1.5, XGBoost has support for categorical data. For numerical data, the +split condition is defined as :math:`value < threshold`, while for categorical data the +split is defined depending on whether partitioning or onehot encoding is used. For +partition-based splits, the splits are specified as :math:`value \in categories`, where +``categories`` is the set of categories in one feature. If onehot encoding is used +instead, then the split is defined as :math:`value == category`. More advanced categorical +split strategy is planned for future releases and this tutorial details how to inform +XGBoost about the data type. -Starting from version 1.5, the XGBoost Python package has experimental support for -categorical data available for public testing. For numerical data, the split condition is -defined as :math:`value < threshold`, while for categorical data the split is defined -depending on whether partitioning or onehot encoding is used. For partition-based splits, -the splits are specified as :math:`value \in categories`, where ``categories`` is the set -of categories in one feature. 
If onehot encoding is used instead, then the split is
-defined as :math:`value == category`. More advanced categorical split strategy is planned
-for future releases and this tutorial details how to inform XGBoost about the data type.

 ************************************
 Training with scikit-learn Interface
 ************************************
@@ -69,6 +67,9 @@ for a worked example of using categorical data with ``scikit-learn`` interface w
 one-hot encoding.  A comparison between using one-hot encoded data and XGBoost's
 categorical data support can be found :ref:`sphx_glr_python_examples_cat_in_the_dat.py`.

+.. versionadded:: 3.0
+
+   Support for the R package using ``factor``.

 ********************
 Optimal Partitioning
@@ -137,38 +138,153 @@ feature it's specified as ``"c"``.  The Dask module in XGBoost has the same inte
 :class:`dask.Array <dask.array.Array>` can also be used for categorical data. Lastly, the sklearn interface
 :py:class:`~xgboost.XGBRegressor` has the same parameter.

-****************
-Data Consistency
-****************
+.. _cat-recode:
+
+********************************
+Auto-recoding (Data Consistency)
+********************************
+
+.. versionchanged:: 3.1

-XGBoost accepts parameters to indicate which feature is considered categorical, either through the ``dtypes`` of a dataframe or through the ``feature_types`` parameter. However, XGBoost by itself doesn't store information on how categories are encoded in the first place. For instance, given an encoding schema that maps music genres to integer codes:
+   Starting with XGBoost 3.1, the **Python** interface can perform automatic re-coding for
+   new inputs.
+
+XGBoost accepts parameters to indicate which feature is considered categorical, either
+through the ``dtypes`` of a dataframe or through the ``feature_types`` parameter. However,
+except for the Python interface, XGBoost doesn't store the information about how
+categories are encoded in the first place. For instance, given an encoding schema that
+maps music genres to integer codes:

 .. code-block:: python

     {"acoustic": 0, "indie": 1, "blues": 2, "country": 3}

-XGBoost doesn't know this mapping from the input and hence cannot store it in the model. The mapping usually happens in the users' data engineering pipeline with column transformers like :py:class:`sklearn.preprocessing.OrdinalEncoder`. To make sure correct result from XGBoost, users need to keep the pipeline for transforming data consistent across training and testing data. One should watch out for errors like:
+Aside from the Python interface (R/Java/C, etc.), XGBoost doesn't know this mapping from
+the input and hence cannot store it in the model. The mapping usually happens in the
+users' data engineering pipeline. To ensure the correct result from XGBoost, users need to
+keep the pipeline for transforming data consistent across training and testing data.
+
+Starting with 3.1, the **Python** interface can remember the encoding and perform recoding
+during inference and training continuation when the input is a dataframe (`pandas`,
+`cuDF`, `polars`, `pyarrow`, `modin`). The feature support focuses on basic usage. It has
+some restrictions on the types of inputs that can be accepted. First, category names must
+have one of the following types:
+
+- string
+- integer, from 8-bit to 64-bit; both signed and unsigned are supported.
+- 32-bit or 64-bit floating point
+
+Other category types are not supported. Second, the input types must be strictly
+consistent. For example, XGBoost will raise an error if the categorical columns in the
+training set are unsigned integers whereas the test dataset has signed integer columns. If
+you have categories that are not one of the supported types, you need to perform the
+re-coding using a pre-processing data transformer like the
+:py:class:`sklearn.preprocessing.OrdinalEncoder`. See
+:ref:`sphx_glr_python_examples_cat_pipeline.py` for a worked example using an ordinal
+encoder. To clarify, the type here refers to the type of the name of categories (called
+``Index`` in pandas):
+
+.. code-block:: python
+
+    # string type
+    {"acoustic": 0, "indie": 1, "blues": 2, "country": 3}
+    # integer type
+    {-1: 0, 1: 1, 3: 2, 7: 3}
+    # depending on the dataframe implementation, it can be signed or unsigned.
+    {5: 0, 1: 1, 3: 2, 7: 3}
+    # floating point type, both 32-bit and 64-bit are supported.
+    {-1.0: 0, 1.0: 1, 3.0: 2, 7.0: 3}
+
+Internally, XGBoost attempts to extract the categories from the dataframe inputs. For
+inference (predict), the re-coding happens on the fly and there's no data copy (barring
+some internal transformations performed by the dataframe itself). For training
+continuation, however, re-coding requires some extra steps if you are using the native
+interface. The sklearn interface and the Dask interface can handle training continuation
+automatically. Lastly, please note that using the re-coder with the native interface is
+still experimental. It's ready for testing, but we want to observe the feature usage for a
+period of time and might make some breaking changes if needed. The following is a snippet
+using the native interface:
+
+.. code-block:: python
+
+    import pandas as pd
+    import xgboost
+
+    X = pd.DataFrame()  # training data with categorical columns; ``y`` is the label
+    Xy = xgboost.QuantileDMatrix(X, y, enable_categorical=True)
+    booster = xgboost.train({}, Xy)
+
+    # XGBoost can handle re-coding for inference without user intervention
+    X_new = pd.DataFrame()  # new data with the same schema; ``y_new`` is its label
+    booster.inplace_predict(X_new)
+
+    # Get categories saved in the model for training continuation
+    categories = booster.get_categories()
+    # Use saved categories as a reference for re-coding. Training continuation requires
+    # a re-coded DMatrix; pass the categories as ``feature_types``.
+    Xy_new = xgboost.QuantileDMatrix(
+        X_new, y_new, feature_types=categories, enable_categorical=True, ref=Xy
+    )
+    booster_1 = xgboost.train({}, Xy_new, xgb_model=booster)
+
+
+No extra step is required for using the scikit-learn interface as long as the inputs are
+dataframes. During training continuation, XGBoost will either extract the categories from
+the previous model or use the categories from the new training dataset if the input model
+doesn't have the information. As a side note, users can inspect the content of the
+categories by exporting them to Arrow arrays. This interface is still experimental:

 .. code-block:: python

-    X_train["genre"] = X_train["genre"].astype("category")
-    reg = xgb.XGBRegressor(enable_categorical=True).fit(X_train, y_train)
+    categories = booster.get_categories(export_to_arrow=True)
+    print(categories.to_arrow())
+
+For **R**, the auto-recoding is not yet supported as of 3.1. To provide an example:
+
+.. code-block:: R
+
+   > f0 = factor(c("a", "b", "c"))
+   > as.numeric(f0)
+   [1] 1 2 3
+   > f0
+   [1] a b c
+   Levels: a b c
+
+In the above snippet, we have the mapping: ``a -> 1, b -> 2, c -> 3``. Assuming the above
+is the training data, and the next snippet is the test data:
+
+.. code-block:: R
+
+   > f1 = factor(c("a", "c"))
+   > as.numeric(f1)
+   [1] 1 2
+   > f1
+   [1] a c
+   Levels: a c
+
+
+Now, we have ``a -> 1, c -> 2`` because ``b`` is missing, and the R factor encodes the data
+differently, resulting in an invalid test-time encoding. XGBoost cannot remember the
+original encoding for the R package. You will have to encode the data explicitly during
+inference:
+
+.. code-block:: R

-    # invalid encoding
-    X_test["genre"] = X_test["genre"].astype("category")
-    reg.predict(X_test)
+
+   > f1 = factor(c("a", "c"), levels = c("a", "b", "c"))
+   > f1
+   [1] a c
+   Levels: a b c
+   > as.numeric(f1)
+   [1] 1 3

-In the above snippet, training data and test data are encoded separately, resulting in two different encoding schemas and invalid prediction result. See :ref:`sphx_glr_python_examples_cat_pipeline.py` for a worked example using ordinal encoder.

 *************
 Miscellaneous
 *************

-By default, XGBoost assumes input categories are integers starting from 0 till the number
-of categories :math:`[0, n\_categories)`. However, user might provide inputs with invalid
-values due to mistakes or missing values in training dataset. It can be negative value,
-integer values that can not be accurately represented by 32-bit floating point, or values
-that are larger than actual number of unique categories. During training this is
+By default, XGBoost assumes input category codes are integers starting from 0 up to the
+number of categories :math:`[0, n\_categories)`. However, users might provide inputs with
+invalid values due to mistakes or missing values in the training dataset. These can be
+negative values, integer values that cannot be accurately represented by 32-bit floating
+point, or values that are larger than the actual number of unique categories. During
+training this is
 validated but for prediction it's treated as the same as not-chosen category for
 performance reasons.

diff --git a/doc/tutorials/external_memory.rst b/doc/tutorials/external_memory.rst
index bbdd9f20df2b..030e6b841dbc 100644
--- a/doc/tutorials/external_memory.rst
+++ b/doc/tutorials/external_memory.rst
@@ -2,90 +2,137 @@ Using XGBoost External Memory Version
 #####################################

+**Contents**
+
+.. contents::
+  :backlinks: none
+  :local:
+
+
+********
+Overview
+********
+
 When working with large datasets, training XGBoost models can be challenging as the entire
-dataset needs to be loaded into memory. This can be costly and sometimes
-infeasible. Starting from 1.5, users can define a custom iterator to load data in chunks
-for running XGBoost algorithms. External memory can be used for training and prediction,
-but training is the primary use case and it will be our focus in this tutorial. For
-prediction and evaluation, users can iterate through the data themselves, whereas training
-requires the entire dataset to be loaded into the memory. Significant progress was made in
-the 3.0 release for the GPU implementation. We will introduce the difference between CPU
-and GPU in the following sections.
+dataset needs to be loaded into the main memory. This can be costly and sometimes
+infeasible.
+
+External memory training is sometimes called out-of-core training. It refers to the
+capability of XGBoost to optionally cache data in a location external to the main
+processor, be it CPU or GPU. XGBoost doesn't support network file systems by itself. As a
+result, for CPU, the external memory usually refers to a hard drive; for GPU, it refers
+to either the host memory or a hard drive.
+
+Users can define a custom iterator to load data in chunks for running XGBoost
+algorithms. External memory can be used for training and prediction, but training is the
+primary use case and it will be our focus in this tutorial. For prediction and evaluation,
+users can iterate through the data themselves, whereas training requires the entire
+dataset to be loaded into memory. During model training, XGBoost fetches the cache in
+batches to construct the decision trees, hence avoiding loading the entire dataset into
+the main memory and achieving better vertical scaling (scaling within the same node).
+
+Significant progress was made in the 3.0 release for the GPU implementation. We will
+introduce the difference between CPU and GPU in the following sections.

 .. note::

-   Training on data from external memory is not supported by the ``exact`` tree method.
+   Training on data from external memory is not supported by the ``exact`` tree method. We
+   recommend using the default ``hist`` tree method for performance reasons.

 .. note::

   The feature is considered experimental but ready for public testing in 3.0. Vector-leaf
   is not yet supported.

-The external memory support has undergone multiple development iterations. Like the
-:py:class:`~xgboost.QuantileDMatrix` with :py:class:`~xgboost.DataIter`, XGBoost loads
-data batch-by-batch using a custom iterator supplied by the user. However, unlike the
-:py:class:`~xgboost.QuantileDMatrix`, external memory does not concatenate the batches
-(unless specified by the ``extmem_single_page``) . Instead, it caches all batches in the
-external memory and fetch them on-demand. Go to the end of the document to see a
-comparison between :py:class:`~xgboost.QuantileDMatrix` and the external memory version of
-:py:class:`~xgboost.ExtMemQuantileDMatrix`.
+The external memory support has undergone multiple development iterations. See the
+sections below for a brief history.

-**Contents**
-
-.. contents::
-   :backlinks: none
-   :local:

 *************
 Data Iterator
 *************

-Starting with XGBoost 1.5, users can define their own data loader using Python or C
-interface. Some examples are in the ``demo`` directory for a quick start. To enable
-external memory training, users need to define a data iterator with 2 class methods:
-``next`` and ``reset``, then pass it into the :py:class:`~xgboost.DMatrix` or the
-:py:class:`~xgboost.ExtMemQuantileDMatrix` constructor.
+To start using external memory, users need to define a data iterator. The data iterator
+interface was added to the Python and C interfaces in 1.5, and to the R interface in
+3.0.0. Like the :py:class:`~xgboost.QuantileDMatrix` with :py:class:`~xgboost.DataIter`,
+XGBoost loads data batch-by-batch using the custom iterator supplied by the user. However,
+unlike the :py:class:`~xgboost.QuantileDMatrix`, external memory does not concatenate the
+batches (unless specified by the ``extmem_single_page`` option for the GPU). Instead, it
+caches all batches in the external memory and fetches them on demand. Go to the end of the
+document to see a comparison between :py:class:`~xgboost.QuantileDMatrix` and the external
+memory version of :py:class:`~xgboost.ExtMemQuantileDMatrix`.
+
+Some examples are in the ``demo`` directory for a quick start. To enable external memory
+training, the custom data iterator needs to have two class methods: ``next`` and
+``reset``.

 .. code-block:: python

-   import os
-   from typing import List, Callable
-   import xgboost
-   from sklearn.datasets import load_svmlight_file
-
-   class Iterator(xgboost.DataIter):
-     def __init__(self, svm_file_paths: List[str]) -> None:
-       self._file_paths = svm_file_paths
-       self._it = 0
-       # XGBoost will generate some cache files under the current directory with the prefix
-       # "cache"
-       super().__init__(cache_prefix=os.path.join(".", "cache"))
-
-     def next(self, input_data: Callable) -> bool:
-       """Advance the iterator by 1 step and pass the data to XGBoost. This function is
-       called by XGBoost during the construction of ``DMatrix``
-
-       """
-       if self._it == len(self._file_paths):
-         # return False to let XGBoost know this is the end of the iteration
-         return False
-
-       # input_data is a function passed in by XGBoost and has the exact same signature of
-       # ``DMatrix``
-       X, y = load_svmlight_file(self._file_paths[self._it])
-       # Keyword-only arguments, see the ``DMatrix`` class for accepted arguments.
-       input_data(data=X, label=y)
-       self._it += 1
-       # Return True to let XGBoost know we haven't seen all the files yet.
-       return True
-
-     def reset(self) -> None:
-       """Reset the iterator to its beginning"""
-       self._it = 0
-
-   it = Iterator(["file_0.svm", "file_1.svm", "file_2.svm"])
-
-   # Use the ``ExtMemQuantileDMatrix`` for the hist tree method.
+    import os
+    from typing import Callable, List, Literal, Tuple
+
+    import numpy as np
+    import xgboost
+
+    class Iterator(xgboost.DataIter):
+        """A custom iterator for loading files in batches."""
+
+        def __init__(
+            self, device: Literal["cpu", "cuda"], file_paths: List[Tuple[str, str]]
+        ) -> None:
+            self.device = device
+
+            self._file_paths = file_paths
+            self._it = 0
+            # XGBoost will generate some cache files under the current directory with the
+            # prefix "cache"
+            super().__init__(cache_prefix=os.path.join(".", "cache"))
+
+        def load_file(self) -> Tuple[np.ndarray, np.ndarray]:
+            """Load a single batch of data."""
+            X_path, y_path = self._file_paths[self._it]
+            # When the `ExtMemQuantileDMatrix` is used, the device must match. GPU cannot
+            # consume CPU input data and vice-versa.
+            if self.device == "cpu":
+                X = np.load(X_path)
+                y = np.load(y_path)
+            else:
+                import cupy as cp
+
+                X = cp.load(X_path)
+                y = cp.load(y_path)
+
+            assert X.shape[0] == y.shape[0]
+            return X, y
+
+        def next(self, input_data: Callable) -> bool:
+            """Advance the iterator by 1 step and pass the data to XGBoost. This function
+            is called by XGBoost during the construction of ``DMatrix``
+
+            """
+            if self._it == len(self._file_paths):
+                # return False to let XGBoost know this is the end of iteration
+                return False
+
+            # input_data is a keyword-only function passed in by XGBoost and has a similar
+            # signature to the ``DMatrix`` constructor.
+            X, y = self.load_file()
+            input_data(data=X, label=y)
+            self._it += 1
+            return True
+
+        def reset(self) -> None:
+            """Reset the iterator to its beginning"""
+            self._it = 0
+
+After defining the iterator, we can pass it into the :py:class:`~xgboost.DMatrix` or
+the :py:class:`~xgboost.ExtMemQuantileDMatrix` constructor:
+
+.. code-block:: python
+
+    it = Iterator(
+        device="cpu",
+        file_paths=[("X_0.npy", "y_0.npy"), ("X_1.npy", "y_1.npy"), ("X_2.npy", "y_2.npy")],
+    )
+
+    # Use the ``ExtMemQuantileDMatrix`` for the hist tree method (recommended).
     Xy = xgboost.ExtMemQuantileDMatrix(it)
     booster = xgboost.train({"tree_method": "hist"}, Xy)

@@ -117,15 +164,14 @@ GPU Version (GPU Hist tree method)

 External memory is supported by GPU algorithms (i.e., when ``device`` is set to
 ``cuda``). Starting with 3.0, the default GPU implementation is similar to what the CPU
 version does. It also supports the use of :py:class:`~xgboost.ExtMemQuantileDMatrix` when
-the ``hist`` tree method is employed. For a GPU device, the main memory is the device
-memory, whereas the external memory can be either a disk or the CPU memory. XGBoost stages
-the cache on CPU memory by default. Users can change the backing storage to disk by
+the ``hist`` tree method is employed (default). For a GPU device, the main memory is the
+device memory, whereas the external memory can be either a disk or the CPU memory. XGBoost
+stages the cache on CPU memory by default. Users can change the backing storage to disk by
 specifying the ``on_host`` parameter in the :py:class:`~xgboost.DataIter`. However, using
 the disk is not recommended as it's likely to make the GPU slower than the CPU. The option
-is here for experimental purposes only. In addition,
-:py:class:`~xgboost.ExtMemQuantileDMatrix` parameters ``max_num_device_pages``,
-``min_cache_page_bytes``, and ``max_quantile_batches`` can help control the data placement
-and memory usage.
+is here for experimentation purposes only. In addition, the
+:py:class:`~xgboost.ExtMemQuantileDMatrix` parameters ``min_cache_page_bytes`` and
+``max_quantile_batches`` can help control the data placement and memory usage.

 Inputs to the :py:class:`~xgboost.ExtMemQuantileDMatrix` (through the iterator) must be on
 the GPU. Following is a snippet from :ref:`sphx_glr_python_examples_external_memory.py`:

@@ -138,8 +184,8 @@ the GPU. Following is a snippet from :ref:`sphx_glr_python_examples_external_mem

     # It's important to use RMM for GPU-based external memory to improve performance.
     # If XGBoost is not built with RMM support, a warning will be raised.
-    # We use the pool memory resource here, you can also try the `ArenaMemoryResource` for
-    # improved memory fragmentation handling.
+    # We use the pool memory resource here for simplicity; you can also try the
+    # `ArenaMemoryResource` for improved memory fragmentation handling.
     mr = rmm.mr.PoolMemoryResource(rmm.mr.CudaAsyncMemoryResource())
     rmm.mr.set_current_device_resource(mr)
     # Set the allocator for cupy as well.
@@ -147,22 +193,22 @@ the GPU. Following is a snippet from :ref:`sphx_glr_python_examples_external_mem

     # Make sure XGBoost is using RMM for all allocations.
     with xgboost.config_context(use_rmm=True):
         # Construct the iterators for ExtMemQuantileDMatrix
-        # ...
-    # Build the ExtMemQuantileDMatrix and start training
-    Xy_train = xgboost.ExtMemQuantileDMatrix(it_train, max_bin=n_bins)
-    # Use the training DMatrix as a reference
-    Xy_valid = xgboost.ExtMemQuantileDMatrix(it_valid, max_bin=n_bins, ref=Xy_train)
-    booster = xgboost.train(
-        {
-            "tree_method": "hist",
-            "max_depth": 6,
-            "max_bin": n_bins,
-            "device": device,
-        },
-        Xy_train,
-        num_boost_round=n_rounds,
-        evals=[(Xy_train, "Train"), (Xy_valid, "Valid")]
-    )
+        # ...
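+        # For example, assuming ``Iterator`` is the class defined earlier in this
+        # document and ``train_files``/``valid_files`` are hypothetical lists of
+        # (X, y) file pairs:
+        #
+        #   it_train = Iterator(device="cuda", file_paths=train_files)
+        #   it_valid = Iterator(device="cuda", file_paths=valid_files)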
+        # Build the ExtMemQuantileDMatrix and start training
+        Xy_train = xgboost.ExtMemQuantileDMatrix(it_train, max_bin=n_bins)
+        # Use the training DMatrix as a reference
+        Xy_valid = xgboost.ExtMemQuantileDMatrix(it_valid, max_bin=n_bins, ref=Xy_train)
+        booster = xgboost.train(
+            {
+                "tree_method": "hist",
+                "max_depth": 6,
+                "max_bin": n_bins,
+                "device": device,
+            },
+            Xy_train,
+            num_boost_round=n_rounds,
+            evals=[(Xy_train, "Train"), (Xy_valid, "Valid")]
+        )

 It's crucial to use `RAPIDS Memory Manager (RMM) <https://github.com/rapidsai/rmm>`__ with
 an asynchronous memory resource for all memory allocation when training with external

@@ -170,12 +216,13 @@ memory. XGBoost relies on the asynchronous memory pool to reduce the overhead of
 fetching. In addition, the open source `NVIDIA Linux driver
 <https://github.com/NVIDIA/open-gpu-kernel-modules>`__ is required for ``Heterogeneous
 memory management (HMM)`` support. Usually, users need not
-to change :py:class:`~xgboost.ExtMemQuantileDMatrix` parameters ``max_num_device_pages``
-and ``min_cache_page_bytes``, they are automatically configured based on the device and
-don't change model accuracy. However, the ``max_quantile_batches`` can be useful if
+change :py:class:`~xgboost.ExtMemQuantileDMatrix` parameters like
+``min_cache_page_bytes``; they are automatically configured based on the device and don't
+change model accuracy. However, the ``max_quantile_batches`` can be useful if
 :py:class:`~xgboost.ExtMemQuantileDMatrix` is running out of device memory during
 construction, see :py:class:`~xgboost.QuantileDMatrix` and the following sections for more
-info.
+info. Currently, we focus on devices with ``NVLink-C2C`` support for GPU-based external
+memory.

 In addition to the batch-based data fetching, the GPU version supports concatenating
 batches into a single blob for the training data to improve performance. For GPUs

@@ -218,11 +265,107 @@ through the input data twice, as a result, the most significant overhead compare
 in-core training is one additional data read when the data is dense. Please note that
 there are multiple variants of the platform and they come with different C2C bandwidths.
 During initial development of the feature, we used the LPDDR5 480G version,
-which has about 350GB/s bandwidth for host to device transfer.
+which has about 350GB/s bandwidth for host-to-device transfer. When choosing the variant
+for training XGBoost models, one should pay extra attention to the C2C bandwidth.
+
+Here we provide a simple example as a starting point for training with external memory. We
+used this example for one of the benchmarks. To train a model on `2 ^ 29` 32-bit floating
+point samples with `512` features (1TB in total) on a GH200 system (an H200 GPU connected
+to a Grace CPU by a chip-to-chip link), one can start with the following recipe (a code
+sketch is given at the end of this section):
+
+- Evenly divide the data into 128 batches with 8GB per batch.
+- Define a custom iterator as previously described.
+- Set the `max_quantile_batches` parameter of the
+  :py:class:`~xgboost.ExtMemQuantileDMatrix` to 32 (256GB per sub-stream for
+  quantization), then load the data.
+- Start training with ``device=cuda``.

 To run experiments on these platforms, the open source `NVIDIA Linux driver
 <https://github.com/NVIDIA/open-gpu-kernel-modules>`__
-with version ``>=565.47`` is required, it should come with CTK 12.7 and later versions.
+with version ``>=565.47`` is required; it ships with CTK 12.7 and later
+versions. Lastly, there's a known issue with Linux 6.11 that can lead to CUDA host memory
+allocation failure with an ``invalid argument`` error.
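+
+As a concrete illustration of the recipe above, a minimal sketch could look like the
+following (``train_files`` is a hypothetical list of 128 ``(X, y)`` file pairs, roughly
+8GB per batch, and the number of boosting rounds is illustrative):
+
+.. code-block:: python
+
+    with xgboost.config_context(use_rmm=True):
+        it_train = Iterator(device="cuda", file_paths=train_files)
+        # Quantize with 32 batches (256GB) per sub-stream to bound memory usage.
+        Xy_train = xgboost.ExtMemQuantileDMatrix(it_train, max_quantile_batches=32)
+        booster = xgboost.train(
+            {"device": "cuda", "tree_method": "hist"},
+            Xy_train,
+            num_boost_round=100,
+        )
+
+.. _extmem-adaptive-cache: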
+
+==============
+Adaptive Cache
+==============
+
+Starting with 3.1, XGBoost introduces an adaptive cache for GPU-based external memory
+training. The feature helps split the data cache into a host cache and a device cache. By
+keeping a portion of the cache on the GPU, we can reduce the amount of data transfer
+during training when there's a sufficient amount of GPU memory. The feature can be
+controlled by the ``cache_host_ratio`` parameter in the
+:py:class:`xgboost.ExtMemQuantileDMatrix`. It is disabled when the device has full C2C
+bandwidth since it's not needed there. On devices with reduced bandwidth or with PCIe
+connections, unless explicitly specified, the ratio is automatically estimated based on
+the device memory size and the size of the dataset.
+
+However, this parameter increases memory fragmentation as XGBoost needs large memory pages
+with irregular sizes. As a result, you might see an out-of-memory error after the
+construction of the ``DMatrix`` but before the actual training begins.
+
+For reference, we tested the adaptive cache with a 128GB (512 features) dense 32-bit
+floating point dataset using an NVIDIA A6000 GPU, which comes with 48GB device memory. The
+``cache_host_ratio`` was estimated to be about 0.3, meaning about 30 percent of the
+quantized cache was on the host and the remaining 70 percent was in-core. Given this
+ratio, the overhead is minimal. However, the estimated ratio increases as the data size
+grows.
+
+================================
+Non-Uniform Memory Access (NUMA)
+================================
+
+On multi-socket systems, `NUMA
+<https://en.wikipedia.org/wiki/Non-uniform_memory_access>`__ helps optimize data access by
+prioritizing memory that is local to each socket. On these systems, it's essential to set
+the correct affinity to reduce the overhead of cross-socket data access. Since out-of-core
+training stages the data cache on the host and trains the model using a GPU, the training
+performance is particularly sensitive to the data read bandwidth. To provide some context,
+on a GB200 machine, accessing the wrong NUMA node from a GPU can reduce the C2C bandwidth
+by half. Even if you are not using distributed training, you should still pay attention to
+NUMA control since there's no guarantee that your process will have the correct
+configuration.
+
+We have tested two approaches to NUMA configuration. The first (and recommended) way is to
+use the ``numactl`` command line tool available on Linux distributions:
+
+.. code-block:: sh
+
+    numactl --membind=${NODEID} --cpunodebind=${NODEID} ./myapp
+
+To obtain the node ID, you can check the machine topology via ``nvidia-smi``:
+
+.. code-block:: sh
+
+    nvidia-smi topo -m
+
+The column ``NUMA Affinity`` lists the NUMA node ID for each GPU. In the example output
+shown below, `GPU0` is associated with NUMA node `0`::
+
+            GPU0    GPU1    NIC0    NIC1    NIC2    NIC3    CPU Affinity    NUMA Affinity   GPU NUMA ID
+    GPU0     X      NV18    NODE    NODE    NODE    SYS     0-71            0               2
+    GPU1    NV18     X      SYS     SYS     SYS     NODE    72-143          1               10
+    NIC0    NODE    SYS      X      PIX     NODE    SYS
+    NIC1    NODE    SYS     PIX      X      NODE    SYS
+    NIC2    NODE    SYS     NODE    NODE     X      SYS
+    NIC3    SYS     NODE    SYS     SYS     SYS      X
+
+Alternatively, one can also use the ``hwloc`` command line interface; please make sure the
+strict flag is used:
+
+.. code-block:: sh
+
+    hwloc-bind --strict --membind node:${NODEID} --cpubind node:${NODEID} ./myapp
+
+Another approach is to use the CPU affinity. The `dask-cuda
+<https://github.com/rapidsai/dask-cuda>`__ project configures optimal CPU affinity for the
+Dask interface using the `nvml` library in addition to the Linux sched routines. This can
+help guide the memory allocation policy but does not enforce it. As a result, when the
+memory is under pressure, the OS can allocate memory on different NUMA nodes. On the other
+hand, it's easier to use since launchers like :py:class:`~dask_cuda.LocalCUDACluster` have
+already integrated the solution.
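+
+For instance, a minimal sketch of the Dask route (assuming ``dask_cuda`` and
+``distributed`` are installed) looks like:
+
+.. code-block:: python
+
+    from dask_cuda import LocalCUDACluster
+    from distributed import Client
+
+    # LocalCUDACluster assigns each worker a CPU affinity matching its GPU's NUMA
+    # node, so no manual ``numactl`` invocation is needed.
+    with LocalCUDACluster() as cluster, Client(cluster) as client:
+        ...  # submit XGBoost training through the xgboost.dask interface here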
+================================
+Non-Uniform Memory Access (NUMA)
+================================
+
+On multi-socket systems, `NUMA
+`__ helps optimize data access by
+prioritizing memory that is local to each socket. On these systems, it's essential to set
+the correct affinity to reduce the overhead of cross-socket data access. Since out-of-core
+training stages the data cache on the host and trains the model using a GPU, training
+performance is particularly sensitive to the data read bandwidth. To provide some context,
+on a GB200 machine, accessing the wrong NUMA node from a GPU can reduce the C2C bandwidth
+by half. Even if you are not using distributed training, you should still pay attention to
+NUMA control since there's no guarantee that your process will have the correct
+configuration.
+
+We have tested two approaches to NUMA configuration. The first (and recommended) way is to
+use the ``numactl`` command-line tool available on Linux distributions:
+
+.. code-block:: sh
+
+    numactl --membind=${NODEID} --cpunodebind=${NODEID} ./myapp
+
+
+To obtain the node ID, you can check the machine topology via ``nvidia-smi``:
+
+.. code-block:: sh
+
+    nvidia-smi topo -m
+
+The column ``NUMA Affinity`` lists the NUMA node ID for each GPU. In the example output
+shown below, `GPU0` is associated with node ID `0`::
+
+          GPU0    GPU1    NIC0    NIC1    NIC2    NIC3    CPU Affinity    NUMA Affinity   GPU NUMA ID
+  GPU0     X      NV18    NODE    NODE    NODE    SYS     0-71            0               2
+  GPU1    NV18     X      SYS     SYS     SYS     NODE    72-143          1               10
+  NIC0    NODE    SYS      X      PIX     NODE    SYS
+  NIC1    NODE    SYS     PIX      X      NODE    SYS
+  NIC2    NODE    SYS     NODE    NODE     X      SYS
+  NIC3    SYS     NODE    SYS     SYS     SYS      X
+
+Alternatively, one can use the ``hwloc`` command-line interface. Please make sure the
+strict flag is used:
+
+.. code-block:: sh
+
+    hwloc-bind --strict --membind node:${NODEID} --cpubind node:${NODEID} ./myapp
+
+Another approach is to use CPU affinity. The `dask-cuda
+`__ project configures optimal CPU affinity for the
+Dask interface through the `nvml` library in addition to the Linux sched routines. This
+can help guide the memory allocation policy but does not enforce it. As a result, when
+memory is under pressure, the OS can allocate memory on different NUMA nodes. On the other
+hand, it's easier to use since launchers like :py:class:`~dask_cuda.LocalCUDACluster` have
+already integrated the solution, as sketched below.
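+A minimal sketch of the Dask approach, assuming the optional ``dask_cuda`` package is
+installed. The cluster launcher pins each worker's CPU affinity to the NUMA node of its
+GPU, so no manual ``numactl`` invocation is needed for the workers:
+
+.. code-block:: python
+
+    from dask.distributed import Client
+    from dask_cuda import LocalCUDACluster
+
+    if __name__ == "__main__":
+        # One worker per GPU; dask-cuda uses NVML to match each worker's CPU
+        # affinity to the NUMA node of the GPU it drives.
+        with LocalCUDACluster() as cluster, Client(cluster) as client:
+            # Proceed with distributed training, e.g. via the xgboost.dask module.
+            ...
+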
+We use the first approach for benchmarks as it has better enforcement.
 
 ********************
 Distributed Training
@@ -240,15 +383,16 @@ Best Practices
 **************
 
 In previous sections, we demonstrated how to train a tree-based model with data residing
-on an external memory and made some recommendations for batch size. Here are some other
-configurations we find useful. The external memory feature involves iterating through data
-batches stored in a cache during tree construction. For optimal performance, we recommend
-using the ``grow_policy=depthwise`` setting, which allows XGBoost to build an entire layer
-of tree nodes with only a few batch iterations. Conversely, using the ``lossguide`` policy
-requires XGBoost to iterate over the data set for each tree node, resulting in
-significantly slower performance.
-
-In addition, this ``hist`` tree method should be preferred over the ``approx`` tree method
+in external memory. In addition, we made some recommendations for batch size and
+NUMA. Here are some other configurations we find useful. The external memory feature
+involves iterating through data batches stored in a cache during tree construction. For
+optimal performance, we recommend using the ``grow_policy=depthwise`` setting, which
+allows XGBoost to build an entire layer of tree nodes with only a few batch
+iterations. Conversely, using the ``lossguide`` policy requires XGBoost to iterate over
+the data set for each tree node, resulting in significantly slower performance (the number
+of tree nodes is exponential in the tree depth).
+
+In addition, the ``hist`` tree method should be preferred over the ``approx`` tree method
 as the former doesn't recreate the histogram bins for every iteration. Creating the
 histogram bins requires loading the raw input data, which is prohibitively expensive. The
 :py:class:`~xgboost.ExtMemQuantileDMatrix` designed for the ``hist`` tree method can speed
@@ -258,21 +402,22 @@ Since the external memory implementation focuses on training where XGBoost
 needs the entire dataset, only the ``X`` is divided into batches while everything else is
 concatenated. As a result, it's recommended for users to define their own management code
 to iterate through the data for inference, especially for SHAP value computation. The size
-of SHAP results can be larger than ``X``, making external memory in XGBoost less
-effective. Some frameworks like ``dask`` can help with the data chunking and iterate
-through the data for inference with memory spilling.
+of the SHAP matrix can be larger than the feature matrix ``X``, making external memory in
+XGBoost less effective.
 
 When external memory is used, the performance of CPU training is limited by disk IO
 (input/output) speed. This means that the disk IO speed primarily determines the training
 speed. Similarly, PCIe bandwidth limits the GPU performance, assuming the CPU memory is
 used as a cache and address translation services (ATS) is unavailable. During development,
-we observed that typical data transfer in XGBoost with PCIe4x16 has about 24GB/s
-bandwidth, which is significantly lower than the GPU processing performance. Whereas with
-a C2C-enabled machine, the performance of data transfer and processing in training are
-similar. Running inference is much less computation-intensive than training and, hence,
-much faster. As a result, the performance bottleneck of inference is back to data
-transfer. For GPU, the time it takes to read the data from host to device completely
-determines the time it takes to run inference, even if a C2C link is available.
+we observed that typical data transfer in XGBoost with PCIe4x16 has about 24GB/s bandwidth
+and about 42GB/s with PCIe5, which is significantly lower than the GPU processing
+performance. Whereas with a C2C-enabled machine, the performance of data transfer and
+processing in training are close to each other.
+
+Running inference is much less computation-intensive than training and, hence, much
+faster. As a result, the performance bottleneck of inference shifts back to data
+transfer. For GPU, the time it takes to read the data from host to device completely
+determines the time it takes to run inference, even if a C2C link is available.
 
 .. code-block:: python
 
@@ -284,11 +429,10 @@ subject to memory fragmentation even if the :py:class:`~rmm.mr.CudaAsyncMemoryRe
 used. You might want to start the training with a fresh pool instead of starting training
 right after the ETL process. If you run into out-of-memory errors and you are convinced
 that the pool is not full yet (pool memory usage can be profiled with ``nsight-system``),
-consider tuning the RMM memory resource like using
-:py:class:`~rmm.mr.CudaAsyncMemoryResource` in conjunction with
+consider using the :py:class:`~rmm.mr.ArenaMemoryResource` memory resource. Alternatively,
+use :py:class:`~rmm.mr.CudaAsyncMemoryResource` in conjunction with
 :py:class:`BinningMemoryResource(mr, 21, 25) ` instead of
-the :py:class:`~rmm.mr.PoolMemoryResource`. Alternately, the
-:py:class:`~rmm.mr.ArenaMemoryResource` is also an excellent option.
+the default :py:class:`~rmm.mr.PoolMemoryResource`.
 
 During CPU benchmarking, we used an NVMe connected to a PCIe-4 slot. Other types of
 storage can be too slow for practical usage. However, your system will likely perform some
@@ -337,8 +481,8 @@ so far focuses on following fronts of optimization for external memory:
 
 - If the OS can cache the data, the performance should be close to in-core training.
 - For GPU, the actual computation should overlap with memory copy as much as possible.
 
-Starting with XGBoost 2.0, the implementation of external memory uses ``mmap``. It has not
-been tested against system errors like disconnected network devices (`SIGBUS`). In the
+Starting with XGBoost 2.0, the CPU implementation of external memory uses ``mmap``. It has
+not been tested against system errors like disconnected network devices (`SIGBUS`). In the
 face of a bus error, you will see a hard crash and need to clean up the cache files. If
 the training session might take a long time and you use solutions like NVMe-oF, we
 recommend checkpointing your model periodically. Also, it's worth noting that most tests
@@ -381,47 +525,7 @@ undergone multiple development iterations. Here's a brief summary of major chang
   introduced the :py:class:`~xgboost.ExtMemQuantileDMatrix` class, added quantile-based
   objectives support.
 - In addition, we begin support for distributed training in 3.0
-
-****************
-Text File Inputs
-****************
-
-.. warning::
-
-  This is the original form of external memory support before 1.5 and is now deprecated,
-  users are encouraged to use a custom data iterator instead.
-
-There is no significant difference between using the external memory version of text input
-and the in-memory version of text input. The only difference is the filename format.
-
-The external memory version takes in the following `URI
-`_ format:
-
-.. code-block:: none
-
-  filename?format=libsvm#cacheprefix
-
-The ``filename`` is the typical path to LIBSVM format file you want to load in, and
-``cacheprefix`` is a path to a cache file that XGBoost will use for caching preprocessed
-data in binary form.
-
-To load from csv files, use the following syntax:
-
-.. code-block:: none
-
-  filename.csv?format=csv&label_column=0#cacheprefix
-
-where ``label_column`` should point to the csv column acting as the label.
-
-If you have a dataset stored in a file similar to ``demo/data/agaricus.txt.train`` with LIBSVM
-format, the external memory support can be enabled by:
-
-.. code-block:: python
-
-  dtrain = DMatrix('../data/agaricus.txt.train?format=libsvm#dtrain.cache')
-
-XGBoost will first load ``agaricus.txt.train`` in, preprocess it, then write to a new file named
-``dtrain.cache`` as an on disk cache for storing preprocessed data in an internal binary format. For
-more notes about text input formats, see :doc:`/tutorials/input_format`.
-
-For the CLI version, simply add the cache suffix, e.g. ``"../data/agaricus.txt.train?format=libsvm#dtrain.cache"``.
+- 3.1 added support for split cache pages: part of a cache page can be kept on the GPU
+  while the rest of the cache stays in the host memory. In addition, XGBoost works with
+  the Grace Blackwell hardware decompression engine when data is sparse.
+- The text file cache format has been removed in 3.1.0.
\ No newline at end of file
diff --git a/doc/tutorials/input_format.rst b/doc/tutorials/input_format.rst
index 415ee3471362..03e5092a11a9 100644
--- a/doc/tutorials/input_format.rst
+++ b/doc/tutorials/input_format.rst
@@ -6,6 +6,11 @@ Text Input Format of DMatrix
 Here we will briefly describe the text input formats for XGBoost. However, for users with access to a supported language
 environment like Python or R, it's recommended to use data parsers from that ecosystem instead. For instance,
 :py:func:`sklearn.datasets.load_svmlight_file`.
 
+.. warning::
+
+  As stated above, users are encouraged to use third-party data parsers. The text parsers
+  in XGBoost have been deprecated.
+
 ******************
 Basic Input Format
 ******************
diff --git a/doc/tutorials/intercept.rst b/doc/tutorials/intercept.rst
index 9e238f2cf86c..baebd0fee84a 100644
--- a/doc/tutorials/intercept.rst
+++ b/doc/tutorials/intercept.rst
@@ -9,12 +9,28 @@ automatically based on targets upon training. The behavior can be controlled by setting
 ``base_score`` to a constant value. The following snippet disables the automatic
 estimation:
 
-.. code-block:: python
+.. tabs::
+    .. code-tab:: py
 
-    import xgboost as xgb
+        import xgboost as xgb
 
-    reg = xgb.XGBRegressor()
-    reg.set_params(base_score=0.5)
+        clf = xgb.XGBClassifier(n_estimators=10)
+        clf.set_params(base_score=0.5)
+
+    .. code-tab:: r R
+
+        library(xgboost)
+
+        # Load built-in dataset
+        data(agaricus.train, package = "xgboost")
+
+        # Set base_score parameter directly
+        model <- xgboost(
+            x = agaricus.train$data,
+            y = factor(agaricus.train$label),
+            base_score = 0.5,
+            nrounds = 10
+        )
 
 In addition, here 0.5 represents the value after applying the inverse link function. See
 the end of the document for a description.
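+As a quick way to verify the stored intercept, the booster's internal configuration can be
+inspected. The sketch below assumes the JSON configuration layout of recent XGBoost
+releases, where the value lives under ``learner/learner_model_param/base_score``:
+
+.. code-block:: python
+
+    import json
+
+    import xgboost as xgb
+    from sklearn.datasets import make_classification
+
+    X, y = make_classification(random_state=2025)
+    clf = xgb.XGBClassifier(n_estimators=1, base_score=0.5)
+    clf.fit(X, y)
+
+    config = json.loads(clf.get_booster().save_config())
+    # The stored intercept; here it stays at the configured 0.5.
+    print(config["learner"]["learner_model_param"]["base_score"])
+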
@@ -24,22 +40,53 @@ Other than the ``base_score``, users can also provide global bias via the data f and multi-class, the ``base_margin`` is a matrix with size ``(n_samples, n_targets)`` or ``(n_samples, n_classes)``. -.. code-block:: python +.. tabs:: + .. code-tab:: py + + import xgboost as xgb + from sklearn.datasets import make_classification + + X, y = make_classification() + + clf = xgb.XGBClassifier() + clf.fit(X, y) + # Request for raw prediction + m = clf.predict(X, output_margin=True) + + clf_1 = xgb.XGBClassifier() + # Feed the prediction into the next model + # Using base margin overrides the base score, see below sections. + clf_1.fit(X, y, base_margin=m) + clf_1.predict(X, base_margin=m) + + .. code-tab:: r R + + library(xgboost) - import xgboost as xgb - from sklearn.datasets import make_regression + # Load built-in dataset + data(agaricus.train, package = "xgboost") - X, y = make_regression() + # Train first model + model_1 <- xgboost( + x = agaricus.train$data, + y = factor(agaricus.train$label), + nrounds = 10 + ) - reg = xgb.XGBRegressor() - reg.fit(X, y) - # Request for raw prediction - m = reg.predict(X, output_margin=True) + # Request for raw prediction + m <- predict(model_1, agaricus.train$data, type = "raw") - reg_1 = xgb.XGBRegressor() - # Feed the prediction into the next model - reg_1.fit(X, y, base_margin=m) - reg_1.predict(X, base_margin=m) + # Feed the prediction into the next model using base_margin + # Using base margin overrides the base score, see below sections. + model_2 <- xgboost( + x = agaricus.train$data, + y = factor(agaricus.train$label), + base_margin = m, + nrounds = 10 + ) + + # Make predictions with base_margin + pred <- predict(model_2, agaricus.train$data, base_margin = m) It specifies the bias for each sample and can be used for stacking an XGBoost model on top @@ -136,4 +183,110 @@ We have: E[c_i] &= \exp{(F(x_i) + \ln{\gamma_i})} \\ E[c_i] &= g^{-1}(F(x_i) + g(\gamma_i)) -As you can see, we can use the ``base_margin`` for modeling with offset similar to GLMs \ No newline at end of file +As you can see, we can use the ``base_margin`` for modeling with offset similar to GLMs + +******* +Example +******* + +The following example shows the relationship between ``base_score`` and ``base_margin`` +using binary logistic with a `logit` link function: + +.. tabs:: + .. code-tab:: py + + import numpy as np + from scipy.special import logit + from sklearn.datasets import make_classification + + import xgboost as xgb + + X, y = make_classification(random_state=2025) + + .. code-tab:: r R + + library(xgboost) + + # Load built-in dataset + data(agaricus.train, package = "xgboost") + X <- agaricus.train$data + y <- agaricus.train$label + +The intercept is a valid probability (0.5). It's used as the initial estimation of the +probability of obtaining a positive sample. + +.. tabs:: + .. code-tab:: py + + intercept = 0.5 + + .. code-tab:: r R + + intercept <- 0.5 + +First we use the intercept to train a model: + +.. tabs:: + .. code-tab:: py + + booster = xgb.train( + {"base_score": intercept, "objective": "binary:logistic"}, + dtrain=xgb.DMatrix(X, y), + num_boost_round=1, + ) + predt_0 = booster.predict(xgb.DMatrix(X, y)) + + .. code-tab:: r R + + # First model with base_score + model_0 <- xgboost( + x = X, y = factor(y), + base_score = intercept, + objective = "binary:logistic", + nrounds = 1 + ) + predt_0 <- predict(model_0, X) + +Apply :py:func:`~scipy.special.logit` to obtain the "margin": + +.. tabs:: + .. 
code-tab:: py + + # Apply logit function to obtain the "margin" + margin = np.full(y.shape, fill_value=logit(intercept), dtype=np.float32) + Xy = xgb.DMatrix(X, y, base_margin=margin) + # Second model with base_margin + # 0.2 is a dummy value to show that `base_margin` overrides `base_score`. + booster = xgb.train( + {"base_score": 0.2, "objective": "binary:logistic"}, + dtrain=Xy, + num_boost_round=1, + ) + predt_1 = booster.predict(Xy) + + .. code-tab:: r R + + # Apply logit function to obtain the "margin" + logit_intercept <- log(intercept / (1 - intercept)) + margin <- rep(logit_intercept, length(y)) + # Second model with base_margin + # 0.2 is a dummy value to show that `base_margin` overrides `base_score` + model_1 <- xgboost( + x = X, y = factor(y), + base_margin = margin, + base_score = 0.2, + objective = "binary:logistic", + nrounds = 1 + ) + predt_1 <- predict(model_1, X, base_margin = margin) + +Compare the results: + +.. tabs:: + .. code-tab:: py + + np.testing.assert_allclose(predt_0, predt_1) + + .. code-tab:: r R + + all.equal(predt_0, predt_1, tolerance = 1e-6) diff --git a/doc/tutorials/kubernetes.rst b/doc/tutorials/kubernetes.rst index 7f7994b659e4..c2adac22c236 100644 --- a/doc/tutorials/kubernetes.rst +++ b/doc/tutorials/kubernetes.rst @@ -2,33 +2,33 @@ Distributed XGBoost on Kubernetes ################################### -Distributed XGBoost training and batch prediction on `Kubernetes `_ are supported via `Kubeflow XGBoost Training Operator `_. +Distributed XGBoost training and batch prediction on `Kubernetes `_ are supported via `Kubeflow Trainer `_. ************ Instructions ************ In order to run a XGBoost job in a Kubernetes cluster, perform the following steps: -1. Install XGBoost Operator on the Kubernetes cluster. +1. Install Kubeflow Trainer on the Kubernetes cluster. - a. XGBoost Operator is designed to manage the scheduling and monitoring of XGBoost jobs. Follow `this installation guide `_ to install XGBoost Operator. + a. Kubeflow Trainer is designed to manage the scheduling and monitoring of XGBoost jobs. Follow `this installation guide `_ to install it. -2. Write application code that will be executed by the XGBoost Operator. +2. Write application code that will be executed by the Kubeflow Trainer. - a. To use XGBoost Operator, you'll have to write a couple of Python scripts that implement the distributed training logic for XGBoost. Please refer to the `Iris classification example `_. + a. To use Kubeflow Trainer, you'll have to write a couple of Python scripts that implement the distributed training logic for XGBoost. Please refer to the `Iris classification example `_. b. Data reader/writer: you need to implement the data reader and writer based on the specific requirements of your chosen data source. For example, if your dataset is stored in a Hive table, you have to write the code to read from or write to the Hive table based on the index of the worker. - c. Model persistence: in the `Iris classification example `_, the model is stored in `Alibaba OSS `_. If you want to store your model in other storages such as Amazon S3 or Google NFS, you'll need to implement the model persistence logic based on the requirements of the chosen storage system. + c. Model persistence: in the `Iris classification example `_, the model is stored in `Alibaba OSS `_. If you want to store your model in other storages such as Amazon S3 or Google NFS, you'll need to implement the model persistence logic based on the requirements of the chosen storage system. 3. 
Configure the XGBoost job using a YAML file. - a. YAML file is used to configure the computational resources and environment for your XGBoost job to run, e.g. the number of workers/masters and the number of CPU/GPUs. Please refer to this `YAML template `_ for an example. + a. YAML file is used to configure the computational resources and environment for your XGBoost job to run, e.g. the number of workers/masters and the number of CPU/GPUs. Please refer to this `YAML template `_ for an example. 4. Submit XGBoost job to a Kubernetes cluster. - a. Use `kubectl `_ to submit a distributed XGBoost job as illustrated `here `_. + a. Use `kubectl `_ to submit a distributed XGBoost job as illustrated `here `_. ******* Support ******* -Please submit an issue on `XGBoost Operator repo `_ for any feature requests or problems. +Please submit an issue on `Kubeflow Trainer repo `_ for any feature requests or problems. diff --git a/doc/tutorials/learning_to_rank.rst b/doc/tutorials/learning_to_rank.rst index e1c1ab85a3eb..5dd1ece5213b 100644 --- a/doc/tutorials/learning_to_rank.rst +++ b/doc/tutorials/learning_to_rank.rst @@ -196,14 +196,12 @@ The learning to rank implementation has been significantly updated in 2.0 with a params = { # 1.7 only supports sampling, while 2.0 and later use top-k as the default. - # See above sections for the trade-off. + # See above sections for the trade-off. "lambdarank_pair_method": "mean", - # Normalization was added in 2.0 - "lambdarank_normalization": False, # 1.7 uses the ranknet loss while later versions use the NDCG weighted loss "objective": "rank:pairwise", - # 1.7 doesn't have this normalization. - "lambdarank_score_normalization": False, + # 1.7 doesn't have this normalization. + "lambdarank_score_normalization": False, "base_score": 0.5, # The default tree method has been changed from approx to hist. "tree_method": "approx", diff --git a/doc/tutorials/ray.rst b/doc/tutorials/ray.rst index 1e9bc3fafa43..b6e5f7c7b297 100644 --- a/doc/tutorials/ray.rst +++ b/doc/tutorials/ray.rst @@ -250,8 +250,8 @@ a few things you need to do: Ray Tune supports various -`search algorithms and libraries (e.g. BayesOpt, Tree-Parzen estimators) `_, -`smart schedulers like successive halving `_, +`search algorithms and libraries (e.g. BayesOpt, Tree-Parzen estimators) `_, +`smart schedulers like successive halving `_, and other features. Please refer to the `Ray Tune documentation `_ for more information. diff --git a/doc/tutorials/saving_model.rst b/doc/tutorials/saving_model.rst index 326834cd4f8d..20aeb210c465 100644 --- a/doc/tutorials/saving_model.rst +++ b/doc/tutorials/saving_model.rst @@ -2,17 +2,16 @@ Introduction to Model IO ######################## +**Contents** + +.. contents:: + :backlinks: none + :local: + Since 2.1.0, the default model format for XGBoost is the UBJSON format, the option is enabled for serializing models to file, serializing models to buffer, and for memory snapshot (pickle and alike). -In XGBoost 1.0.0, we introduced support of using `JSON -`_ for saving/loading XGBoost models and related -hyper-parameters for training, aiming to replace the old binary internal format with an -open format that can be easily reused. Later in XGBoost 1.6.0, additional support for -`Universal Binary JSON `__ is added as an optimization for more -efficient model IO, which is set to default in 2.1. - JSON and UBJSON have the same document structure with different representations, and we will refer them collectively as the JSON format. 
This tutorial aims to share some basic
insights into the JSON serialisation method used in XGBoost. Without explicitly
@@ -27,41 +26,33 @@ which means inside XGBoost, there are 2 distinct parts:
 1. The model consisting of trees and
 2. Hyperparameters and configurations used for building the model.
 
-If you come from Deep Learning community, then it should be
-clear to you that there are differences between the neural network structures composed of
-weights with fixed tensor operations, and the optimizers (like RMSprop) used to train them.
+If you come from the Deep Learning community, then it should be clear to you that there
+are differences between the neural network structures composed of weights with fixed
+tensor operations, and the optimizers (like RMSprop) used to train them.
 
 So when one calls ``booster.save_model`` (``xgb.save`` in R), XGBoost saves the trees,
 some model parameters like number of input columns in trained trees, and the objective
 function, which combined to represent the concept of "model" in XGBoost. As for why are we
 saving the objective as part of model, that's because objective controls transformation
-of global bias (called ``base_score`` in XGBoost) and task-specific information. Users
-can share this model with others for prediction, evaluation or continue the training with
-a different set of hyper-parameters etc.
+of global bias (called ``base_score`` or the intercept in XGBoost) and task-specific
+information. Users can share this model with others for inference, evaluation or continue
+the training with a different set of hyper-parameters etc.
 
 However, this is not the end of story. There are cases where we need to save something
 more than just the model itself. For example, in distributed training, XGBoost performs
 checkpointing operation. Or for some reasons, your favorite distributed computing
 framework decide to copy the model from one worker to another and continue the training in
-there. In such cases, the serialisation output is required to contain enough information
+there. In such cases, the serialisation output is required to contain enough information
 to continue previous training without user providing any parameters again. We consider
-such scenario as **memory snapshot** (or memory based serialisation method) and distinguish it
-with normal model IO operation. Currently, memory snapshot is used in the following places:
+such a scenario as a **memory snapshot** (or memory-based serialisation method) and
+distinguish it from the normal model IO operation. Currently, memory snapshot is used in
+the following places:
 
 * Python package: when the ``Booster`` object is pickled with the built-in ``pickle`` module.
 * R package: when the ``xgb.Booster`` object is persisted with the built-in functions
   ``saveRDS`` or ``save``.
 * JVM packages: when the ``Booster`` object is serialized with the built-in functions ``saveModel``.
 
-Other language bindings are still working in progress.
-
-.. note::
-
-  The old binary format doesn't distinguish difference between model and raw memory
-  serialisation format, it's a mix of everything, which is part of the reason why we want
-  to replace it with a more robust serialisation method. JVM Package has its own memory
-  based serialisation methods.
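+To make the distinction concrete, a minimal sketch of the two serialisation paths (the
+file name is arbitrary):
+
+.. code-block:: python
+
+    import pickle
+
+    import xgboost as xgb
+    from sklearn.datasets import make_regression
+
+    X, y = make_regression()
+    booster = xgb.train({}, xgb.DMatrix(X, y), num_boost_round=2)
+
+    # Model IO: only the trees, model parameters, and the objective are saved.
+    booster.save_model("model.ubj")
+
+    # Memory snapshot: everything needed to resume training, including
+    # hyper-parameters, is serialized.
+    snapshot = pickle.dumps(booster)
+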
-
 To enable JSON format support for model IO (saving only the trees and objective), provide
 a filename with ``.json`` or ``.ubj`` as file extension, the latter is the extension for
 `Universal Binary JSON `__
@@ -88,10 +79,9 @@ a filename with ``.json`` or ``.ubj`` as file extension, the latter is the exten
 JSON files that were produced by an external source may lead to undefined behaviors and
 crashes.
 
-While for memory snapshot, UBJSON is the default starting with xgboost 1.6. When loading
-the model back, XGBoost recognizes the file extensions ``.json`` and ``.ubj``, and can
-dispatch accordingly. If the extension is not specified, XGBoost tries to guess the right
-one.
+When loading the model back, XGBoost recognizes the file extensions ``.json`` and
+``.ubj``, and can dispatch accordingly. If the extension is not specified, XGBoost tries
+to guess the right one.
 
 ***************************************************************
 A note on backward compatibility of models and memory snapshots
 ***************************************************************
@@ -234,7 +224,7 @@ You can load it back to the model generated by same version of XGBoost by:
 
   bst.load_config(config)
 
-This way users can study the internal representation more closely.  Please note that some
+This way users can study the internal representation more closely. Please note that some
 JSON generators make use of locale dependent floating point serialization methods, which
 is not supported by XGBoost.
 
@@ -242,24 +232,71 @@ is not supported by XGBoost.
 *************************************************
 Difference between saving model and dumping model
 *************************************************
 
-XGBoost has a function called ``dump_model`` in Booster object, which lets you to export
-the model in a readable format like ``text``, ``json`` or ``dot`` (graphviz). The primary
-use case for it is for model interpretation or visualization, and is not supposed to be
-loaded back to XGBoost. The JSON version has a `schema
-`__. See next section for
-more info.
-
-***********
-JSON Schema
-***********
-
-Another important feature of JSON format is a documented `schema
-`__, based on which one can easily reuse the output model from
-XGBoost. Here is the JSON schema for the output model (not serialization, which will not
-be stable as noted above). For an example of parsing XGBoost tree model, see
-``/demo/json-model``. Please notice the "weight_drop" field used in "dart" booster.
-XGBoost does not scale tree leaf directly, instead it saves the weights as a separated
-array.
-
-.. include:: ../model.schema
-   :code: json
+XGBoost has a function called ``dump_model`` in the Booster class, which lets you export
+the model in a readable format like ``text``, ``json`` or ``dot`` (graphviz). The primary
+use case for it is model interpretation and visualization; the dump is not supposed to be
+loaded back into XGBoost.
+
+**********
+Categories
+**********
+
+Since 3.1, the categories encoding from a training dataframe is stored in the booster to
+provide test-time re-coding support; see :ref:`cat-recode` for more info about how the
+re-coder works. We will briefly explain the JSON format for the serialized category index.
+
+The categories are saved in a JSON object named "cats" under the gbtree model. It contains
+three keys:
+
+- feature_segments
+
+This is a CSR-like pointer that stores the cumulative number of categories across
+features. It starts with zero and ends with the total number of categories from all
+features. For example:
+
+.. code-block:: python
+
+    feature_segments = [0, 3, 3, 5]
+
+The ``feature_segments`` list represents a dataset with two categorical features and one
+numerical feature. The first feature contains three categories, the second feature is
+numerical and thus has no categories, and the last feature includes two categories.
+
+- sorted_idx
+
+This array stores the sorted indices (`argsort`) of categories across all features,
+segmented by the ``feature_segments``. Given a feature with categories: ``["b", "c",
+"a"]``, the sorted index is ``[2, 0, 1]``.
+
+- enc
+
+This is an array with a length equal to the number of features, storing all the categories
+in the same order as the input dataframe. The storage schema depends on whether the
+categories are strings (XGBoost also supports numerical categories, such as integers). For
+string categories, we use a schema similar to the arrow format for a string array. The
+categories of each feature are represented by two arrays, namely ``offsets`` and
+``values``. The format is also similar to a CSR matrix. The ``values`` field is a
+``uint8`` array storing the characters from all category names. Given a feature with three
+categories: ``["bb", "c", "a"]``, the ``values`` field is ``[98, 98, 99, 97]``. Then the
+``offsets`` segments the ``values`` array similar to a CSR pointer: ``[0, 2, 3, 4]``. We
+chose not to store the ``values`` as a JSON string to avoid handling special characters
+and string encoding. The string names are stored exactly as given by the dataframe.
+
+As for numerical categories, the ``enc`` contains two keys: ``type`` and ``values``. The
+``type`` field is an integer ID that identifies the type of the categories, such as 64-bit
+integers and 32-bit floating points (note that they are all f32 inside a decision
+tree). The exact mapping between the type and the integer ID is internal but stable. The
+``values`` field is an array storing all categories in a feature.
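+As an illustration, the following sketch decodes the category names of a single feature
+from the ``offsets``/``values`` pair described above, using the same example values:
+
+.. code-block:: python
+
+    # A feature with three categories: ["bb", "c", "a"].
+    offsets = [0, 2, 3, 4]
+    values = [98, 98, 99, 97]  # uint8 characters of all names, concatenated
+
+    names = [
+        bytes(values[begin:end]).decode("utf-8")
+        for begin, end in zip(offsets[:-1], offsets[1:])
+    ]
+    assert names == ["bb", "c", "a"]
+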
+
+*************
+Brief History
+*************
+
+- The JSON format was introduced in 1.0, aiming to replace the old binary internal format
+  (now removed) with an open format that can be easily reused.
+- Later in XGBoost 1.6.0, additional support for Universal Binary JSON was introduced as
+  an optimization for more efficient model IO.
+- UBJSON has been the default since 2.1.
+- The old binary format was removed in 3.1.
+- The JSON schema file is no longer maintained and was removed in 3.2. The underlying
+  schema of the model is not changed.
\ No newline at end of file
diff --git a/doc/tutorials/slicing_model.rst b/doc/tutorials/slicing_model.rst
index a24545a615c6..0ea0afd9dd81 100644
--- a/doc/tutorials/slicing_model.rst
+++ b/doc/tutorials/slicing_model.rst
@@ -8,53 +8,51 @@ Slice tree model
 When ``booster`` is set to ``gbtree`` or ``dart``, XGBoost builds a tree model, which is a
 list of trees and can be sliced into multiple sub-models.
 
-In Python:
-
-.. code-block:: python
-
-    from sklearn.datasets import make_classification
-    num_classes = 3
-    X, y = make_classification(n_samples=1000, n_informative=5,
-                               n_classes=num_classes)
-    dtrain = xgb.DMatrix(data=X, label=y)
-    num_parallel_tree = 4
-    num_boost_round = 16
-    # total number of built trees is num_parallel_tree * num_classes * num_boost_round
-
-    # We build a boosted random forest for classification here.
- booster = xgb.train({ - 'num_parallel_tree': 4, 'subsample': 0.5, 'num_class': 3}, - num_boost_round=num_boost_round, dtrain=dtrain) - - # This is the sliced model, containing [3, 7) forests - # step is also supported with some limitations like negative step is invalid. - sliced: xgb.Booster = booster[3:7] - - # Access individual tree layer - trees = [_ for _ in booster] - assert len(trees) == num_boost_round - -In R: - -.. versionadded:: 3.0.0 - -.. code-block:: R - - data(agaricus.train, package = "xgboost") - dm <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label) - - model <- xgb.train( - params = xgb.params(objective = "binary:logistic", max_depth = 4), - data = dm, - nrounds = 20 - ) - sliced <- model[seq(3, 7)] - ##### xgb.Booster - # of features: 126 - # of rounds: 5 +.. tabs:: + + .. code-tab:: py + + import xgboost as xgb + from sklearn.datasets import make_classification + num_classes = 3 + X, y = make_classification(n_samples=1000, n_informative=5, + n_classes=num_classes) + dtrain = xgb.DMatrix(data=X, label=y) + num_parallel_tree = 4 + num_boost_round = 16 + # total number of built trees is num_parallel_tree * num_classes * num_boost_round + + # We build a boosted random forest for classification here. + booster = xgb.train({ + 'num_parallel_tree': 4, 'subsample': 0.5, 'num_class': 3}, + num_boost_round=num_boost_round, dtrain=dtrain) + + # This is the sliced model, containing [3, 7) forests + # step is also supported with some limitations like negative step is invalid. + sliced: xgb.Booster = booster[3:7] + + # Access individual tree layer + trees = [_ for _ in booster] + assert len(trees) == num_boost_round + + .. code-tab:: r R + + library(xgboost) + data(agaricus.train, package = "xgboost") + dm <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label) + + model <- xgb.train( + params = xgb.params(objective = "binary:logistic", max_depth = 4), + data = dm, + nrounds = 20 + ) + sliced <- model[seq(3, 7)] + ##### xgb.Booster + # of features: 126 + # of rounds: 5 The sliced model is a copy of selected trees, that means the model itself is immutable -during slicing. This feature is the basis of `save_best` option in early stopping +during slicing. This feature is the basis of ``save_best`` option in early stopping callback. See :ref:`sphx_glr_python_examples_individual_trees.py` for a worked example on how to combine prediction with sliced trees. diff --git a/include/xgboost/base.h b/include/xgboost/base.h index 64aab5c41b0c..7bfbc1300b94 100644 --- a/include/xgboost/base.h +++ b/include/xgboost/base.h @@ -1,5 +1,5 @@ /** - * Copyright 2015-2024, XGBoost Contributors + * Copyright 2015-2025, XGBoost Contributors * \file base.h * \brief Defines configuration macros and basic types for xgboost. */ @@ -72,6 +72,14 @@ #define XGBOOST_DEV_INLINE #endif // defined(__CUDA__) || defined(__CUDACC__) + +// restrict +#if defined(_MSC_VER) +#define XGBOOST_RESTRICT __restrict +#else +#define XGBOOST_RESTRICT __restrict__ +#endif + // These check are for Makefile. #if !defined(XGBOOST_MM_PREFETCH_PRESENT) && !defined(XGBOOST_BUILTIN_PREFETCH_PRESENT) /* default logic for software pre-fetching */ @@ -105,9 +113,13 @@ using bst_bin_t = std::int32_t; // NOLINT * @brief Type for data row index (sample). */ using bst_idx_t = std::uint64_t; // NOLINT -/*! \brief Type for tree node index. */ +/** + * \brief Type for tree node index and tree depth. + */ using bst_node_t = std::int32_t; // NOLINT -/*! \brief Type for ranking group index. 
*/ +/** + * @brief Type for ranking group index. + */ using bst_group_t = std::uint32_t; // NOLINT /** * @brief Type for indexing into output targets. @@ -118,7 +130,7 @@ using bst_target_t = std::uint32_t; // NOLINT */ using bst_layer_t = std::int32_t; // NOLINT /** - * \brief Type for indexing trees. + * @brief Type for indexing trees. */ using bst_tree_t = std::int32_t; // NOLINT /** diff --git a/include/xgboost/c_api.h b/include/xgboost/c_api.h index 852f65d38f52..6237af7b5196 100644 --- a/include/xgboost/c_api.h +++ b/include/xgboost/c_api.h @@ -1,8 +1,7 @@ /** - * Copyright 2015-2024, XGBoost Contributors - * \file c_api.h - * \author Tianqi Chen - * \brief C API of XGBoost, used for interfacing to other languages. + * Copyright 2015-2025, XGBoost Contributors + * + * @brief C API of XGBoost, used to interface with other high-level languages. */ #ifndef XGBOOST_C_API_H_ #define XGBOOST_C_API_H_ @@ -31,7 +30,7 @@ typedef uint64_t bst_ulong; // NOLINT(*) /** * @mainpage * - * \brief XGBoost C API reference. + * @brief XGBoost C API reference. * * For the official document page see: * XGBoost C Package. @@ -46,66 +45,69 @@ typedef uint64_t bst_ulong; // NOLINT(*) * @{ */ -/*! \brief handle to DMatrix */ +/** @brief handle to DMatrix */ typedef void *DMatrixHandle; // NOLINT(*) -/*! \brief handle to Booster */ +/** @brief handle to Booster */ typedef void *BoosterHandle; // NOLINT(*) -/*! - * \brief Return the version of the XGBoost library being currently used. +/** + * @brief Return the version of the XGBoost library. * - * The output variable is only written if it's not NULL. + * The output variable is only written if it's not NULL. * - * \param major Store the major version number - * \param minor Store the minor version number - * \param patch Store the patch (revision) number + * @param major Store the major version number. + * @param minor Store the minor version number. + * @param patch Store the patch (revision) number. */ XGB_DLL void XGBoostVersion(int* major, int* minor, int* patch); -/*! - * \brief Get compile information of shared library. +/** + * @brief Get compile information of the shared XGBoost library. * - * \param out string encoded JSON object containing build flags and dependency version. + * @param out string encoded JSON object containing build flags and dependency versions. * - * \return 0 for success, -1 for failure + * @return 0 when success, -1 when failure happens */ XGB_DLL int XGBuildInfo(char const **out); -/*! - * \brief get string message of the last error +/** + * @brief Get the string message of the last error. + * + * Most functions in XGBoost returns 0 when success and non-zero when an error + * occurred. In the case of error, @ref XGBGetLastError can be used to retrieve the + * error message * - * all function in this file will return 0 when success - * and -1 when an error occurred, - * XGBGetLastError can be called to retrieve the error + * This function is thread safe. * - * this function is thread safe and can be called by different thread - * \return const char* error information + * @return The error message from the last error. */ XGB_DLL const char *XGBGetLastError(); -/*! - * \brief register callback function for LOG(INFO) messages -- helpful messages +/** + * @brief register callback function for LOG(INFO) messages -- helpful messages * that are not errors. - * Note: this function can be called by multiple threads. 
The callback function - * will run on the thread that registered it - * \return 0 for success, -1 for failure + * + * @note This function can be called by multiple threads. The callback function + * will run on the thread that registered it. + * + * @return 0 when success, -1 when failure happens */ XGB_DLL int XGBRegisterLogCallback(void (*callback)(const char*)); -/*! - * \brief Set global configuration (collection of parameters that apply globally). This function +/** + * @brief Set global configuration (collection of parameters that apply globally). This function * accepts the list of key-value pairs representing the global-scope parameters to be * configured. The list of key-value pairs are passed in as a JSON string. - * \param config a JSON string representing the list of key-value pairs. The JSON object shall + * @param config a JSON string representing the list of key-value pairs. The JSON object shall * be flat: no value can be a JSON object or an array. - * \return 0 for success, -1 for failure + * @return 0 when success, -1 when failure happens */ XGB_DLL int XGBSetGlobalConfig(char const *config); -/*! - * \brief Get current global configuration (collection of parameters that apply globally). - * \param out_config pointer to received returned global configuration, represented as a JSON string. - * \return 0 for success, -1 for failure +/** + * @brief Get current global configuration (collection of parameters that apply globally). + * @param out_config pointer to received returned global configuration, represented as a JSON string. + * @return 0 when success, -1 when failure happens */ XGB_DLL int XGBGetGlobalConfig(char const **out_config); @@ -124,45 +126,81 @@ XGB_DLL int XGBGetGlobalConfig(char const **out_config); * @{ */ -/*! - * \brief load a data matrix - * \deprecated since 2.0.0 - * \see XGDMatrixCreateFromURI() - * \param fname the name of the file - * \param silent whether print messages during loading - * \param out a loaded data matrix - * \return 0 when success, -1 when failure happens +/** + * @brief load a data matrix + * + * @deprecated since 2.0.0 + * @see XGDMatrixCreateFromURI() + * @param fname the name of the file + * @param silent whether print messages during loading + * @param out a loaded data matrix + * + * @return 0 when success, -1 when failure happens */ XGB_DLL int XGDMatrixCreateFromFile(const char *fname, int silent, DMatrixHandle *out); -/*! - * \brief load a data matrix - * \param config JSON encoded parameters for DMatrix construction. Accepted fields are: +/** + * @brief load a data matrix + * + * @param config JSON encoded parameters for DMatrix construction. Accepted fields are: * - uri: The URI of the input file. The URI parameter `format` is required when loading text data. - * \verbatim embed:rst:leading-asterisk + * @verbatim embed:rst:leading-asterisk * See :doc:`/tutorials/input_format` for more info. - * \endverbatim + * @endverbatim * - silent (optional): Whether to print message during loading. Default to true. * - data_split_mode (optional): Whether the file was split by row or column beforehand for distributed computing. Default to row. - * \param out a loaded data matrix - * \return 0 when success, -1 when failure happens + * @param out a loaded data matrix + * @return 0 when success, -1 when failure happens */ XGB_DLL int XGDMatrixCreateFromURI(char const *config, DMatrixHandle *out); - -/*! 
- * \brief create a matrix content from CSR format - * \deprecated since 2.0.0 - * \see XGDMatrixCreateFromCSR() - */ -XGB_DLL int XGDMatrixCreateFromCSREx(const size_t *indptr, const unsigned *indices, - const float *data, size_t nindptr, size_t nelem, - size_t num_col, DMatrixHandle *out); /** * @brief Create a DMatrix from columnar data. (table) * * A special type of input to the `DMatrix` is the columnar format, which refers to - * column-based dataframes based on the arrow formatt. + * column-based dataframes. XGBoost can accept both numeric data types like integers and + * floats, along with the categorical type, called dictionary in arrow's term. The + * addition of categorical type is introduced in 3.1.0. The dataframe is represented by a + * list array interfaces with one object for each column. + * + * A categorical type is represented by 3 buffers, the validity mask, the names of the + * categories (called index for most of the dataframe implementation), and the codes used + * to represent the categories in the rows. XGBoost consumes a categorical column by + * accepting two JSON-encoded arrow arrays in a list. The first item in the list is a JSON + * object with `{"offsets": IntegerArray, "values": StringArray }` representing the string + * names defined by the arrow columnar format. The second buffer is an masked integer + * array that stores the categorical codes along with the validity mask: + * + * @code{javascript} + * [ + * // categorical column, represented as an array (list) + * [ + * { + * 'offsets': + * { + * 'data': (129412626415808, True), + * 'typestr': ' return fn(); } -void SafeColl(Result const& rc); +void SafeColl(Result const& rc, char const* file = __builtin_FILE(), + std::int32_t line = __builtin_LINE()); } // namespace xgboost::collective diff --git a/include/xgboost/collective/socket.h b/include/xgboost/collective/socket.h index 6b4d846cf727..f0a21dd1bb06 100644 --- a/include/xgboost/collective/socket.h +++ b/include/xgboost/collective/socket.h @@ -1,5 +1,5 @@ /** - * Copyright 2022-2024, XGBoost Contributors + * Copyright 2022-2025, XGBoost Contributors */ #pragma once @@ -301,7 +301,7 @@ class TCPSocket { }; #if defined(_WIN32) - WSAPROTOCOL_INFOA info; + WSAPROTOCOL_INFOW info; socklen_t len = sizeof(info); xgboost_CHECK_SYS_CALL( getsockopt(handle_, SOL_SOCKET, SO_PROTOCOL_INFO, reinterpret_cast(&info), &len), diff --git a/include/xgboost/context.h b/include/xgboost/context.h index f32a07a033a1..5a1c58042b24 100644 --- a/include/xgboost/context.h +++ b/include/xgboost/context.h @@ -162,14 +162,6 @@ struct Context : public XGBoostParameter { bool fail_on_invalid_gpu_id{false}; bool validate_parameters{false}; - /** - * @brief Configure the parameter `device'. Deprecated, will remove once `gpu_id` is - * removed. - * - * @param require_gpu Whether GPU is explicitly required by the user through other - * configurations. - */ - void ConfigureGpuId(bool require_gpu); /** * @brief Returns the automatically chosen number of threads based on the `nthread` * parameter and the system settting. @@ -206,6 +198,13 @@ struct Context : public XGBoostParameter { * @brief Get the current device and ordinal. */ [[nodiscard]] DeviceOrd Device() const { return device_; } + + /** + * @brief Get the current device and ordinal, if it supports fp64, + otherwise returns default CPU + */ + [[nodiscard]] DeviceOrd DeviceFP64() const; + /** * @brief Get the CUDA device ordinal. -1 if XGBoost is running on CPU. 
*/ diff --git a/include/xgboost/data.h b/include/xgboost/data.h index 341184a28d30..d3bc2074ad15 100644 --- a/include/xgboost/data.h +++ b/include/xgboost/data.h @@ -291,7 +291,6 @@ struct BatchParam { * @brief The number of batches to pre-fetch for external memory. */ std::int32_t n_prefetch_batches{3}; - /** * @brief Exact or others that don't need histogram. */ @@ -431,7 +430,7 @@ class SparsePage { * \return The maximum number of columns encountered in this input batch. Useful when pushing many adapter batches to work out the total number of columns. */ template - uint64_t Push(const AdapterBatchT& batch, float missing, int nthread); + bst_idx_t Push(AdapterBatchT const& batch, float missing, std::int32_t nthread); /*! * \brief Push a sparse page @@ -532,26 +531,36 @@ struct ExtMemConfig { // Cache prefix, not used if the cache is in the host memory. (on_host is true) std::string cache; // Whether the ellpack page is stored in the host memory. - bool on_host{true}; + bool on_host; + // Host cache/Total cache for the GPU impl. + float cache_host_ratio; // Minimum number of of bytes for each ellpack page in cache. Only used for in-host // ExtMemQdm. - std::int64_t min_cache_page_bytes{0}; + std::int64_t min_cache_page_bytes; // Missing value. - float missing{std::numeric_limits::quiet_NaN()}; - // Maximum number of pages cached in device. - std::int64_t max_num_device_pages{0}; + float missing; // The number of CPU threads. std::int32_t n_threads{0}; - - ExtMemConfig() = default; - ExtMemConfig(std::string cache, bool on_host, std::int64_t min_cache, float missing, - std::int64_t max_num_d, std::int32_t n_threads) + // The ratio of the cache that can be compressed. Used for testing. + float hw_decomp_ratio{std::numeric_limits::quiet_NaN()}; + // Fallback to using nvcomp. Used for testing. + bool allow_decomp_fallback{false}; + + ExtMemConfig() = delete; + ExtMemConfig(std::string cache, bool on_host, float h_ratio, std::int64_t min_cache, + float missing, std::int32_t n_threads) : cache{std::move(cache)}, on_host{on_host}, + cache_host_ratio{h_ratio}, min_cache_page_bytes{min_cache}, missing{missing}, - max_num_device_pages{max_num_d}, n_threads{n_threads} {} + + ExtMemConfig& SetParamsForTest(float _hw_decomp_ratio, bool _allow_decomp_fallback) { + this->hw_decomp_ratio = _hw_decomp_ratio; + this->allow_decomp_fallback = _allow_decomp_fallback; + return *this; + } }; /** diff --git a/include/xgboost/gbm.h b/include/xgboost/gbm.h index 3f4e8540efa5..65940773ffee 100644 --- a/include/xgboost/gbm.h +++ b/include/xgboost/gbm.h @@ -11,19 +11,21 @@ #include #include #include +#include // for GradientContainer #include #include -#include -#include #include #include +#include +#include namespace xgboost { class Json; class FeatureMap; class ObjFunction; +class CatContainer; struct Context; struct LearnerModelParam; @@ -47,16 +49,7 @@ class GradientBooster : public Model, public Configurable { * @param cfg configurations on both training and model parameters. */ virtual void Configure(Args const& cfg) = 0; - /*! - * \brief load model from stream - * \param fi input stream. - */ - virtual void Load(dmlc::Stream* fi) = 0; - /*! - * \brief save model to stream. - * \param fo output stream - */ - virtual void Save(dmlc::Stream* fo) const = 0; + /** * \brief Slice a model using boosting index. The slice m:n indicates taking all trees * that were fit during the boosting rounds m, (m+1), (m+2), ..., (n-1). 
@@ -86,8 +79,8 @@ class GradientBooster : public Model, public Configurable { * the booster may change content of gpair * @param obj The objective function used for boosting. */ - virtual void DoBoost(DMatrix* p_fmat, linalg::Matrix* in_gpair, - PredictionCacheEntry*, ObjFunction const* obj) = 0; + virtual void DoBoost(DMatrix* p_fmat, GradientContainer* in_gpair, + PredictionCacheEntry* prediction, ObjFunction const* obj) = 0; /** * \brief Generate predictions for given feature matrix @@ -144,12 +137,12 @@ class GradientBooster : public Model, public Configurable { bst_layer_t layer_begin, bst_layer_t layer_end, bool approximate) = 0; - /*! - * \brief dump the model in the requested format - * \param fmap feature map that may help give interpretations of feature - * \param with_stats extra statistics while dumping model - * \param format the format to dump the model in - * \return a vector of dump for boosters. + /** + * @brief dump the model in the requested format + * @param fmap feature map that may help give interpretations of feature + * @param with_stats extra statistics while dumping model + * @param format the format to dump the model in + * @return a vector of dump for boosters. */ [[nodiscard]] virtual std::vector DumpModel(const FeatureMap& fmap, bool with_stats, std::string format) const = 0; @@ -159,15 +152,18 @@ class GradientBooster : public Model, public Configurable { std::vector* features, std::vector* scores) const = 0; /** - * @brief Whether the current booster uses GPU. + * @brief Getter for categories. */ - [[nodiscard]] virtual bool UseGPU() const = 0; - /*! - * \brief create a gradient booster from given name - * \param name name of gradient booster - * \param generic_param Pointer to runtime parameters - * \param learner_model_param pointer to global model parameters - * \return The created booster. + [[nodiscard]] virtual CatContainer const* Cats() const { + LOG(FATAL) << "Retrieving categories is not supported by the current booster."; + return nullptr; + } + /** + * @brief create a gradient booster from given name + * @param name name of gradient booster + * @param generic_param Pointer to runtime parameters + * @param learner_model_param pointer to global model parameters + * @return The created booster. */ static GradientBooster* Create(const std::string& name, Context const* ctx, LearnerModelParam const* learner_model_param); diff --git a/include/xgboost/gradient.h b/include/xgboost/gradient.h new file mode 100644 index 000000000000..da4ffe9741ec --- /dev/null +++ b/include/xgboost/gradient.h @@ -0,0 +1,52 @@ +/** + * Copyright 2025, XGBoost Contributors + */ +#pragma once + +#include // for GradientPair +#include // for Matrix +#include + +#include // for size_t + +namespace xgboost { +/** + * @brief Container for gradient produced by objective. + */ +struct GradientContainer { + /** @brief Gradient used for multi-target tree split and linear model. */ + linalg::Matrix gpair; + /** @brief Gradient used for tree leaf value, optional. */ + linalg::Matrix value_gpair; + + [[nodiscard]] bool HasValueGrad() const noexcept { return !value_gpair.Empty(); } + + [[nodiscard]] std::size_t NumSplitTargets() const noexcept { return gpair.Shape(1); } + [[nodiscard]] std::size_t NumTargets() const noexcept { + return HasValueGrad() ? 
value_gpair.Shape(1) : this->gpair.Shape(1); + } + + linalg::MatrixView ValueGrad(Context const* ctx) const { + if (HasValueGrad()) { + return this->value_gpair.View(ctx->Device()); + } + return this->gpair.View(ctx->Device()); + } + + [[nodiscard]] linalg::Matrix const* Grad() const { return &gpair; } + [[nodiscard]] linalg::Matrix* Grad() { return &gpair; } + + [[nodiscard]] linalg::Matrix const* FullGradOnly() const { + if (this->HasValueGrad()) { + LOG(FATAL) << "Reduced gradient is not yet supported."; + } + return this->Grad(); + } + [[nodiscard]] linalg::Matrix* FullGradOnly() { + if (this->HasValueGrad()) { + LOG(FATAL) << "Reduced gradient is not yet supported."; + } + return this->Grad(); + } +}; +} // namespace xgboost diff --git a/include/xgboost/host_device_vector.h b/include/xgboost/host_device_vector.h index d9dfeadbc7eb..1a0da50fa07a 100644 --- a/include/xgboost/host_device_vector.h +++ b/include/xgboost/host_device_vector.h @@ -101,6 +101,7 @@ class HostDeviceVector { [[nodiscard]] bool Empty() const { return Size() == 0; } [[nodiscard]] std::size_t Size() const; + [[nodiscard]] std::size_t SizeBytes() const { return this->Size() * sizeof(T); } [[nodiscard]] DeviceOrd Device() const; common::Span DeviceSpan(); common::Span ConstDeviceSpan() const; diff --git a/include/xgboost/json.h b/include/xgboost/json.h index ddf460377f21..14db1ad13c20 100644 --- a/include/xgboost/json.h +++ b/include/xgboost/json.h @@ -30,7 +30,11 @@ class Value { } public: - /*!\brief Simplified implementation of LLVM RTTI. */ + /** + * @brief Simplified implementation of LLVM RTTI. + * + * @note The integer ID must be kept stable. + */ enum class ValueKind : std::int64_t { kString = 0, kNumber = 1, @@ -45,8 +49,11 @@ class Value { kI8Array = 9, kU8Array = 10, kI16Array = 11, - kI32Array = 12, - kI64Array = 13 + kU16Array = 12, + kI32Array = 13, + kU32Array = 14, + kI64Array = 15, + kU64Array = 16, }; explicit Value(ValueKind _kind) : kind_{_kind} {} @@ -192,14 +199,26 @@ using U8Array = JsonTypedArray; * @brief Typed UBJSON array for int16_t. */ using I16Array = JsonTypedArray; +/** + * @brief Typed UBJSON array for uint16_t. + */ +using U16Array = JsonTypedArray; /** * @brief Typed UBJSON array for int32_t. */ using I32Array = JsonTypedArray; +/** + * @brief Typed UBJSON array for uint32_t. + */ +using U32Array = JsonTypedArray; /** * @brief Typed UBJSON array for int64_t. */ using I64Array = JsonTypedArray; +/** + * @brief Typed UBJSON array for uint64_t. 
+ */ +using U64Array = JsonTypedArray; class JsonObject : public Value { public: diff --git a/include/xgboost/json_io.h b/include/xgboost/json_io.h index b69f807aaab8..5845c05e7c30 100644 --- a/include/xgboost/json_io.h +++ b/include/xgboost/json_io.h @@ -143,8 +143,11 @@ class JsonWriter { virtual void Visit(I8Array const* arr); virtual void Visit(U8Array const* arr); virtual void Visit(I16Array const* arr); + virtual void Visit(U16Array const* arr); virtual void Visit(I32Array const* arr); + virtual void Visit(U32Array const* arr); virtual void Visit(I64Array const* arr); + virtual void Visit(U64Array const* arr); virtual void Visit(JsonObject const* obj); virtual void Visit(JsonNumber const* num); virtual void Visit(JsonInteger const* num); diff --git a/include/xgboost/learner.h b/include/xgboost/learner.h index 1499804c8592..ffaddfbe6442 100644 --- a/include/xgboost/learner.h +++ b/include/xgboost/learner.h @@ -1,6 +1,6 @@ /** - * Copyright 2015-2023 by XGBoost Contributors - * \file learner.h + * Copyright 2015-2025, XGBoost Contributors + * * \brief Learner interface that integrates objective, gbm and evaluation together. * This is the user facing XGBoost training module. * \author Tianqi Chen @@ -8,22 +8,23 @@ #ifndef XGBOOST_LEARNER_H_ #define XGBOOST_LEARNER_H_ -#include // for Serializable -#include // for bst_feature_t, bst_target_t, bst_float, Args, GradientPair, .. -#include // for Context -#include // for Tensor, TensorView -#include // for Metric -#include // for Configurable, Model -#include // for Span -#include // for ObjInfo +#include // for Serializable +#include // for bst_feature_t, bst_target_t, bst_float, Args, GradientPair, .. +#include // for Context +#include // for GradientContainer +#include // for Vector, VectorView +#include // for Metric +#include // for Configurable, Model +#include // for Span +#include // for ObjInfo -#include // for max -#include // for int32_t, uint32_t, uint8_t -#include // for map -#include // for shared_ptr, unique_ptr -#include // for string -#include // for move -#include // for vector +#include // for max +#include // for int32_t, uint32_t, uint8_t +#include // for map +#include // for shared_ptr, unique_ptr +#include // for string +#include // for move +#include // for vector namespace xgboost { class FeatureMap; @@ -35,6 +36,7 @@ class Json; struct XGBAPIThreadLocalEntry; template class HostDeviceVector; +class CatContainer; enum class PredictionType : std::uint8_t { // NOLINT kValue = 0, @@ -46,25 +48,24 @@ enum class PredictionType : std::uint8_t { // NOLINT kLeaf = 6 }; -/*! - * \brief Learner class that does training and prediction. +/** + * @brief Learner class that does training and prediction. * This is the user facing module of xgboost training. * The Load/Save function corresponds to the model used in python/R. - * \code + * @code * - * std::unique_ptr learner(new Learner::Create(cache_mats)); - * learner.Configure(configs); + * std::unique_ptr learner{Learner::Create(cache_mats)}; + * learner->Configure(configs); * * for (int iter = 0; iter < max_iter; ++iter) { * learner->UpdateOneIter(iter, train_mat); * LOG(INFO) << learner->EvalOneIter(iter, data_sets, data_names); * } * - * \endcode + * @endcode */ class Learner : public Model, public Configurable, public dmlc::Serializable { public: - /*! \brief virtual destructor */ ~Learner() override; /*! * \brief Configure Learner based on set parameters. 
@@ -87,7 +88,7 @@ class Learner : public Model, public Configurable, public dmlc::Serializable { * @param in_gpair The input gradient statistics. */ virtual void BoostOneIter(std::int32_t iter, std::shared_ptr train, - linalg::Matrix* in_gpair) = 0; + GradientContainer* in_gpair) = 0; /*! * \brief evaluate the model for specific iteration using the configured metrics. * \param iter iteration number @@ -151,9 +152,6 @@ class Learner : public Model, public Configurable, public dmlc::Serializable { void LoadModel(Json const& in) override = 0; void SaveModel(Json* out) const override = 0; - virtual void LoadModel(dmlc::Stream* fi) = 0; - virtual void SaveModel(dmlc::Stream* fo) const = 0; - /*! * \brief Set multiple parameters at once. * @@ -170,11 +168,11 @@ class Learner : public Model, public Configurable, public dmlc::Serializable { */ virtual void SetParam(const std::string& key, const std::string& value) = 0; - /*! - * \brief Get the number of features of the booster. - * \return number of features + /** + * @brief Get the number of features of the booster. + * @return The number of features */ - virtual uint32_t GetNumFeature() const = 0; + virtual bst_feature_t GetNumFeature() const = 0; /*! * \brief Set additional attribute to the Booster. @@ -224,16 +222,19 @@ class Learner : public Model, public Configurable, public dmlc::Serializable { * \param fn Output feature types */ virtual void GetFeatureTypes(std::vector* ft) const = 0; - /** - * \brief Slice the model. + * @brief Getter for categories. + */ + [[nodiscard]] virtual CatContainer const* Cats() const = 0; + /** + * @brief Slice the model. * * See InplacePredict for layer parameters. * - * \param step step size between slice. - * \param out_of_bound Return true if end layer is out of bound. + * @param step step size between slice. + * @param out_of_bound Return true if end layer is out of bound. * - * \return a sliced model. + * @return a sliced model. */ virtual Learner* Slice(bst_layer_t begin, bst_layer_t end, bst_layer_t step, bool* out_of_bound) = 0; @@ -283,7 +284,7 @@ class Learner : public Model, public Configurable, public dmlc::Serializable { struct LearnerModelParamLegacy; /** - * \brief Strategy for building multi-target models. + * @brief Strategy for building multi-target models. */ enum class MultiStrategy : std::int32_t { kOneOutputPerTree = 0, @@ -291,50 +292,53 @@ enum class MultiStrategy : std::int32_t { }; /** - * \brief Basic model parameters, used to describe the booster. + * @brief Basic model parameters, used to describe the booster. */ struct LearnerModelParam { private: /** - * \brief Global bias, this is just a scalar value but can be extended to vector when we + * @brief Global bias, this is just a scalar value but can be extended to vector when we * support multi-class and multi-target. + * + * The value stored here is the value before applying the inverse link function, used + * for initializing the prediction matrix/vector. */ - linalg::Tensor base_score_; + linalg::Vector base_score_; + + LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t, + MultiStrategy multi_strategy); public: /** - * \brief The number of features. + * @brief The number of features. */ bst_feature_t num_feature{0}; /** - * \brief The number of classes or targets. + * @brief The number of classes or targets. */ std::uint32_t num_output_group{0}; /** - * \brief Current task, determined by objective. + * @brief Current task, determined by objective. 
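With BoostOneIter now taking a GradientContainer instead of a bare gradient matrix, a custom boosting loop changes shape accordingly. The following is a sketch under stated assumptions: the gradient is written through the Grad() accessor shown earlier, GradientContainer is default-constructible, and the construction of the DMatrix and the gradient values themselves are omitted:

    #include <xgboost/learner.h>

    #include <cstdint>
    #include <memory>

    void CustomBoost(std::shared_ptr<xgboost::DMatrix> train, std::int32_t n_rounds) {
      std::unique_ptr<xgboost::Learner> learner{xgboost::Learner::Create({train})};
      learner->Configure();
      xgboost::GradientContainer gpair;
      for (std::int32_t iter = 0; iter < n_rounds; ++iter) {
        // ... fill gpair.Grad() with first- and second-order statistics ...
        learner->BoostOneIter(iter, train, &gpair);
      }
    }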
*/ ObjInfo task{ObjInfo::kRegression}; /** - * \brief Strategy for building multi-target models. + * @brief Strategy for building multi-target models. */ MultiStrategy multi_strategy{MultiStrategy::kOneOutputPerTree}; LearnerModelParam() = default; - // As the old `LearnerModelParamLegacy` is still used by binary IO, we keep - // this one as an immutable copy. LearnerModelParam(Context const* ctx, LearnerModelParamLegacy const& user_param, - linalg::Tensor base_margin, ObjInfo t, MultiStrategy multi_strategy); - LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t, - MultiStrategy multi_strategy); - LearnerModelParam(bst_feature_t n_features, linalg::Tensor base_score, + linalg::Vector base_score, ObjInfo t, MultiStrategy multi_strategy); + // This ctor is only used by tests. + LearnerModelParam(bst_feature_t n_features, linalg::Vector base_score, std::uint32_t n_groups, bst_target_t n_targets, MultiStrategy multi_strategy) : base_score_{std::move(base_score)}, num_feature{n_features}, num_output_group{std::max(n_groups, n_targets)}, multi_strategy{multi_strategy} {} - linalg::TensorView BaseScore(Context const* ctx) const; - [[nodiscard]] linalg::TensorView BaseScore(DeviceOrd device) const; + linalg::VectorView BaseScore(Context const* ctx) const; + [[nodiscard]] linalg::VectorView BaseScore(DeviceOrd device) const; void Copy(LearnerModelParam const& that); [[nodiscard]] bool IsVectorLeaf() const noexcept { diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h index 8aa7cd8550ed..d1b686b953f6 100644 --- a/include/xgboost/linalg.h +++ b/include/xgboost/linalg.h @@ -15,8 +15,8 @@ #include #include -#include // for int32_t -#include // for size_t +#include // for size_t +#include // for int32_t #include #include #include // for make_tuple @@ -225,23 +225,6 @@ void ReshapeImpl(size_t (&out_shape)[D], I &&s, S &&...rest) { ReshapeImpl(out_shape, std::forward(rest)...); } -template -LINALG_HD decltype(auto) constexpr Apply(Fn &&f, Tup &&t, std::index_sequence) { - return f(std::get(t)...); -} - -/** - * C++ 17 style apply. - * - * \param f function to apply - * \param t tuple of arguments - */ -template -LINALG_HD decltype(auto) constexpr Apply(Fn &&f, Tup &&t) { - constexpr auto kSize = std::tuple_size::value; - return Apply(std::forward(f), std::forward(t), std::make_index_sequence{}); -} - /** * C++ 17 conjunction */ @@ -290,7 +273,7 @@ enum Order : std::uint8_t { * some functions expect data types that can be used in everywhere (update prediction * cache for example). */ -template +template class TensorView { public: using ShapeT = std::size_t[kDim]; @@ -317,7 +300,7 @@ class TensorView { } } - template + template LINALG_HD size_t MakeSliceDim(std::size_t new_shape[D], std::size_t new_stride[D], detail::RangeTag &&range) const { static_assert(new_dim < D); @@ -662,15 +645,26 @@ auto MakeVec(T *ptr, size_t s, DeviceOrd device = DeviceOrd::CPU()) { return linalg::TensorView{{ptr, s}, {s}, device}; } +template +auto MakeVec(DeviceOrd device, common::Span s) { + return linalg::TensorView{s, {s.size()}, device}; +} + +template +auto MakeVec(std::vector const &v) { + return linalg::TensorView, 1>{ + {v.data(), v.size()}, {v.size()}, DeviceOrd::CPU()}; +} + template auto MakeVec(HostDeviceVector *data) { - return MakeVec(data->Device().IsCUDA() ? data->DevicePointer() : data->HostPointer(), + return MakeVec(data->Device().IsCPU() ? 
data->HostPointer() : data->DevicePointer(), data->Size(), data->Device()); } template auto MakeVec(HostDeviceVector const *data) { - return MakeVec(data->Device().IsCUDA() ? data->ConstDevicePointer() : data->ConstHostPointer(), + return MakeVec(data->Device().IsCPU() ? data->ConstHostPointer() : data->ConstDevicePointer(), data->Size(), data->Device()); } @@ -776,7 +770,7 @@ class Tensor { for (auto i = D; i < kDim; ++i) { shape_[i] = 1; } - if (device.IsCUDA()) { + if (!device.IsCPU()) { data_.SetDevice(device); data_.ConstDevicePointer(); // Pull to device; } @@ -805,11 +799,11 @@ class Tensor { shape_[i] = 1; } auto size = detail::CalcSize(shape_); - if (device.IsCUDA()) { + if (!device.IsCPU()) { data_.SetDevice(device); } data_.Resize(size); - if (device.IsCUDA()) { + if (!device.IsCPU()) { data_.DevicePointer(); // Pull to device } } @@ -963,7 +957,7 @@ template using Vector = Tensor; /** - * \brief Create an array without initialization. + * @brief Create an array without initialization. */ template auto Empty(Context const *ctx, Index &&...index) { @@ -973,6 +967,17 @@ auto Empty(Context const *ctx, Index &&...index) { return t; } +/** + * @brief Create an array with the same shape and dtype as the input. + */ +template +auto EmptyLike(Context const *ctx, Tensor const &in) { + Tensor t; + t.SetDevice(ctx->Device()); + t.Reshape(in.Shape()); + return t; +} + /** * \brief Create an array with value v. */ diff --git a/include/xgboost/logging.h b/include/xgboost/logging.h index 86550cc13f70..eaed72f18bad 100644 --- a/include/xgboost/logging.h +++ b/include/xgboost/logging.h @@ -89,10 +89,8 @@ class LogCallbackRegistry { public: using Callback = void (*)(const char*); LogCallbackRegistry() {} - inline void Register(Callback log_callback) {} - inline Callback Get() const { - return nullptr; - } + inline void Register(Callback) {} + inline Callback Get() const { return nullptr; } }; #endif // !defined(XGBOOST_STRICT_R_MODE) || XGBOOST_STRICT_R_MODE == 0 diff --git a/include/xgboost/model.h b/include/xgboost/model.h index 610c7a0f5c48..c9c045234501 100644 --- a/include/xgboost/model.h +++ b/include/xgboost/model.h @@ -1,15 +1,12 @@ -/*! - * Copyright (c) 2019 by Contributors - * \file model.h - * \brief Defines the abstract interface for different components in XGBoost. +/** + * Copyright 2019-2025, XGBoost Contributors + * + * @file model.h + * @brief Defines the abstract interface for different components in XGBoost. */ #ifndef XGBOOST_MODEL_H_ #define XGBOOST_MODEL_H_ -namespace dmlc { -class Stream; -} // namespace dmlc - namespace xgboost { class Json; diff --git a/include/xgboost/multi_target_tree_model.h b/include/xgboost/multi_target_tree_model.h index 430c5455f1e9..ca0fa716284c 100644 --- a/include/xgboost/multi_target_tree_model.h +++ b/include/xgboost/multi_target_tree_model.h @@ -9,7 +9,7 @@ #include // for bst_node_t, bst_target_t, bst_feature_t #include // for Context #include // for HostDeviceVector -#include // for VectorView +#include // for VectorView, MatrixView #include // for Model #include // for Span @@ -18,13 +18,18 @@ #include // for vector namespace xgboost { +namespace tree { +struct MultiTargetTreeView; +} struct TreeParam; + /** * @brief Tree structure for multi-target model. 
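The new MakeVec overloads and the EmptyLike helper cut down the boilerplate around one-off views and scratch buffers. A short usage sketch, relying only on the declarations in this hunk:

    #include <xgboost/context.h>
    #include <xgboost/linalg.h>

    #include <vector>

    void Scratch(xgboost::Context const* ctx) {
      // Uninitialized 16x4 matrix allocated on the context's device.
      auto m = xgboost::linalg::Empty<float>(ctx, 16, 4);
      // Same shape, dtype, and device as `m`, also uninitialized.
      auto like = xgboost::linalg::EmptyLike(ctx, m);
      // 1-D read-only view over a host vector; no copy is made.
      std::vector<float> v(16, 0.0f);
      auto vec = xgboost::linalg::MakeVec(v);
      (void)like;
      (void)vec;
    }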
*/ class MultiTargetTree : public Model { public: static bst_node_t constexpr InvalidNodeId() { return -1; } + friend struct tree::MultiTargetTreeView; private: TreeParam const* param_; @@ -37,13 +42,13 @@ class MultiTargetTree : public Model { HostDeviceVector weights_; [[nodiscard]] linalg::VectorView NodeWeight(bst_node_t nidx) const { - auto beg = nidx * this->NumTarget(); - auto v = this->weights_.ConstHostSpan().subspan(beg, this->NumTarget()); + auto beg = nidx * this->NumTargets(); + auto v = this->weights_.ConstHostSpan().subspan(beg, this->NumTargets()); return linalg::MakeTensorView(DeviceOrd::CPU(), v, v.size()); } [[nodiscard]] linalg::VectorView NodeWeight(bst_node_t nidx) { - auto beg = nidx * this->NumTarget(); - auto v = this->weights_.HostSpan().subspan(beg, this->NumTarget()); + auto beg = nidx * this->NumTargets(); + auto v = this->weights_.HostSpan().subspan(beg, this->NumTargets()); return linalg::MakeTensorView(DeviceOrd::CPU(), v, v.size()); } @@ -51,13 +56,13 @@ class MultiTargetTree : public Model { explicit MultiTargetTree(TreeParam const* param); MultiTargetTree(MultiTargetTree const& that); MultiTargetTree& operator=(MultiTargetTree const& that) = delete; - MultiTargetTree(MultiTargetTree&& that) = default; - MultiTargetTree& operator=(MultiTargetTree&& that) = default; + MultiTargetTree(MultiTargetTree&& that) = delete; + MultiTargetTree& operator=(MultiTargetTree&& that) = delete; /** - * @brief Set the weight for a leaf. + * @brief Set the weight for the root. */ - void SetLeaf(bst_node_t nidx, linalg::VectorView weight); + void SetRoot(linalg::VectorView weight); /** * @brief Expand a leaf into split node. */ @@ -65,13 +70,12 @@ class MultiTargetTree : public Model { linalg::VectorView base_weight, linalg::VectorView left_weight, linalg::VectorView right_weight); + /** @see RegTree::SetLeaves */ + void SetLeaves(std::vector leaves, common::Span weights); [[nodiscard]] bool IsLeaf(bst_node_t nidx) const { return left_.ConstHostVector()[nidx] == InvalidNodeId(); } - [[nodiscard]] bst_node_t Parent(bst_node_t nidx) const { - return parent_.ConstHostVector().at(nidx); - } [[nodiscard]] bst_node_t LeftChild(bst_node_t nidx) const { return left_.ConstHostVector().at(nidx); } @@ -79,30 +83,18 @@ class MultiTargetTree : public Model { return right_.ConstHostVector().at(nidx); } - [[nodiscard]] bst_feature_t SplitIndex(bst_node_t nidx) const { - return split_index_.ConstHostVector()[nidx]; - } - [[nodiscard]] float SplitCond(bst_node_t nidx) const { - return split_conds_.ConstHostVector()[nidx]; - } - [[nodiscard]] bool DefaultLeft(bst_node_t nidx) const { - return default_left_.ConstHostVector()[nidx]; - } - [[nodiscard]] bst_node_t DefaultChild(bst_node_t nidx) const { - return this->DefaultLeft(nidx) ? 
this->LeftChild(nidx) : this->RightChild(nidx); - } - - [[nodiscard]] bst_target_t NumTarget() const; + [[nodiscard]] bst_target_t NumTargets() const; + [[nodiscard]] auto NumLeaves() const { return this->weights_.Size() / this->NumTargets(); } [[nodiscard]] std::size_t Size() const; + [[nodiscard]] MultiTargetTree* Copy(TreeParam const* param) const; - [[nodiscard]] bst_node_t Depth(bst_node_t nidx) const { - bst_node_t depth{0}; - while (Parent(nidx) != InvalidNodeId()) { - ++depth; - nidx = Parent(nidx); + common::Span Weights(DeviceOrd device) const { + if (device.IsCPU()) { + return this->weights_.ConstHostSpan(); } - return depth; + this->weights_.SetDevice(device); + return this->weights_.ConstDeviceSpan(); } [[nodiscard]] linalg::VectorView LeafValue(bst_node_t nidx) const { @@ -112,6 +104,8 @@ class MultiTargetTree : public Model { void LoadModel(Json const& in) override; void SaveModel(Json* out) const override; + + [[nodiscard]] std::size_t MemCostBytes() const; }; } // namespace xgboost #endif // XGBOOST_MULTI_TARGET_TREE_MODEL_H_ diff --git a/include/xgboost/objective.h b/include/xgboost/objective.h index 41f10f09e6c3..497821590bc9 100644 --- a/include/xgboost/objective.h +++ b/include/xgboost/objective.h @@ -1,8 +1,8 @@ /** - * Copyright 2014-2024, XGBoost Contributors - * \file objective.h - * \brief interface of objective function used by xgboost. - * \author Tianqi Chen, Kailong Chen + * Copyright 2014-2025, XGBoost Contributors + * + * @brief interface of objective function used by xgboost. + * @author Tianqi Chen, Kailong Chen */ #ifndef XGBOOST_OBJECTIVE_H_ #define XGBOOST_OBJECTIVE_H_ @@ -11,53 +11,52 @@ #include #include #include +#include // for Vector #include #include -#include // std::int32_t +#include // for int32_t #include -#include +#include // for string namespace xgboost { class RegTree; struct Context; -/*! \brief interface of objective function */ +/** @brief The interface of objective function */ class ObjFunction : public Configurable { protected: - Context const* ctx_; + Context const* ctx_{nullptr}; public: static constexpr float DefaultBaseScore() { return 0.5f; } public: - /*! \brief virtual destructor */ ~ObjFunction() override = default; - /*! - * \brief Configure the objective with the specified parameters. - * \param args arguments to the objective function. + /** + * @brief Configure the objective with the specified parameters. + * + * @param args arguments to the objective function. */ virtual void Configure(Args const& args) = 0; /** * @brief Get gradient over each of predictions, given existing information. * - * @param preds prediction of current round - * @param info information about labels, weights, groups in rank + * @param preds Raw prediction (before applying the inverse link) of the current round. + * @param info information about labels, weights, groups in rank. * @param iteration current iteration number. * @param out_gpair output of get gradient, saves gradient and second order gradient in */ virtual void GetGradient(HostDeviceVector const& preds, MetaInfo const& info, std::int32_t iter, linalg::Matrix* out_gpair) = 0; - /*! \return the default evaluation metric for the objective */ - virtual const char* DefaultEvalMetric() const = 0; + /** @return the default evaluation metric for the objective */ + [[nodiscard]] virtual const char* DefaultEvalMetric() const = 0; /** - * \brief Return the configuration for the default metric. + * @brief Return the configuration for the default metric. 
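The Weights accessor above hands out the flat weight buffer directly; callers index it node-major, with NumTargets() values per node, matching the NodeWeight implementation. A sketch, assuming the span's element type is a read-only float:

    #include <xgboost/multi_target_tree_model.h>

    void SumNodeWeight(xgboost::MultiTargetTree const& tree, xgboost::bst_node_t nidx,
                       double* out) {
      auto weights = tree.Weights(xgboost::DeviceOrd::CPU());
      auto n_targets = tree.NumTargets();
      // One row per node: weights[nidx * n_targets + t] is the weight for target t.
      auto row = weights.subspan(nidx * n_targets, n_targets);
      *out = 0.0;
      for (auto w : row) {
        *out += w;
      }
    }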
 */
-  virtual Json DefaultMetricConfig() const { return Json{Null{}}; }
-
-  // the following functions are optional, most of time default implementation is good enough
+  [[nodiscard]] virtual Json DefaultMetricConfig() const { return Json{Null{}}; }
  /**
   * @brief Apply inverse link (activation) function to prediction values.
   *
@@ -75,25 +74,28 @@ class ObjFunction : public Configurable {
   */
  virtual void EvalTransform(HostDeviceVector<float>* io_preds) { this->PredTransform(io_preds); }
  /**
-   * @brief Apply link function to the intercept.
+   * @brief Apply the link function to the intercept.
   *
-   * This is used to transform user-set base_score back to margin used by gradient
-   * boosting
+   * This is an inverse of `PredTransform` for most of the objectives (if there's a
+   * valid inverse). It's used to transform user-set base_score back to margin used by
+   * gradient boosting. The method converts objective-based valid outputs like
+   * probability back to raw model outputs.
   *
-   * @return transformed value
+   * @param [in,out] base_score The intercept to transform.
   */
-  [[nodiscard]] virtual float ProbToMargin(float base_score) const { return base_score; }
+  virtual void ProbToMargin(linalg::Vector<float>* /*base_score*/) const {}
  /**
-   * @brief Obtain the initial estimation of prediction.
+   * @brief Obtain the initial estimation of prediction (intercept).
   *
-   * The output in `base_score` represents prediction after apply the inverse link function.
+   * The output in `base_score` represents prediction after applying the inverse link
+   * function (valid prediction instead of raw).
   *
   * @param info MetaInfo that contains label.
   * @param base_score Output estimation.
   */
-  virtual void InitEstimation(MetaInfo const& info, linalg::Tensor<float, 1>* base_score) const;
-  /*!
-   * \brief Return task of this objective.
+  virtual void InitEstimation(MetaInfo const& info, linalg::Vector<float>* base_score) const;
+  /**
+   * @brief Return task of this objective.
   */
  [[nodiscard]] virtual struct ObjInfo Task() const = 0;
  /**
@@ -106,31 +108,33 @@
    }
    return 1;
  }
+  /** @brief Getter of the context. */
+  [[nodiscard]] Context const* Ctx() const { return this->ctx_; }
  /**
-   * \brief Update the leaf values after a tree is built. Needed for objectives with 0
+   * @brief Update the leaf values after a tree is built. Needed for objectives with 0
   * hessian.
   *
   * Note that the leaf update is not well defined for distributed training as XGBoost
   * computes only an average of quantile between workers. This breaks when some leaves
   * have no samples assigned in a local worker.
   *
-   * \param position The leaf index for each rows.
-   * \param info MetaInfo providing labels and weights.
-   * \param learning_rate The learning rate for current iteration.
-   * \param prediction Model prediction after transformation.
-   * \param group_idx The group index for this tree, 0 when it's not multi-target or multi-class.
-   * \param p_tree Tree that needs to be updated.
+   * @param position The leaf index for each row.
+   * @param info MetaInfo providing labels and weights.
+   * @param learning_rate The learning rate for current iteration.
+   * @param prediction Model prediction after transformation.
+   * @param group_idx The group index for this tree, 0 when it's not multi-target or multi-class.
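Because ProbToMargin now transforms the whole intercept vector in place rather than returning a single float, a binary-logistic style objective would override it roughly as below. This is an illustrative fragment of a hypothetical ObjFunction subclass, not code from the patch; it assumes a float element type and needs <cmath> and <xgboost/linalg.h>:

    // Hypothetical member of a logistic-style objective.
    void ProbToMargin(linalg::Vector<float>* base_score) const override {
      auto h = base_score->HostView();
      for (std::size_t i = 0; i < h.Size(); ++i) {
        h(i) = std::log(h(i) / (1.0f - h(i)));  // logit: probability -> raw margin
      }
    }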
*/ virtual void UpdateTreeLeaf(HostDeviceVector const& /*position*/, MetaInfo const& /*info*/, float /*learning_rate*/, HostDeviceVector const& /*prediction*/, - std::int32_t /*group_idx*/, RegTree* /*p_tree*/) const {} - - /*! - * \brief Create an objective function according to name. - * \param ctx Pointer to runtime parameters. - * \param name Name of the objective. + bst_target_t /*group_idx*/, RegTree* /*p_tree*/) const {} + /** + * @brief Create an objective function according to the name. + * + * @param name Name of the objective. + * @param ctx Pointer to the context. */ static ObjFunction* Create(const std::string& name, Context const* ctx); }; diff --git a/include/xgboost/span.h b/include/xgboost/span.h index 6692d2f68b73..29b1cf9326bc 100644 --- a/include/xgboost/span.h +++ b/include/xgboost/span.h @@ -1,5 +1,5 @@ /** - * Copyright 2018-2024, XGBoost contributors + * Copyright 2018-2025, XGBoost contributors * \brief span class based on ISO++20 span * * About NOLINTs in this file: @@ -358,6 +358,10 @@ XGBOOST_DEVICE bool LexicographicalCompare(InputIt1 first1, InputIt1 last1, } // namespace detail +template +XGBOOST_DEVICE std::enable_if_t, std::size_t> SizeBytes(std::size_t n) { + return n * sizeof(T); +} /*! * \brief span class implementation, based on ISO++20 span. The interface @@ -556,7 +560,7 @@ class Span { return size_; } XGBOOST_DEVICE constexpr index_type size_bytes() const __span_noexcept { // NOLINT - return size() * sizeof(T); + return SizeBytes(size()); } XGBOOST_DEVICE constexpr bool empty() const __span_noexcept { // NOLINT diff --git a/include/xgboost/tree_model.h b/include/xgboost/tree_model.h index 921fc5a1ebc8..bc8d4ade6d76 100644 --- a/include/xgboost/tree_model.h +++ b/include/xgboost/tree_model.h @@ -1,177 +1,100 @@ /** * Copyright 2014-2025, XGBoost Contributors - * \file tree_model.h - * \brief model structure for tree + * + * @brief model structure for tree * \author Tianqi Chen */ #ifndef XGBOOST_TREE_MODEL_H_ #define XGBOOST_TREE_MODEL_H_ -#include -#include #include #include #include -#include // for VectorView +#include // for HostDeviceVector +#include // for VectorView #include #include #include // for MultiTargetTree #include #include -#include -#include // for make_unique -#include +#include // for numeric_limits +#include // for unique_ptr #include +#include // for is_signed_v #include namespace xgboost { + +namespace tree { +struct ScalarTreeView; +struct MultiTargetTreeView; +} + class Json; -// FIXME(trivialfis): Once binary IO is gone, make this parameter internal as it should -// not be configured by users. -/*! \brief meta parameters of the tree */ -struct TreeParam : public dmlc::Parameter { - /*! \brief (Deprecated) number of start root */ - int deprecated_num_roots{1}; - /*! \brief total number of nodes */ - int num_nodes{1}; - /*!\brief number of deleted nodes */ - int num_deleted{0}; - /*! \brief maximum depth, this is a statistics of the tree */ - int deprecated_max_depth{0}; - /*! \brief number of features used for tree construction */ +/** @brief meta parameters of the tree */ +struct TreeParam { + /** @brief The number of nodes */ + bst_node_t num_nodes{1}; + /** @brief The number of deleted nodes */ + bst_node_t num_deleted{0}; + /** @brief The number of features used for tree construction */ bst_feature_t num_feature{0}; - /*! - * \brief leaf vector size, used for vector tree - * used to store more than one dimensional information in tree - */ + /** @brief leaf vector size. Used by the vector leaf. 
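TreeParam drops its dmlc::Parameter machinery and fixed binary layout in favor of the FromJson/ToJson pair declared here. A round-trip sketch; the exact JSON key layout is an implementation detail and an assumption:

    #include <xgboost/json.h>
    #include <xgboost/tree_model.h>

    void RoundTrip(xgboost::TreeParam const& in, xgboost::TreeParam* out) {
      xgboost::Json j{xgboost::Object{}};
      in.ToJson(&j);     // serializes num_nodes, num_deleted, num_feature, size_leaf_vector
      out->FromJson(j);  // restores the same fields; operator== should hold afterwards
    }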
*/ bst_target_t size_leaf_vector{1}; - /*! \brief reserved part, make sure alignment works for 64bit */ - int reserved[31]; - /*! \brief constructor */ - TreeParam() { - // assert compact alignment - static_assert(sizeof(TreeParam) == (31 + 6) * sizeof(int), "TreeParam: 64 bit align"); - std::memset(reserved, 0, sizeof(reserved)); - } - - // Swap byte order for all fields. Useful for transporting models between machines with different - // endianness (big endian vs little endian) - [[nodiscard]] TreeParam ByteSwap() const { - TreeParam x = *this; - dmlc::ByteSwap(&x.deprecated_num_roots, sizeof(x.deprecated_num_roots), 1); - dmlc::ByteSwap(&x.num_nodes, sizeof(x.num_nodes), 1); - dmlc::ByteSwap(&x.num_deleted, sizeof(x.num_deleted), 1); - dmlc::ByteSwap(&x.deprecated_max_depth, sizeof(x.deprecated_max_depth), 1); - dmlc::ByteSwap(&x.num_feature, sizeof(x.num_feature), 1); - dmlc::ByteSwap(&x.size_leaf_vector, sizeof(x.size_leaf_vector), 1); - dmlc::ByteSwap(x.reserved, sizeof(x.reserved[0]), sizeof(x.reserved) / sizeof(x.reserved[0])); - return x; - } - - // declare the parameters - DMLC_DECLARE_PARAMETER(TreeParam) { - // only declare the parameters that can be set by the user. - // other arguments are set by the algorithm. - DMLC_DECLARE_FIELD(num_nodes).set_lower_bound(1).set_default(1); - DMLC_DECLARE_FIELD(num_feature) - .set_default(0) - .describe("Number of features used in tree construction."); - DMLC_DECLARE_FIELD(num_deleted).set_default(0); - DMLC_DECLARE_FIELD(size_leaf_vector) - .set_lower_bound(0) - .set_default(1) - .describe("Size of leaf vector, reserved for vector tree"); - } bool operator==(const TreeParam& b) const { return num_nodes == b.num_nodes && num_deleted == b.num_deleted && num_feature == b.num_feature && size_leaf_vector == b.size_leaf_vector; } + + void FromJson(Json const& in); + void ToJson(Json* p_out) const; }; -/*! \brief node statistics used in regression tree */ +/** @brief node statistics used in regression tree */ struct RTreeNodeStat { - /*! \brief loss change caused by current split */ - bst_float loss_chg; - /*! \brief sum of hessian values, used to measure coverage of data */ - bst_float sum_hess; - /*! \brief weight of current node */ - bst_float base_weight; - /*! \brief number of child that is leaf node known up to now */ - int leaf_child_cnt {0}; + /** @brief loss change caused by current split */ + float loss_chg; + /** @brief sum of hessian values, used to measure coverage of data */ + float sum_hess; + /** @brief weight of current node */ + float base_weight; + /** @brief number of child that is leaf node known up to now */ + int leaf_child_cnt{0}; RTreeNodeStat() = default; - RTreeNodeStat(float loss_chg, float sum_hess, float weight) : - loss_chg{loss_chg}, sum_hess{sum_hess}, base_weight{weight} {} + RTreeNodeStat(float loss_chg, float sum_hess, float weight) + : loss_chg{loss_chg}, sum_hess{sum_hess}, base_weight{weight} {} bool operator==(const RTreeNodeStat& b) const { - return loss_chg == b.loss_chg && sum_hess == b.sum_hess && - base_weight == b.base_weight && leaf_child_cnt == b.leaf_child_cnt; - } - // Swap byte order for all fields. 
Useful for transporting models between machines with different - // endianness (big endian vs little endian) - [[nodiscard]] RTreeNodeStat ByteSwap() const { - RTreeNodeStat x = *this; - dmlc::ByteSwap(&x.loss_chg, sizeof(x.loss_chg), 1); - dmlc::ByteSwap(&x.sum_hess, sizeof(x.sum_hess), 1); - dmlc::ByteSwap(&x.base_weight, sizeof(x.base_weight), 1); - dmlc::ByteSwap(&x.leaf_child_cnt, sizeof(x.leaf_child_cnt), 1); - return x; + return loss_chg == b.loss_chg && sum_hess == b.sum_hess && base_weight == b.base_weight && + leaf_child_cnt == b.leaf_child_cnt; } }; /** - * \brief Helper for defining copyable data structure that contains unique pointers. - */ -template -class CopyUniquePtr { - std::unique_ptr ptr_{nullptr}; - - public: - CopyUniquePtr() = default; - CopyUniquePtr(CopyUniquePtr const& that) { - ptr_.reset(nullptr); - if (that.ptr_) { - ptr_ = std::make_unique(*that); - } - } - T* get() const noexcept { return ptr_.get(); } // NOLINT - - T& operator*() { return *ptr_; } - T* operator->() noexcept { return this->get(); } - - T const& operator*() const { return *ptr_; } - T const* operator->() const noexcept { return this->get(); } - - explicit operator bool() const { return static_cast(ptr_); } - bool operator!() const { return !ptr_; } - void reset(T* ptr) { ptr_.reset(ptr); } // NOLINT -}; - -/** - * \brief define regression tree to be the most common tree model. + * @brief define regression tree to be the most common tree model. * * This is the data structure used in xgboost's major tree models. */ class RegTree : public Model { public: - using SplitCondT = bst_float; + using SplitCondT = float; static constexpr bst_node_t kInvalidNodeId{MultiTargetTree::InvalidNodeId()}; static constexpr uint32_t kDeletedNodeMarker = std::numeric_limits::max(); static constexpr bst_node_t kRoot{0}; - /*! \brief tree node */ + /** @brief tree node */ class Node { public: XGBOOST_DEVICE Node() { // assert compact alignment - static_assert(sizeof(Node) == 4 * sizeof(int) + sizeof(Info), - "Node: 64 bit align"); + static_assert(sizeof(Node) == 4 * sizeof(int) + sizeof(Info), "Node: 64 bit align"); } - Node(int32_t cleft, int32_t cright, int32_t parent, - uint32_t split_ind, float split_cond, bool default_left) : - parent_{parent}, cleft_{cleft}, cright_{cright} { + Node(int32_t cleft, int32_t cright, int32_t parent, uint32_t split_ind, float split_cond, + bool default_left) + : parent_{parent}, cleft_{cleft}, cright_{cright} { this->SetParent(parent_); this->SetSplit(split_ind, split_cond, default_left); } @@ -261,16 +184,6 @@ class RegTree : public Model { info_.leaf_value == b.info_.leaf_value; } - [[nodiscard]] Node ByteSwap() const { - Node x = *this; - dmlc::ByteSwap(&x.parent_, sizeof(x.parent_), 1); - dmlc::ByteSwap(&x.cleft_, sizeof(x.cleft_), 1); - dmlc::ByteSwap(&x.cright_, sizeof(x.cright_), 1); - dmlc::ByteSwap(&x.sindex_, sizeof(x.sindex_), 1); - dmlc::ByteSwap(&x.info_, sizeof(x.info_), 1); - return x; - } - private: /*! * \brief in leaf node, we have weights, in non-leaf nodes, @@ -291,43 +204,47 @@ class RegTree : public Model { Info info_; }; - /*! 
- * \brief change a non leaf node to a leaf node, delete its children - * \param rid node id of the node - * \param value new leaf value + /** + * @brief Change a non leaf node to a leaf node, delete its children + * + * @param nidx Node id + * @param value The new leaf value */ - void ChangeToLeaf(int rid, bst_float value) { - CHECK(nodes_[nodes_[rid].LeftChild() ].IsLeaf()); - CHECK(nodes_[nodes_[rid].RightChild()].IsLeaf()); - this->DeleteNode(nodes_[rid].LeftChild()); - this->DeleteNode(nodes_[rid].RightChild()); - nodes_[rid].SetLeaf(value); + void ChangeToLeaf(bst_node_t nidx, float value) { + auto& h_nodes = nodes_.HostVector(); + CHECK(h_nodes[h_nodes[nidx].LeftChild()].IsLeaf()); + CHECK(h_nodes[h_nodes[nidx].RightChild()].IsLeaf()); + this->DeleteNode(h_nodes[nidx].LeftChild()); + this->DeleteNode(h_nodes[nidx].RightChild()); + h_nodes[nidx].SetLeaf(value); } - /*! - * \brief collapse a non leaf node to a leaf node, delete its children - * \param rid node id of the node - * \param value new leaf value + /** + * @brief Collapse a non leaf node to a leaf node, delete its children + * + * @param nidx Node id + * @param value The new leaf value */ - void CollapseToLeaf(int rid, bst_float value) { - if (nodes_[rid].IsLeaf()) return; - if (!nodes_[nodes_[rid].LeftChild() ].IsLeaf()) { - CollapseToLeaf(nodes_[rid].LeftChild(), 0.0f); + void CollapseToLeaf(bst_node_t nidx, float value) { + auto& h_nodes = nodes_.HostVector(); + if (h_nodes[nidx].IsLeaf()) return; + if (!h_nodes[h_nodes[nidx].LeftChild()].IsLeaf()) { + CollapseToLeaf(h_nodes[nidx].LeftChild(), 0.0f); } - if (!nodes_[nodes_[rid].RightChild() ].IsLeaf()) { - CollapseToLeaf(nodes_[rid].RightChild(), 0.0f); + if (!h_nodes[h_nodes[nidx].RightChild()].IsLeaf()) { + CollapseToLeaf(h_nodes[nidx].RightChild(), 0.0f); } - this->ChangeToLeaf(rid, value); + this->ChangeToLeaf(nidx, value); } RegTree() { - param_.Init(Args{}); - nodes_.resize(param_.num_nodes); - stats_.resize(param_.num_nodes); - split_types_.resize(param_.num_nodes, FeatureType::kNumerical); - split_categories_segments_.resize(param_.num_nodes); + nodes_.HostVector().resize(param_.num_nodes); + stats_.HostVector().resize(param_.num_nodes); + split_types_.HostVector().resize(param_.num_nodes, FeatureType::kNumerical); + split_categories_segments_.HostVector().resize(param_.num_nodes); + auto& h_nodes = nodes_.HostVector(); for (int i = 0; i < param_.num_nodes; i++) { - nodes_[i].SetLeaf(0.0f); - nodes_[i].SetParent(kInvalidNodeId); + h_nodes[i].SetLeaf(0.0f); + h_nodes[i].SetParent(kInvalidNodeId); } } /** @@ -342,72 +259,36 @@ class RegTree : public Model { } /*! \brief get node given nid */ - Node& operator[](int nid) { - return nodes_[nid]; - } - /*! \brief get node given nid */ - const Node& operator[](int nid) const { - return nodes_[nid]; - } + Node& operator[](bst_node_t nidx) { return nodes_.HostVector()[nidx]; } - /*! \brief get const reference to nodes */ - [[nodiscard]] const std::vector& GetNodes() const { return nodes_; } + public: + /** @brief Get const reference to nodes */ + [[nodiscard]] common::Span GetNodes(DeviceOrd device) const { + CHECK(!this->IsMultiTarget()); + return device.IsCPU() ? nodes_.ConstHostSpan() + : (nodes_.SetDevice(device), nodes_.ConstDeviceSpan()); + } - /*! 
\brief get const reference to stats */
-  [[nodiscard]] const std::vector<RTreeNodeStat>& GetStats() const { return stats_; }
+  /** @brief Get const reference to stats */
+  [[nodiscard]] common::Span<RTreeNodeStat const> GetStats(DeviceOrd device) const {
+    CHECK(!this->IsMultiTarget());
+    return device.IsCPU() ? stats_.ConstHostSpan()
+                          : (stats_.SetDevice(device), stats_.ConstDeviceSpan());
+  }
  /*! \brief get node statistics given nid */
  RTreeNodeStat& Stat(int nid) {
-    return stats_[nid];
+    return stats_.HostVector()[nid];
  }
-  /*! \brief get node statistics given nid */
-  [[nodiscard]] const RTreeNodeStat& Stat(int nid) const {
-    return stats_[nid];
-  }
-
-  /*!
-   * \brief load model from stream
-   * \param fi input stream
-   */
-  void Load(dmlc::Stream* fi);
-  /*!
-   * \brief save model to stream
-   * \param fo output stream
-   */
-  void Save(dmlc::Stream* fo) const;
 
  void LoadModel(Json const& in) override;
  void SaveModel(Json* out) const override;
 
  bool operator==(const RegTree& b) const {
-    return nodes_ == b.nodes_ && stats_ == b.stats_ &&
+    return nodes_.ConstHostVector() == b.nodes_.ConstHostVector() &&
+           stats_.ConstHostVector() == b.stats_.ConstHostVector() &&
           deleted_nodes_ == b.deleted_nodes_ && param_ == b.param_;
  }
-  /* \brief Iterate through all nodes in this tree.
-   *
-   * \param Function that accepts a node index, and returns false when iteration should
-   * stop, otherwise returns true.
-   */
-  template <typename Func> void WalkTree(Func func) const {
-    std::stack<bst_node_t> nodes;
-    nodes.push(kRoot);
-    auto &self = *this;
-    while (!nodes.empty()) {
-      auto nidx = nodes.top();
-      nodes.pop();
-      if (!func(nidx)) {
-        return;
-      }
-      auto left = self.LeftChild(nidx);
-      auto right = self.RightChild(nidx);
-      if (left != RegTree::kInvalidNodeId) {
-        nodes.push(left);
-      }
-      if (right != RegTree::kInvalidNodeId) {
-        nodes.push(right);
-      }
-    }
-  }
  /*!
   * \brief Compares whether 2 trees are equal from a user's perspective. The equality
   * compares only non-deleted nodes.
@@ -440,12 +321,23 @@ class RegTree : public Model {
                  float right_sum, bst_node_t leaf_right_child = kInvalidNodeId);
  /**
-   * \brief Expands a leaf node into two additional leaf nodes for a multi-target tree.
+   * @brief Expands a leaf node into two additional leaf nodes for a multi-target tree.
   */
  void ExpandNode(bst_node_t nidx, bst_feature_t split_index, float split_cond,
                  bool default_left, linalg::VectorView<float const> base_weight,
                  linalg::VectorView<float const> left_weight,
                  linalg::VectorView<float const> right_weight);
+  /**
+   * @brief Set all leaf weights for a multi-target tree.
+   *
+   * The leaf weight can be different from the internal weight stored by @ref ExpandNode.
+   * This function is used to set the leaves at the end of tree construction.
+   *
+   * @param leaves The node indices for all leaves. This must contain all the leaves in this tree.
+   * @param weights Row-major matrix for leaf weights, each row contains a leaf specified by the
+   *                leaves parameter.
+   */
+  void SetLeaves(std::vector<bst_node_t> leaves, common::Span<float const> weights);
 
  /**
   * \brief Expands a leaf node with categories
@@ -468,9 +360,9 @@ class RegTree : public Model {
                  bst_float right_leaf_weight, bst_float loss_change, float sum_hess,
                  float left_sum, float right_sum);
  /**
-   * \brief Whether this tree has categorical split.
+   * @brief Whether this tree has categorical split.
   */
-  [[nodiscard]] bool HasCategoricalSplit() const { return !split_categories_.empty(); }
+  [[nodiscard]] bool HasCategoricalSplit() const { return !split_categories_.Empty(); }
  /**
   * \brief Whether this is a multi-target tree.
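SetLeaves replaces per-leaf assignment with one bulk write at the end of construction. The contract documented above implies usage along these lines; the element types of the arguments are assumed from context and the leaf collection is left to the caller:

    #include <xgboost/tree_model.h>

    #include <utility>
    #include <vector>

    void FinalizeLeaves(xgboost::RegTree* tree, xgboost::bst_target_t n_targets) {
      std::vector<xgboost::bst_node_t> leaves;  // fill with every leaf index of `tree`
      // Row-major [n_leaves x n_targets]: row i holds the weights for leaves[i].
      std::vector<float> weights(leaves.size() * n_targets, 0.0f);
      tree->SetLeaves(std::move(leaves),
                      xgboost::common::Span<float const>{weights.data(), weights.size()});
    }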
*/ @@ -510,42 +402,21 @@ class RegTree : public Model { [[nodiscard]] bst_node_t GetNumLeaves() const; [[nodiscard]] bst_node_t GetNumSplitNodes() const; - /*! - * \brief get current depth - * \param nid node id + /** + * @brief Get the depth of a node. */ - [[nodiscard]] std::int32_t GetDepth(bst_node_t nid) const { - if (IsMultiTarget()) { - return this->p_mt_tree_->Depth(nid); - } - int depth = 0; - while (!nodes_[nid].IsRoot()) { - ++depth; - nid = nodes_[nid].Parent(); - } - return depth; - } + [[nodiscard]] bst_node_t GetDepth(bst_node_t nidx) const; /** - * \brief Set the leaf weight for a multi-target tree. + * @brief Set the root weight for a multi-target tree. */ - void SetLeaf(bst_node_t nidx, linalg::VectorView weight) { + void SetRoot(linalg::VectorView weight) { CHECK(IsMultiTarget()); - return this->p_mt_tree_->SetLeaf(nidx, weight); - } - - /*! - * \brief get maximum depth - * \param nid node id - */ - [[nodiscard]] int MaxDepth(int nid) const { - if (nodes_[nid].IsLeaf()) return 0; - return std::max(MaxDepth(nodes_[nid].LeftChild()) + 1, MaxDepth(nodes_[nid].RightChild()) + 1); + return this->p_mt_tree_->SetRoot(weight); } - - /*! - * \brief get maximum depth + /** + * @brief Get the maximum depth. */ - int MaxDepth() { return MaxDepth(0); } + [[nodiscard]] bst_node_t MaxDepth() const; /*! * \brief dense feature vector that can be taken by RegTree @@ -600,14 +471,6 @@ class RegTree : public Model { bool has_missing_; }; - /*! - * \brief calculate the approximate feature contributions for the given root - * \param feat dense feature vector, if the feature is missing the field is set to NaN - * \param out_contribs output vector to hold the contributions - */ - void CalculateContributionsApprox(const RegTree::FVec& feat, - std::vector* mean_values, - bst_float* out_contribs) const; /*! * \brief dump the model in the requested format as a text string * \param fmap feature map that may help give interpretations of feature @@ -617,35 +480,24 @@ class RegTree : public Model { */ [[nodiscard]] std::string DumpModel(const FeatureMap& fmap, bool with_stats, std::string format) const; - /*! - * \brief Get split type for a node. - * \param nidx Index of node. - * \return The type of this split. For leaf node it's always kNumerical. - */ - [[nodiscard]] FeatureType NodeSplitType(bst_node_t nidx) const { return split_types_.at(nidx); } - /*! - * \brief Get split types for all nodes. + /** + * @brief Get split types for all nodes. */ - [[nodiscard]] std::vector const& GetSplitTypes() const { - return split_types_; + [[nodiscard]] common::Span GetSplitTypes(DeviceOrd device) const { + return device.IsCPU() ? split_types_.ConstHostSpan() + : (split_types_.SetDevice(device), split_types_.ConstDeviceSpan()); } - [[nodiscard]] common::Span GetSplitCategories() const { - return split_categories_; + [[nodiscard]] common::Span GetSplitCategories(DeviceOrd device) const { + return device.IsCPU() + ? split_categories_.ConstHostSpan() + : (split_categories_.SetDevice(device), split_categories_.ConstDeviceSpan()); } - /*! 
- * \brief Get the bit storage for categories - */ - [[nodiscard]] common::Span NodeCats(bst_node_t nidx) const { - auto node_ptr = GetCategoriesMatrix().node_ptr; - auto categories = GetCategoriesMatrix().categories; - auto segment = node_ptr[nidx]; - auto node_cats = categories.subspan(segment.beg, segment.size); - return node_cats; + [[nodiscard]] auto const& GetSplitCategoriesPtr() const { + return split_categories_segments_.ConstHostVector(); } - [[nodiscard]] auto const& GetSplitCategoriesPtr() const { return split_categories_segments_; } /** - * \brief CSR-like matrix for categorical splits. + * @brief CSR-like matrix for categorical splits. * * The fields of split_categories_segments_[i] are set such that the range * node_ptr[beg:(beg+size)] stores the bitset for the matching categories for the @@ -661,80 +513,42 @@ class RegTree : public Model { common::Span node_ptr; }; - [[nodiscard]] CategoricalSplitMatrix GetCategoriesMatrix() const { + [[nodiscard]] CategoricalSplitMatrix GetCategoriesMatrix(DeviceOrd device) const { CategoricalSplitMatrix view; - view.split_type = common::Span(this->GetSplitTypes()); - view.categories = this->GetSplitCategories(); - view.node_ptr = common::Span(split_categories_segments_); + view.split_type = this->GetSplitTypes(device); + view.categories = this->GetSplitCategories(device); + if (device.IsCPU()) { + view.node_ptr = split_categories_segments_.ConstHostSpan(); + } else { + split_categories_segments_.SetDevice(device); + view.node_ptr = split_categories_segments_.ConstDeviceSpan(); + } return view; } - [[nodiscard]] bst_feature_t SplitIndex(bst_node_t nidx) const { - if (IsMultiTarget()) { - return this->p_mt_tree_->SplitIndex(nidx); - } - return (*this)[nidx].SplitIndex(); - } - [[nodiscard]] float SplitCond(bst_node_t nidx) const { - if (IsMultiTarget()) { - return this->p_mt_tree_->SplitCond(nidx); - } - return (*this)[nidx].SplitCond(); - } - [[nodiscard]] bool DefaultLeft(bst_node_t nidx) const { - if (IsMultiTarget()) { - return this->p_mt_tree_->DefaultLeft(nidx); - } - return (*this)[nidx].DefaultLeft(); - } - [[nodiscard]] bst_node_t DefaultChild(bst_node_t nidx) const { - return this->DefaultLeft(nidx) ? 
this->LeftChild(nidx) : this->RightChild(nidx); - } - [[nodiscard]] bool IsRoot(bst_node_t nidx) const { - if (IsMultiTarget()) { - return nidx == kRoot; - } - return (*this)[nidx].IsRoot(); - } - [[nodiscard]] bool IsLeaf(bst_node_t nidx) const { - if (IsMultiTarget()) { - return this->p_mt_tree_->IsLeaf(nidx); - } - return (*this)[nidx].IsLeaf(); - } - [[nodiscard]] bst_node_t Parent(bst_node_t nidx) const { - if (IsMultiTarget()) { - return this->p_mt_tree_->Parent(nidx); - } - return (*this)[nidx].Parent(); - } [[nodiscard]] bst_node_t LeftChild(bst_node_t nidx) const { if (IsMultiTarget()) { return this->p_mt_tree_->LeftChild(nidx); } - return (*this)[nidx].LeftChild(); + return nodes_.ConstHostVector()[nidx].LeftChild(); } [[nodiscard]] bst_node_t RightChild(bst_node_t nidx) const { if (IsMultiTarget()) { return this->p_mt_tree_->RightChild(nidx); } - return (*this)[nidx].RightChild(); - } - [[nodiscard]] bool IsLeftChild(bst_node_t nidx) const { - if (IsMultiTarget()) { - CHECK_NE(nidx, kRoot); - auto p = this->p_mt_tree_->Parent(nidx); - return nidx == this->p_mt_tree_->LeftChild(p); - } - return (*this)[nidx].IsLeftChild(); + return nodes_.ConstHostVector()[nidx].RightChild(); } [[nodiscard]] bst_node_t Size() const { if (IsMultiTarget()) { return this->p_mt_tree_->Size(); } - return this->nodes_.size(); + return this->nodes_.Size(); } + [[nodiscard]] RegTree* Copy() const; + tree::ScalarTreeView HostScView() const; + tree::MultiTargetTreeView HostMtView() const; + private: template void LoadCategoricalSplit(Json const& in); @@ -742,36 +556,36 @@ class RegTree : public Model { /*! \brief model parameter */ TreeParam param_; // vector of nodes - std::vector nodes_; + HostDeviceVector nodes_; // free node space, used during training process std::vector deleted_nodes_; // stats of nodes - std::vector stats_; - std::vector split_types_; + HostDeviceVector stats_; + HostDeviceVector split_types_; // Categories for each internal node. - std::vector split_categories_; + HostDeviceVector split_categories_; // Ptr to split categories of each node. - std::vector split_categories_segments_; + HostDeviceVector split_categories_segments_; // ptr to multi-target tree with vector leaf. - CopyUniquePtr p_mt_tree_; + std::unique_ptr p_mt_tree_; // allocate a new node, // !!!!!! 
NOTE: may cause BUG here, nodes.resize
  bst_node_t AllocNode() {
    if (param_.num_deleted != 0) {
      int nid = deleted_nodes_.back();
      deleted_nodes_.pop_back();
-      nodes_[nid].Reuse();
+      nodes_.HostVector()[nid].Reuse();
      --param_.num_deleted;
      return nid;
    }
    int nd = param_.num_nodes++;
    CHECK_LT(param_.num_nodes, std::numeric_limits<bst_node_t>::max())
        << "number of nodes in the tree exceed 2^31";
-    nodes_.resize(param_.num_nodes);
-    stats_.resize(param_.num_nodes);
-    split_types_.resize(param_.num_nodes, FeatureType::kNumerical);
-    split_categories_segments_.resize(param_.num_nodes);
+    nodes_.HostVector().resize(param_.num_nodes);
+    stats_.HostVector().resize(param_.num_nodes);
+    split_types_.HostVector().resize(param_.num_nodes, FeatureType::kNumerical);
+    split_categories_segments_.HostVector().resize(param_.num_nodes);
    return nd;
  }
  // delete a tree node, keep the parent field to allow trace back
@@ -785,7 +599,7 @@ class RegTree : public Model {
    }
    deleted_nodes_.push_back(nid);
-    nodes_[nid].MarkDelete();
+    nodes_.HostVector()[nid].MarkDelete();
    ++param_.num_deleted;
  }
};
diff --git a/include/xgboost/tree_updater.h b/include/xgboost/tree_updater.h
index 477c8e4a1785..7a96d71c5231 100644
--- a/include/xgboost/tree_updater.h
+++ b/include/xgboost/tree_updater.h
@@ -1,7 +1,7 @@
 /**
- * Copyright 2014-2023 by XGBoost Contributors
- * \file tree_updater.h
- * \brief General primitive for tree learning,
+ * Copyright 2014-2025, XGBoost Contributors
+ *
+ * @brief General primitive for tree learning,
 *  Updating a collection of trees given the information.
 * \author Tianqi Chen
 */
@@ -10,16 +10,17 @@
 #include
 #include  // for Args, GradientPair
-#include  // DMatrix
+#include  // for DMatrix
+#include  // for GradientContainer
 #include  // for HostDeviceVector
 #include  // for VectorView
 #include  // for Configurable
 #include  // for Span
 #include  // for RegTree
 
-#include  // for function
-#include  // for string
-#include  // for vector
+#include  // for function
+#include  // for string
+#include  // for vector
 
 namespace xgboost {
 namespace tree {
@@ -59,21 +60,21 @@ class TreeUpdater : public Configurable {
   */
  [[nodiscard]] virtual bool HasNodePosition() const { return false; }
  /**
-   * \brief perform update to the tree models
+   * @brief perform update to the tree models
   *
-   * \param param Hyper-parameter for constructing trees.
-   * \param gpair the gradient pair statistics of the data
-   * \param data The data matrix passed to the updater.
-   * \param out_position The leaf index for each row. The index is negated if that row is
+   * @param param Hyper-parameter for constructing trees.
+   * @param gpair The gradient pair statistics of the data.
+   * @param p_fmat The data matrix passed to the updater.
+   * @param out_position The leaf index for each row. The index is negated if that row is
   *   removed during sampling. So the 3rd node is ~3.
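Given the signature change documented here, where the gradient moves into GradientContainer and the data parameter is renamed p_fmat, a caller drives an updater roughly as follows. The updater name, the Create/Configure call shapes, and the position buffer setup are assumptions for illustration, not code from the patch:

    #include <xgboost/tree_updater.h>

    #include <memory>
    #include <vector>

    void GrowTree(xgboost::Context const* ctx, xgboost::ObjInfo const* task,
                  xgboost::tree::TrainParam const* param, xgboost::GradientContainer* gpair,
                  xgboost::DMatrix* p_fmat, xgboost::RegTree* tree) {
      std::unique_ptr<xgboost::TreeUpdater> updater{
          xgboost::TreeUpdater::Create("grow_quantile_histmaker", ctx, task)};
      updater->Configure(xgboost::Args{});
      // One position vector per output tree; rows removed by sampling get ~nidx.
      std::vector<xgboost::HostDeviceVector<xgboost::bst_node_t>> positions(1);
      updater->Update(param, gpair, p_fmat, positions, {tree});
    }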
- * \param out_trees references the trees to be updated, updater will change the content of trees + * @param out_trees references the trees to be updated, updater will change the content of trees * note: all the trees in the vector are updated, with the same statistics, * but maybe different random seeds, usually one tree is passed in at a time, * there can be multiple trees when we train random forest style model */ - virtual void Update(tree::TrainParam const* param, linalg::Matrix* gpair, - DMatrix* data, common::Span> out_position, - const std::vector& out_trees) = 0; + virtual void Update(tree::TrainParam const* param, GradientContainer* gpair, DMatrix* p_fmat, + common::Span> out_position, + std::vector const& out_trees) = 0; /*! * \brief determines whether updater has enough knowledge about a given dataset diff --git a/include/xgboost/version_config.h b/include/xgboost/version_config.h index 785984174b2e..53ae594c2590 100644 --- a/include/xgboost/version_config.h +++ b/include/xgboost/version_config.h @@ -5,7 +5,7 @@ #define XGBOOST_VERSION_CONFIG_H_ #define XGBOOST_VER_MAJOR 3 /* NOLINT */ -#define XGBOOST_VER_MINOR 1 /* NOLINT */ +#define XGBOOST_VER_MINOR 2 /* NOLINT */ #define XGBOOST_VER_PATCH 0 /* NOLINT */ #endif // XGBOOST_VERSION_CONFIG_H_ diff --git a/jvm-packages/checkstyle.xml b/jvm-packages/checkstyle.xml index 88ae2122e279..ebfd7cd88531 100644 --- a/jvm-packages/checkstyle.xml +++ b/jvm-packages/checkstyle.xml @@ -48,10 +48,6 @@ - - - - diff --git a/jvm-packages/create_jni.py b/jvm-packages/create_jni.py index 2708ff7a1904..5f2ca9f88890 100755 --- a/jvm-packages/create_jni.py +++ b/jvm-packages/create_jni.py @@ -168,10 +168,10 @@ def native_build(cli_args: argparse.Namespace) -> None: # for xgboost4j-spark maybe_makedirs("xgboost4j-spark/src/test/resources") - with cd("../demo/CLI/regression"): + with cd("../demo/data/regression"): run(f'"{sys.executable}" mapfeat.py') run(f'"{sys.executable}" mknfold.py machine.txt 1') - for file in glob.glob("../demo/CLI/regression/machine.txt.t*"): + for file in glob.glob("../demo/data/regression/machine.txt.t*"): cp(file, "xgboost4j-spark/src/test/resources") for file in glob.glob("../demo/data/agaricus.*"): cp(file, "xgboost4j-spark/src/test/resources") diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index f4cadbf9a787..4573d6160db2 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT pom XGBoost JVM Package JVM Package for XGBoost @@ -197,14 +197,12 @@ - org.sonatype.plugins - nexus-staging-maven-plugin - 1.7.0 + org.sonatype.central + central-publishing-maven-plugin + 0.7.0 true - ossrh - https://oss.sonatype.org/ - false + central @@ -288,29 +286,6 @@ - - org.scalastyle - scalastyle-maven-plugin - 1.0.0 - - false - true - true - ${basedir}/src/main/scala - ${basedir}/src/test/scala - scalastyle-config.xml - UTF-8 - - - - checkstyle - validate - - check - - - - org.apache.maven.plugins maven-site-plugin @@ -322,6 +297,8 @@ 3.6.0 checkstyle.xml + checkstyle-suppressions.xml + checkstyle.suppressions.file true diff --git a/jvm-packages/xgboost4j-example/README.md b/jvm-packages/xgboost4j-example/README.md index 50f268e83ff3..b678fd8c91a7 100644 --- a/jvm-packages/xgboost4j-example/README.md +++ b/jvm-packages/xgboost4j-example/README.md @@ -9,7 +9,6 @@ XGBoost4J Code Examples * [Generalized Linear Model](src/main/java/ml/dmlc/xgboost4j/java/example/GeneralizedLinearModel.java) * [Cross 
validation](src/main/java/ml/dmlc/xgboost4j/java/example/CrossValidation.java) * [Predicting leaf indices](src/main/java/ml/dmlc/xgboost4j/java/example/PredictLeafIndices.java) -* [External Memory](src/main/java/ml/dmlc/xgboost4j/java/example/ExternalMemory.java) * [Early Stopping](src/main/java/ml/dmlc/xgboost4j/java/example/EarlyStopping.java) ## Scala API @@ -21,7 +20,6 @@ XGBoost4J Code Examples * [Generalized Linear Model](src/main/scala/ml/dmlc/xgboost4j/scala/example/GeneralizedLinearModel.scala) * [Cross validation](src/main/scala/ml/dmlc/xgboost4j/scala/example/CrossValidation.scala) * [Predicting leaf indices](src/main/scala/ml/dmlc/xgboost4j/scala/example/PredictLeafIndices.scala) -* [External Memory](src/main/scala/ml/dmlc/xgboost4j/scala/example/ExternalMemory.scala) ## Spark API * [Distributed Training with Spark](src/main/scala/ml/dmlc/xgboost4j/scala/example/spark/SparkMLlibPipeline.scala) diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml index 9a8408124c63..ded576df3b58 100644 --- a/jvm-packages/xgboost4j-example/pom.xml +++ b/jvm-packages/xgboost4j-example/pom.xml @@ -6,11 +6,11 @@ ml.dmlc xgboost-jvm_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT xgboost4j-example xgboost4j-example_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT jar diff --git a/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/ExternalMemory.java b/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/ExternalMemory.java deleted file mode 100644 index 70b2b85b5315..000000000000 --- a/jvm-packages/xgboost4j-example/src/main/java/ml/dmlc/xgboost4j/java/example/ExternalMemory.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - Copyright (c) 2014 by Contributors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- */ -package ml.dmlc.xgboost4j.java.example; - -import java.util.HashMap; - -import ml.dmlc.xgboost4j.java.Booster; -import ml.dmlc.xgboost4j.java.DMatrix; -import ml.dmlc.xgboost4j.java.XGBoost; -import ml.dmlc.xgboost4j.java.XGBoostError; - -/** - * simple example for using external memory version - * - * @author hzx - */ -public class ExternalMemory { - public static void main(String[] args) throws XGBoostError { - //this is the only difference, add a # followed by a cache prefix name - //several cache file with the prefix will be generated - //currently only support convert from libsvm file - DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm#dtrain.cache"); - DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm#dtest.cache"); - - //specify parameters - HashMap params = new HashMap(); - params.put("eta", 1.0); - params.put("max_depth", 2); - params.put("silent", 1); - params.put("objective", "binary:logistic"); - - //performance notice: set nthread to be the number of your real cpu - //some cpu offer two threads per core, for example, a 4 core cpu with 8 threads, in such case - // set nthread=4 - //param.put("nthread", num_real_cpu); - - //specify watchList - HashMap watches = new HashMap(); - watches.put("train", trainMat); - watches.put("test", testMat); - - //set round - int round = 2; - - //train a boost model - Booster booster = XGBoost.train(trainMat, params, round, watches, null, null); - } -} diff --git a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/ExternalMemory.scala b/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/ExternalMemory.scala deleted file mode 100644 index d35715e3c733..000000000000 --- a/jvm-packages/xgboost4j-example/src/main/scala/ml/dmlc/xgboost4j/scala/example/ExternalMemory.scala +++ /dev/null @@ -1,59 +0,0 @@ -/* - Copyright (c) 2014-2024 by Contributors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- */ - -package ml.dmlc.xgboost4j.scala.example - -import scala.collection.mutable - -import ml.dmlc.xgboost4j.scala.{DMatrix, XGBoost} - -object ExternalMemory { - def main(args: Array[String]): Unit = { - // this is the only difference, add a # followed by a cache prefix name - // several cache file with the prefix will be generated - // currently only support convert from libsvm file - val trainMat = new DMatrix("../../demo/data/agaricus.txt.train?format=libsvm#dtrain.cache") - val testMat = new DMatrix("../../demo/data/agaricus.txt.test?format=libsvm#dtest.cache") - - val params = new mutable.HashMap[String, Any]() - params += "eta" -> 1.0 - params += "max_depth" -> 2 - params += "silent" -> 1 - params += "objective" -> "binary:logistic" - - // performance notice: set nthread to be the number of your real cpu - // some cpu offer two threads per core, for example, a 4 core cpu with 8 threads, in such case - // set nthread=4 - // param.put("nthread", num_real_cpu); - - val watches = new mutable.HashMap[String, DMatrix] - watches += "train" -> trainMat - watches += "test" -> testMat - - val round = 2 - // train a model - val booster = XGBoost.train(trainMat, params.toMap, round, watches.toMap) - - val trainPred = booster.predict(trainMat, true) - val testPred = booster.predict(testMat, true) - - trainMat.setBaseMargin(trainPred) - testMat.setBaseMargin(testPred) - - System.out.println("result of running from initial prediction") - XGBoost.train(trainMat, params.toMap, 1, watches.toMap) - } -} diff --git a/jvm-packages/xgboost4j-example/src/test/java/ml/dmlc/xgboost4j/java/example/JavaExamplesTest.java b/jvm-packages/xgboost4j-example/src/test/java/ml/dmlc/xgboost4j/java/example/JavaExamplesTest.java index 74dc2f3938d9..6dd42ab37439 100644 --- a/jvm-packages/xgboost4j-example/src/test/java/ml/dmlc/xgboost4j/java/example/JavaExamplesTest.java +++ b/jvm-packages/xgboost4j-example/src/test/java/ml/dmlc/xgboost4j/java/example/JavaExamplesTest.java @@ -35,8 +35,6 @@ public void testExamples() throws XGBoostError, IOException { CustomObjective.main(args); System.out.println("EarlyStopping"); EarlyStopping.main(args); - System.out.println("ExternalMemory"); - ExternalMemory.main(args); System.out.println("GeneralizedLinearModel"); GeneralizedLinearModel.main(args); System.out.println("PredictFirstNtree"); diff --git a/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/ScalaExamplesTest.scala b/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/ScalaExamplesTest.scala index d7705f90e5ce..0f9dc35014cc 100644 --- a/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/ScalaExamplesTest.scala +++ b/jvm-packages/xgboost4j-example/src/test/scala/ml/dmlc/xgboost4j/scala/example/ScalaExamplesTest.scala @@ -28,8 +28,6 @@ class ScalaExamplesTest extends AnyFunSuite { CrossValidation.main(args) println("CustomObjective") CustomObjective.main(args) - println("ExternalMemory") - ExternalMemory.main(args) println("GeneralizedLinearModel") GeneralizedLinearModel.main(args) println("PredictFirstNTree") diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml index 96fe0563d499..adce92a8675f 100644 --- a/jvm-packages/xgboost4j-flink/pom.xml +++ b/jvm-packages/xgboost4j-flink/pom.xml @@ -6,12 +6,12 @@ ml.dmlc xgboost-jvm_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT xgboost4j-flink xgboost4j-flink_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT 2.2.0 diff --git a/jvm-packages/xgboost4j-spark-gpu/pom.xml 
b/jvm-packages/xgboost4j-spark-gpu/pom.xml index a4768878f879..014d0fe7bb8d 100644 --- a/jvm-packages/xgboost4j-spark-gpu/pom.xml +++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml @@ -6,10 +6,43 @@ ml.dmlc xgboost-jvm_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT xgboost4j-spark-gpu + ml.dmlc xgboost4j-spark-gpu_2.12 + 3.2.0-SNAPSHOT + JVM Package for XGBoost + https://github.com/dmlc/xgboost/tree/master/jvm-packages + + + The Apache License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + + + + + Bobby Wang + wbo4958@gmail.com + + + Jiaming Yuan + jm.yuan@outlook.com + + + Hyunsu Cho + chohyu01@cs.washington.edu + + + CodingCat + codingcat@apache.org + + + + scm:git:git:/github.com/dmlc/xgboost.git + scm:git:ssh://github.com/dmlc/xgboost.git + https://github.com/dmlc/xgboost + diff --git a/jvm-packages/xgboost4j-spark-gpu/src/main/java/ml/dmlc/xgboost4j/java/ExtMemQuantileDMatrix.java b/jvm-packages/xgboost4j-spark-gpu/src/main/java/ml/dmlc/xgboost4j/java/ExtMemQuantileDMatrix.java index 0ae1a9b30b48..8a653b146a60 100644 --- a/jvm-packages/xgboost4j-spark-gpu/src/main/java/ml/dmlc/xgboost4j/java/ExtMemQuantileDMatrix.java +++ b/jvm-packages/xgboost4j-spark-gpu/src/main/java/ml/dmlc/xgboost4j/java/ExtMemQuantileDMatrix.java @@ -30,17 +30,17 @@ public ExtMemQuantileDMatrix(Iterator iter, int maxBin, DMatrix ref, int nthread, - int maxNumDevicePages, int maxQuantileBatches, - int minCachePageBytes) throws XGBoostError { + long minCachePageBytes, + float cacheHostRatio) throws XGBoostError { long[] out = new long[1]; long[] refHandle = null; if (ref != null) { refHandle = new long[1]; refHandle[0] = ref.getHandle(); } - String conf = this.getConfig(missing, maxBin, nthread, maxNumDevicePages, - maxQuantileBatches, minCachePageBytes); + String conf = this.getConfig(missing, maxBin, nthread, + maxQuantileBatches, minCachePageBytes, cacheHostRatio); XGBoostJNI.checkCall(XGBoostJNI.XGExtMemQuantileDMatrixCreateFromCallback( iter, refHandle, conf, out)); handle = out[0]; @@ -51,7 +51,7 @@ public ExtMemQuantileDMatrix( float missing, int maxBin, DMatrix ref) throws XGBoostError { - this(iter, missing, maxBin, ref, 0, -1, -1, -1); + this(iter, missing, maxBin, ref, 0, -1, -1, Float.NaN); } public ExtMemQuantileDMatrix( @@ -61,23 +61,24 @@ public ExtMemQuantileDMatrix( this(iter, missing, maxBin, null); } - private String getConfig(float missing, int maxBin, int nthread, int maxNumDevicePages, - int maxQuantileBatches, int minCachePageBytes) { + private String getConfig(float missing, int maxBin, int nthread, + int maxQuantileBatches, long minCachePageBytes, float cacheHostRatio) { Map conf = new java.util.HashMap<>(); conf.put("missing", missing); conf.put("max_bin", maxBin); conf.put("nthread", nthread); - if (maxNumDevicePages > 0) { - conf.put("max_num_device_pages", maxNumDevicePages); - } if (maxQuantileBatches > 0) { - conf.put("max_quantile_batches", maxQuantileBatches); + conf.put("max_quantile_blocks", maxQuantileBatches); } if (minCachePageBytes > 0) { conf.put("min_cache_page_bytes", minCachePageBytes); } + if (cacheHostRatio >= 0.0 && cacheHostRatio <= 1.0) { + conf.put("cache_host_ratio", cacheHostRatio); + } + conf.put("on_host", true); conf.put("cache_prefix", "."); ObjectMapper mapper = new ObjectMapper(); diff --git a/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/ExtMemQuantileDMatrix.scala b/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/ExtMemQuantileDMatrix.scala index d978a1b1fcfd..d6cd447fde8c 100644 --- 
a/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/ExtMemQuantileDMatrix.scala +++ b/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/ExtMemQuantileDMatrix.scala @@ -27,12 +27,12 @@ class ExtMemQuantileDMatrix private[scala]( maxBin: Int, ref: Option[QuantileDMatrix], nthread: Int, - maxNumDevicePages: Int, maxQuantileBatches: Int, - minCachePageBytes: Int) { + minCachePageBytes: Long, + cacheHostRatio: Float) { this(new jExtMemQuantileDMatrix(iter.asJava, missing, maxBin, ref.map(_.jDMatrix).orNull, - nthread, maxNumDevicePages, maxQuantileBatches, minCachePageBytes)) + nthread, maxQuantileBatches, minCachePageBytes, cacheHostRatio)) } def this(iter: Iterator[ColumnBatch], missing: Float, maxBin: Int) { diff --git a/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/spark/GpuXGBoostPlugin.scala b/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/spark/GpuXGBoostPlugin.scala index ab0e4c6cabf1..d428e2782e51 100644 --- a/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/spark/GpuXGBoostPlugin.scala +++ b/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/spark/GpuXGBoostPlugin.scala @@ -18,6 +18,7 @@ package ml.dmlc.xgboost4j.scala.spark import scala.collection.mutable.ArrayBuffer import scala.jdk.CollectionConverters._ +import scala.util.Try import ai.rapids.cudf.Table import com.nvidia.spark.rapids.{ColumnarRdd, GpuColumnVectorUtils} @@ -101,7 +102,7 @@ class GpuXGBoostPlugin extends XGBoostPlugin { private[spark] def validate[T <: XGBoostEstimator[T, M], M <: XGBoostModel[M]]( estimator: XGBoostEstimator[T, M], dataset: Dataset[_]): Unit = { - require(estimator.getTreeMethod == "gpu_hist" || estimator.getDevice != "cpu", + require(estimator.getDevice != "cpu", "Using Spark-Rapids to accelerate XGBoost must set device=cuda") } @@ -134,7 +135,7 @@ class GpuXGBoostPlugin extends XGBoostPlugin { val maxQuantileBatches = estimator.getMaxQuantileBatches val minCachePageBytes = estimator.getMinCachePageBytes - val maxNumDevicePages = estimator.getMaxNumDevicePages + val cacheHostRatio = Try(estimator.getCacheHostRatio).getOrElse(Float.NaN) /** build QuantileDMatrix on the executor side */ def buildQuantileDMatrix(input: Iterator[Table], @@ -143,8 +144,8 @@ class GpuXGBoostPlugin extends XGBoostPlugin { extMemPath match { case Some(_) => val itr = new ExternalMemoryIterator(input, indices, extMemPath) - new ExtMemQuantileDMatrix(itr, missing, maxBin, ref, nthread, maxNumDevicePages, - maxQuantileBatches, minCachePageBytes) + new ExtMemQuantileDMatrix(itr, missing, maxBin, ref, nthread, + maxQuantileBatches, minCachePageBytes, cacheHostRatio) case None => val itr = input.map { table => @@ -189,7 +190,6 @@ class GpuXGBoostPlugin extends XGBoostPlugin { val sconf = dataset.sparkSession.conf val rmmEnabled: Boolean = try { - sconf.get("spark.rapids.memory.gpu.pooling.enabled").toBoolean && sconf.get("spark.rapids.memory.gpu.pool").trim.toLowerCase != "none" } catch { case _: Throwable => false // Any exception will return false diff --git a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/spark/GpuXGBoostPluginSuite.scala b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/spark/GpuXGBoostPluginSuite.scala index ab8cf66220b2..e3d1925bebee 100644 --- a/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/spark/GpuXGBoostPluginSuite.scala +++ 
b/jvm-packages/xgboost4j-spark-gpu/src/test/scala/ml/dmlc/xgboost4j/scala/spark/GpuXGBoostPluginSuite.scala @@ -129,10 +129,6 @@ class GpuXGBoostPluginSuite extends GpuTestSuite { classifier.setDevice("gpu") plugin.validate(classifier, df) - - classifier.setDevice("cpu") - classifier.setTreeMethod("gpu_hist") - plugin.validate(classifier, df) } } diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml index 904c97a08bcd..c44858132775 100644 --- a/jvm-packages/xgboost4j-spark/pom.xml +++ b/jvm-packages/xgboost4j-spark/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT xgboost4j-spark xgboost4j-spark_2.12 diff --git a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/Utils.scala b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/Utils.scala index cae44ab9aef1..45d8854d6f3e 100644 --- a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/Utils.scala +++ b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/Utils.scala @@ -52,9 +52,9 @@ private[scala] object Utils { // TODO support sparsevector def asXGB: XGBLabeledPoint = v match { case v: DenseVector => - XGBLabeledPoint(0.0f, v.size, null, v.values.map(_.toFloat)) + new XGBLabeledPoint(0.0f, v.size, null, v.values.map(_.toFloat)) case v: SparseVector => - XGBLabeledPoint(0.0f, v.size, v.indices, v.toDense.values.map(_.toFloat)) + new XGBLabeledPoint(0.0f, v.size, v.indices, v.toDense.values.map(_.toFloat)) } } @@ -112,6 +112,7 @@ private[scala] object Utils { val TRAIN_NAME = "train" val VALIDATION_NAME = "eval" + val TMP_FEATURE_ARRAY_NAME = "xgboost_eGdib29zdC1qdm0K_jvm" /** Executes the provided code block and then closes the resource */ def withResource[T <: AutoCloseable, V](r: T)(block: T => V): V = { diff --git a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostEstimator.scala b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostEstimator.scala index abbffc370224..0e95ed352ecf 100644 --- a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostEstimator.scala +++ b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostEstimator.scala @@ -32,7 +32,7 @@ import org.apache.spark.ml.util.{DefaultParamsWritable, MLReader, MLWritable, ML import org.apache.spark.ml.xgboost.{SparkUtils, XGBProbabilisticClassifierParams} import org.apache.spark.rdd.RDD import org.apache.spark.sql._ -import org.apache.spark.sql.functions.{col, udf} +import org.apache.spark.sql.functions.{array, col, udf} import org.apache.spark.sql.types._ import ml.dmlc.xgboost4j.{LabeledPoint => XGBLabeledPoint} @@ -150,7 +150,12 @@ private[spark] trait XGBoostEstimator[ // Get feature id(s) val (featureIds: Option[Seq[Int]], featureId: Option[Int]) = if (getFeaturesCols.length != 0) { - (Some(getFeaturesCols.map(schema.fieldIndex).toSeq), None) + // Columnar features have already been converted to a single array column + if (schema.names.contains(Utils.TMP_FEATURE_ARRAY_NAME)) { + (None, Some(schema.fieldIndex(Utils.TMP_FEATURE_ARRAY_NAME))) + } else { + (Some(getFeaturesCols.map(schema.fieldIndex).toSeq), None) + } } else { (None, Some(schema.fieldIndex(getFeaturesCol))) } @@ -188,30 +193,36 @@ private[spark] trait XGBoostEstimator[ private[spark] def preprocess(dataset: Dataset[_]): (Dataset[_], ColumnIndices) = { val schema = dataset.schema validateFeatureType(schema) - val featureIsArray: Boolean = featureIsArrayType(schema)
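+ // When the user supplies multiple numeric feature columns via featuresCols, preprocess + // packs them into a single temporary array column (Utils.TMP_FEATURE_ARRAY_NAME) below, + // so the rest of the pipeline only ever sees one features column.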
// Columns to be selected for XGBoost training val selectedCols: ArrayBuffer[Column] = ArrayBuffer.empty def selectCol(c: Param[String], targetType: DataType) = { if (isDefinedNonEmpty(c)) { - if (c == featuresCol) { - // If feature is array type, we force to cast it to array of float - val featureCol = if (featureIsArray) { - col($(featuresCol)).cast(ArrayType(FloatType)) - } else col($(featuresCol)) - selectedCols.append(featureCol) - } else { selectedCols.append(castIfNeeded(schema, $(c), targetType)) - } } } - Seq(labelCol, featuresCol, weightCol, baseMarginCol).foreach(p => selectCol(p, FloatType)) + Seq(labelCol, weightCol, baseMarginCol).foreach(p => selectCol(p, FloatType)) this match { case p: HasGroupCol => selectCol(p.groupCol, IntegerType) case _ => } + + val featureCol = if (isSet(featuresCols)) { + // Pack the individual feature columns into a single array column + array(getFeaturesCols.map(col): _*) + .cast(ArrayType(FloatType)) + .alias(Utils.TMP_FEATURE_ARRAY_NAME) + } else { + if (featureIsArrayType(schema)) { + col($(featuresCol)).cast(ArrayType(FloatType)) + } else { + col($(featuresCol)) + } + } + selectedCols.append(featureCol) + val repartitioned = repartitionIfNeeded(dataset.select(selectedCols.toArray: _*)) val sorted = sortPartitionIfNeeded(repartitioned) val columnIndices = buildColumnIndices(sorted.schema) @@ -250,7 +261,7 @@ private[spark] trait XGBoostEstimator[ // To make "0" meaningful, we convert sparse vector if possible to dense. features.toArray.map(_.toFloat) } - XGBLabeledPoint(label, values.length, null, values, weight, group, baseMargin) + new XGBLabeledPoint(label, values.length, null, values, weight, group, baseMargin) } } @@ -363,7 +374,7 @@ private[spark] trait XGBoostEstimator[ private[spark] def getRuntimeParameters(isLocal: Boolean, configs: Map[String, AnyRef] = Map.empty): RuntimeParams = { - val runOnGpu = if (getDevice != "cpu" || getTreeMethod == "gpu_hist") true else false + val runOnGpu = if (getDevice != "cpu") true else false RuntimeParams( getNumWorkers, getNumRound, @@ -604,22 +615,29 @@ private[spark] trait XGBoostModel[M <: XGBoostModel[M]] extends Model[M] with ML if (PluginUtils.isPluginEnabled(dataset)) { return PluginUtils.getPlugin.get.transform(this, dataset) } - validateFeatureType(dataset.schema) val (schema, pred) = preprocess(dataset) + // The model may have been trained on columnar input, while the transform dataframe + // may carry array or vector features + val (input, featureName, featureIsArray) = if (isSet(featuresCols) && + getFeaturesCols.length > 0 && + getFeaturesCols.forall(schema.names.contains)) { + (dataset.withColumn(Utils.TMP_FEATURE_ARRAY_NAME, + array(getFeaturesCols.map(col): _*).cast(ArrayType(FloatType))), + Utils.TMP_FEATURE_ARRAY_NAME, + true) + } else { + (dataset, getFeaturesCol, featureIsArrayType(dataset.schema)) + } + // Broadcast the booster to each executor. - val bBooster = dataset.sparkSession.sparkContext.broadcast(nativeBooster) - // TODO configurable - val inferBatchSize = 32 << 10 - val featureName = getFeaturesCol + val bBooster = input.sparkSession.sparkContext.broadcast(nativeBooster) + val inferBatchSize = getInferBatchSize val missing = getMissing - val featureIsArray = featureIsArrayType(dataset.schema) - - // Here, we use RDD instead of DF to avoid different encoders for different // spark versions for the compatibility issue.
// 3.5+, Encoders.row(schema) // 3.5-, RowEncoder(schema) - val outRDD = dataset.asInstanceOf[Dataset[Row]].rdd.mapPartitions { rowIter => + val outRDD = input.asInstanceOf[Dataset[Row]].rdd.mapPartitions { rowIter => rowIter.grouped(inferBatchSize).flatMap { batchRow => val features = batchRow.iterator.map(row => { if (!featureIsArray) { @@ -636,7 +654,7 @@ private[spark] trait XGBoostModel[M <: XGBoostModel[M]] extends Model[M] with ML } case _ => throw new RuntimeException("Unsupported feature type") } - XGBLabeledPoint(0.0f, values.size, null, values) + new XGBLabeledPoint(0.0f, values.size, null, values) } }) // DMatrix used to prediction @@ -648,7 +666,8 @@ private[spark] trait XGBoostModel[M <: XGBoostModel[M]] extends Model[M] with ML } } } - val output = dataset.sparkSession.createDataFrame(outRDD, schema) + val output = input.sparkSession.createDataFrame(outRDD, schema) + .drop(Utils.TMP_FEATURE_ARRAY_NAME) bBooster.unpersist(blocking = false) postTransform(output, pred).toDF() diff --git a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/TreeBoosterParams.scala b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/TreeBoosterParams.scala index 208ba1bf6346..afad45437396 100644 --- a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/TreeBoosterParams.scala +++ b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/TreeBoosterParams.scala @@ -231,7 +231,7 @@ private[spark] trait TreeBoosterParams extends Params { private[spark] object BoosterParams { - val supportedTreeMethods = HashSet("auto", "exact", "approx", "hist", "gpu_hist") + val supportedTreeMethods = HashSet("auto", "exact", "approx", "hist") val supportedUpdaters = HashSet("grow_colmaker", "grow_histmaker", "grow_quantile_histmaker", "grow_gpu_hist", "grow_gpu_approx", "sync", "refresh", "prune") diff --git a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/XGBoostParams.scala b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/XGBoostParams.scala index bf27c51f8a72..891c7362573d 100644 --- a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/XGBoostParams.scala +++ b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/XGBoostParams.scala @@ -122,7 +122,7 @@ private[spark] trait SparkParams[T <: Params] extends HasFeaturesCols with HasFe final val numWorkers = new IntParam(this, "numWorkers", "Number of workers used to train xgboost", ParamValidators.gtEq(1)) - final def getNumRound: Int = $(numRound) + final def getNumWorkers: Int = $(numWorkers) final val forceRepartition = new BooleanParam(this, "forceRepartition", "If the partition " + "is equal to numWorkers, xgboost won't repartition the dataset. 
Set forceRepartition to " + @@ -133,6 +133,8 @@ private[spark] trait SparkParams[T <: Params] extends HasFeaturesCols with HasFe final val numRound = new IntParam(this, "numRound", "The number of rounds for boosting", ParamValidators.gtEq(1)) + final def getNumRound: Int = $(numRound) + final val numEarlyStoppingRounds = new IntParam(this, "numEarlyStoppingRounds", "Stop training " + "Number of rounds of decreasing eval metric to tolerate before stopping training", ParamValidators.gtEq(0)) @@ -188,39 +190,42 @@ private[spark] trait SparkParams[T <: Params] extends HasFeaturesCols with HasFe final def getUseExternalMemory: Boolean = $(useExternalMemory) - final val maxNumDevicePages = new IntParam(this, "maxNumDevicePages", "Maximum number of " + - "pages cached in device") - - final def getMaxNumDevicePages: Int = $(maxNumDevicePages) - final val maxQuantileBatches = new IntParam(this, "maxQuantileBatches", "Maximum quantile " + "batches") final def getMaxQuantileBatches: Int = $(maxQuantileBatches) - final val minCachePageBytes = new IntParam(this, "minCachePageBytes", "Minimum number of " + + final val minCachePageBytes = new LongParam(this, "minCachePageBytes", "Minimum number of " + "bytes for each ellpack page in cache. Only used for in-host") - final def getMinCachePageBytes: Int = $(minCachePageBytes) + final def getMinCachePageBytes: Long = $(minCachePageBytes) + + final val cacheHostRatio = new FloatParam(this, "cacheHostRatio", + "Used by the GPU implementation. For GPU-based inputs, XGBoost can split the cache into " + + "host and device caches to reduce the data transfer overhead. This parameter specifies " + + "the size of host cache compared to the size of the entire cache: host / (host + device)", + ParamValidators.inRange(0.0, 1.0)) + + final def getCacheHostRatio: Float = $(cacheHostRatio) setDefault(numRound -> 100, numWorkers -> 1, inferBatchSize -> (32 << 10), numEarlyStoppingRounds -> 0, forceRepartition -> false, missing -> Float.NaN, featuresCols -> Array.empty, customObj -> null, customEval -> null, featureNames -> Array.empty, featureTypes -> Array.empty, useExternalMemory -> false, - maxNumDevicePages -> -1, maxQuantileBatches -> -1, minCachePageBytes -> -1) + maxQuantileBatches -> -1, minCachePageBytes -> -1) addNonXGBoostParam(numWorkers, numRound, numEarlyStoppingRounds, inferBatchSize, featuresCol, labelCol, baseMarginCol, weightCol, predictionCol, leafPredictionCol, contribPredictionCol, forceRepartition, featuresCols, customEval, customObj, featureTypes, featureNames) - final def getNumWorkers: Int = $(numWorkers) - def setNumWorkers(value: Int): T = set(numWorkers, value).asInstanceOf[T] def setForceRepartition(value: Boolean): T = set(forceRepartition, value).asInstanceOf[T] def setNumRound(value: Int): T = set(numRound, value).asInstanceOf[T] + def setNumEarlyStoppingRounds(value: Int): T = set(numEarlyStoppingRounds, value).asInstanceOf[T] + def setFeaturesCol(value: Array[String]): T = set(featuresCols, value).asInstanceOf[T] def setBaseMarginCol(value: String): T = set(baseMarginCol, value).asInstanceOf[T] @@ -251,22 +256,32 @@ private[spark] trait SparkParams[T <: Params] extends HasFeaturesCols with HasFe def setUseExternalMemory(value: Boolean): T = set(useExternalMemory, value).asInstanceOf[T] - def setMaxNumDevicePages(value: Int): T = set(maxNumDevicePages, value).asInstanceOf[T] - def setMaxQuantileBatches(value: Int): T = set(maxQuantileBatches, value).asInstanceOf[T] - def setMinCachePageBytes(value: Int): T = set(minCachePageBytes, 
value).asInstanceOf[T] + def setMinCachePageBytes(value: Long): T = set(minCachePageBytes, value).asInstanceOf[T] + + def setCacheHostRatio(value: Float): T = set(cacheHostRatio, value) + .asInstanceOf[T] protected[spark] def featureIsArrayType(schema: StructType): Boolean = schema(getFeaturesCol).dataType.isInstanceOf[ArrayType] - protected[spark] def validateFeatureType(schema: StructType) = { - // Features cols must be Vector or Array. - val featureDataType = schema(getFeaturesCol).dataType - - // Features column must be either ArrayType or VectorType. - if (!featureDataType.isInstanceOf[ArrayType] && !SparkUtils.isVectorType(featureDataType)) { - throw new IllegalArgumentException("Feature type must be either ArrayType or VectorType") + protected[spark] def validateFeatureType(schema: StructType): Unit = { + // If featuresCols is not set, need to check featuresCol which must be Vector or Array + if (!isSet(featuresCols)) { + // Features cols must be Vector or Array. + val featureDataType = schema(getFeaturesCol).dataType + + // Features column must be either ArrayType or VectorType. + if (!featureDataType.isInstanceOf[ArrayType] && !SparkUtils.isVectorType(featureDataType)) { + throw new IllegalArgumentException("Feature type must be either ArrayType or VectorType") + } + } else { + // To check columns must be numeric type + require(getFeaturesCols.length > 0) + for (c <- getFeaturesCols) { + SparkUtils.checkNumericType(schema, c) + } } } } diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/PerTest.scala b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/PerTest.scala index 2c6ee9c51d98..8ff9839be7ee 100644 --- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/PerTest.scala +++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/PerTest.scala @@ -103,6 +103,15 @@ trait PerTest extends BeforeAndAfterEach { } } + def smallBinaryClassificationColumnar: DataFrame = ss.createDataFrame(sc.parallelize(Seq( + (1.0, 2.0, 3.0, 1.0), + (0.0, 0.0, 0.0, 0.0), + (0.0, 3.0, 0.0, 0.0), + (2.0, 0.0, 4.0, 1.0), + (0.2, 1.2, 2.0, 0.0), + (0.5, 2.2, 1.7, 1.0) + ))).toDF("c1", "c2", "c3", "label") + def smallBinaryClassificationVector: DataFrame = ss.createDataFrame(sc.parallelize(Seq( (1.0, 0.5, 1.0, Vectors.dense(1.0, 2.0, 3.0)), (0.0, 0.4, -3.0, Vectors.dense(0.0, 0.0, 0.0)), diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/TrainTestData.scala b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/TrainTestData.scala index b93bba9ef133..6bc40d8a10dc 100644 --- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/TrainTestData.scala +++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/TrainTestData.scala @@ -47,7 +47,7 @@ trait TrainTestData { } } - XGBLabeledPoint(label, featureSize, null, values) + new XGBLabeledPoint(label, featureSize, null, values) }.toList } @@ -58,7 +58,7 @@ trait TrainTestData { val label = original.head.toFloat val group = original.last.toInt val values = original.slice(1, length - 1).map(_.toFloat) - XGBLabeledPoint(label, values.size, null, values, 1f, group, Float.NaN) + new XGBLabeledPoint(label, values.size, null, values, 1f, group, Float.NaN) }.toList } } @@ -70,7 +70,7 @@ object Classification extends TrainTestData { Random.setSeed(10) val randomWeights = Array.fill(train.length)(Random.nextFloat()) val trainWithWeight = train.zipWithIndex.map { case (v, 
index) => - XGBLabeledPoint(v.label, v.size, v.indices, v.values, + new XGBLabeledPoint(v.label, v.size, v.indices, v.values, randomWeights(index), v.group, v.baseMargin) } } @@ -89,7 +89,7 @@ object MultiClassification extends TrainTestData { Random.setSeed(10) val randomWeights = Array.fill(train.length)(Random.nextFloat()) val trainWithWeight = train.zipWithIndex.map { case (v, index) => - XGBLabeledPoint(v.label, v.size, v.indices, v.values, + new XGBLabeledPoint(v.label, v.size, v.indices, v.values, randomWeights(index), v.group, v.baseMargin) } @@ -104,7 +104,7 @@ object MultiClassification extends TrainTestData { values(i) = featuresAndLabel(i).toFloat } - XGBLabeledPoint(label, values.length - 1, null, values.take(values.length - 1)) + new XGBLabeledPoint(label, values.length - 1, null, values.take(values.length - 1)) }.toList } } @@ -119,7 +119,7 @@ object Regression extends TrainTestData { Random.setSeed(10) val randomWeights = Array.fill(train.length)(Random.nextFloat()) val trainWithWeight = train.zipWithIndex.map { case (v, index) => - XGBLabeledPoint(v.label, v.size, v.indices, v.values, + new XGBLabeledPoint(v.label, v.size, v.indices, v.values, randomWeights(index), v.group, v.baseMargin) } @@ -128,7 +128,7 @@ object Regression extends TrainTestData { val train: Seq[XGBLabeledPoint] = getLabeledPointsWithGroup("/rank.train.csv") // use the group as the weight val trainWithWeight = train.map { labelPoint => - XGBLabeledPoint(labelPoint.label, labelPoint.size, labelPoint.indices, labelPoint.values, + new XGBLabeledPoint(labelPoint.label, labelPoint.size, labelPoint.indices, labelPoint.values, labelPoint.group, labelPoint.group, labelPoint.baseMargin) } val trainGroups = train.map(_.group) diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostEstimatorSuite.scala b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostEstimatorSuite.scala index 9b52d286c2d7..b9bbec0ee5cb 100644 --- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostEstimatorSuite.scala +++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostEstimatorSuite.scala @@ -23,6 +23,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.SparkException import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vectors} +import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.xgboost.SparkUtils import org.apache.spark.sql.functions.col import org.apache.spark.sql.types.{ArrayType, DoubleType, FloatType} @@ -132,16 +133,6 @@ class XGBoostEstimatorSuite extends AnyFunSuite with PerTest with TmpFolderPerSu Map("device" -> "cuda")).setNumWorkers(1).setNumRound(1) .getRuntimeParameters(true) assert(runtimeParams.runOnGpu) - - runtimeParams = new XGBoostClassifier( - Map("device" -> "cpu", "tree_method" -> "gpu_hist")).setNumWorkers(1).setNumRound(1) - .getRuntimeParameters(true) - assert(runtimeParams.runOnGpu) - - runtimeParams = new XGBoostClassifier( - Map("device" -> "cuda", "tree_method" -> "gpu_hist")).setNumWorkers(1).setNumRound(1) - .getRuntimeParameters(true) - assert(runtimeParams.runOnGpu) } test("missing value exception for sparse vector") { @@ -556,6 +547,109 @@ class XGBoostEstimatorSuite extends AnyFunSuite with PerTest with TmpFolderPerSu exception.getMessage.contains("SoftmaxMultiClassObj: label must be in [0, num_class).") } + test("Model trained on vector can transform on array/columnar input") { + val vectorDf = smallBinaryClassificationVector 
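+ // This test and the two that follow exercise every pairing: a model fitted on one + // feature representation (vector, array, or separate numeric columns) can transform + // input supplied in another representation once the feature column(s) are specified.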
+ val classifier = new XGBoostClassifier().setNumRound(2) + + // The model is trained with vector as the input + val model = classifier.fit(vectorDf) + + val columnarDf = smallBinaryClassificationColumnar + + // Model is trained with vector input, it doesn't have columnar input information + val thrown = intercept[IllegalArgumentException] { + model.transform(columnarDf).collect() + } + assert(thrown.getMessage.contains("features does not exist")) + + // Transform on columnar input + model.copy(ParamMap.empty) + .setFeaturesCol(Array("c1", "c2", "c3")) + .transform(columnarDf) + .collect() + + // Transform on array input + val arrayDf = smallBinaryClassificationArray + model.copy(ParamMap.empty).transform(arrayDf).collect() + } + + test("Model trained on array can transform on vector/columnar input") { + val arrayDf = smallBinaryClassificationArray + val classifier = new XGBoostClassifier().setNumRound(2) + + // The model is trained with array as the input + val model = classifier.fit(arrayDf) + val columnarDf = smallBinaryClassificationColumnar + + // Model is trained with array input, it doesn't have columnar input information + val thrown = intercept[IllegalArgumentException] { + model.transform(columnarDf).collect() + } + assert(thrown.getMessage.contains("features does not exist")) + + // Transform on columnar input + model.copy(ParamMap.empty) + .setFeaturesCol(Array("c1", "c2", "c3")) + .transform(columnarDf) + .collect() + + // Transform on vector input + val vectorDf = smallBinaryClassificationVector + model.copy(ParamMap.empty).transform(vectorDf).collect() + } + + test("Model trained on columnar can transform on array/vector input") { + val columnarDf = smallBinaryClassificationColumnar + val features = Array("c1", "c2", "c3") + val classifier = new XGBoostClassifier().setNumRound(2).setFeaturesCol(features) + // The model is trained with columnar as the input + val model = classifier.fit(columnarDf) + + // Transform on vector df + val vectorDf = smallBinaryClassificationVector + model.transform(vectorDf).collect() + + // Transform on array df + val arrayDf = smallBinaryClassificationArray + model.transform(arrayDf).collect() + } + + test("Fit and transform with columnar input") { + val df = smallBinaryClassificationColumnar + + val estimator = new XGBoostClassifier() + .setFeaturesCol(Array("c1", "c2", "c3")) + .setNumRound(1) + + // Fitting with columnar input should succeed without any issue + val model = estimator.fit(df) + assert(model.getFeaturesCols sameElements Array("c1", "c2", "c3")) + + val transformedDF = model.transform(df) + assert(transformedDF.schema.names.contains("c1")) + assert(transformedDF.schema.names.contains("c2")) + assert(transformedDF.schema.names.contains("c3")) + assert(!transformedDF.schema.names.contains(Utils.TMP_FEATURE_ARRAY_NAME)) + } + + test("Support columnar") { + val df = smallBinaryClassificationColumnar + + val classifier = new XGBoostClassifier().setFeaturesCol(Array("c1", "c2", "c3")) + assert(classifier.getFeaturesCols sameElements Array("c1", "c2", "c3")) + + val (processed, _) = classifier.preprocess(df) + assert(!processed.schema.contains("c1")) + assert(!processed.schema.contains("c2")) + assert(!processed.schema.contains("c3")) + + val matched = processed.schema(Utils.TMP_FEATURE_ARRAY_NAME).dataType match { + case ArrayType(FloatType, _) => true + case _ => false + } + assert(matched) + } + test("Support array(float)") { val df = smallBinaryClassificationArray val matched = df.schema("features").dataType match { diff --git 
a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostParamsSuite.scala b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostParamsSuite.scala new file mode 100644 index 000000000000..4a2b54cf15cf --- /dev/null +++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostParamsSuite.scala @@ -0,0 +1,61 @@ +/* + Copyright (c) 2024 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +package ml.dmlc.xgboost4j.scala.spark + +import scala.util.Try + +import org.scalatest.funsuite.AnyFunSuite + + +class XGBoostParamsSuite extends AnyFunSuite with PerTest with TmpFolderPerSuite { + + test("invalid parameters") { + val estimator = new XGBoostClassifier() + + // cacheHostRatio is not set by default, so the raw getter throws + var thrown = intercept[RuntimeException] { + estimator.getCacheHostRatio + } + assert(thrown.getMessage.contains("Failed to find a default value for cacheHostRatio")) + + val v = Try(estimator.getCacheHostRatio).getOrElse(Float.NaN) + assert(v.equals(Float.NaN)) + + // Out-of-range values are rejected by the param validator + thrown = intercept[RuntimeException] { + estimator.setCacheHostRatio(-1.0f) + } + assert(thrown.getMessage.contains("parameter cacheHostRatio given invalid value -1.0")) + + assert(Seq(0.0f, 0.2f, 1.0f).forall(v => { + estimator.setCacheHostRatio(v) + estimator.getCacheHostRatio == v + })) + + estimator.setCacheHostRatio(0.66f) + val v1 = Try(estimator.getCacheHostRatio).getOrElse(Float.NaN) + assert(v1 == 0.66f) + } + + test("setNumEarlyStoppingRounds") { + val estimator = new XGBoostClassifier() + assert(estimator.getNumEarlyStoppingRounds == 0) + estimator.setNumEarlyStoppingRounds(10) + assert(estimator.getNumEarlyStoppingRounds == 10) + } + +}
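A minimal sketch of how the parameters exercised above fit together (illustrative only; it assumes a training DataFrame df with numeric columns c1, c2, c3 and a label column, which are not part of this change set):

    val classifier = new XGBoostClassifier(Map("device" -> "cuda"))
      .setFeaturesCol(Array("c1", "c2", "c3"))  // columnar input, packed into one array column
      .setNumRound(100)
      .setNumEarlyStoppingRounds(10)
      .setUseExternalMemory(true)
      .setMinCachePageBytes(64L * 1024 * 1024)  // now a Long
      .setCacheHostRatio(0.66f)                 // host / (host + device), validated to [0, 1]
    val model = classifier.fit(df)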
diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRankerSuite.scala b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRankerSuite.scala index 063836538931..90aa368e13ad 100644 --- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRankerSuite.scala +++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostRankerSuite.scala @@ -140,11 +140,12 @@ class XGBoostRankerSuite extends AnyFunSuite with PerTest with TmpFolderPerSuite .setGroupCol("group") val (df, _) = ranker.preprocess(trainingDF) + val groupId = df.schema.fieldIndex("group") df.rdd.foreachPartition { iter => { var prevGroup = Int.MinValue while (iter.hasNext) { val curr = iter.next() - val group = curr.asInstanceOf[Row].getAs[Int](2) + val group = curr.asInstanceOf[Row].getAs[Int](groupId) assert(prevGroup <= group) prevGroup = group } diff --git a/jvm-packages/xgboost4j/pom.xml index b9c144dd044f..db737711960b 100644 --- a/jvm-packages/xgboost4j/pom.xml +++ b/jvm-packages/xgboost4j/pom.xml @@ -6,11 +6,11 @@ ml.dmlc xgboost-jvm_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT xgboost4j xgboost4j_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT jar @@ -64,13 +64,13 @@ - org.apache.maven.plugins - maven-javadoc-plugin - 3.11.1 - - protected - true - + org.apache.maven.plugins + maven-javadoc-plugin + 3.11.3 + + protected + true + org.apache.maven.plugins diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/LabeledPoint.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/LabeledPoint.java new file mode 100644 index 000000000000..478583834e3e --- /dev/null +++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/LabeledPoint.java @@ -0,0 +1,133 @@ +/* + Copyright (c) 2014-2025 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ +package ml.dmlc.xgboost4j; + +import java.io.Serializable; +import java.util.Arrays; +import java.util.Objects; + +/** + * Labeled training data point. + * TODO(hcho3): Migrate to a record class when we upgrade to Java 14+, to reduce boilerplate. + */ +public final class LabeledPoint implements Serializable { + private final float label; + private final int size; + private final int[] indices; + private final float[] values; + private final float weight; + private final int group; + private final float baseMargin; + + /** + * @param label Label of this point. + * @param size Feature dimensionality + * @param indices Feature indices of this point or `null` if the data is dense. + * @param values Feature values of this point. + * @param weight Weight of this point. + * @param group Group of this point (used for ranking) or -1. + * @param baseMargin Initial prediction on this point or `Float.NaN` + */ + public LabeledPoint( + float label, int size, int[] indices, float[] values, float weight, + int group, float baseMargin + ) { + assert (indices == null || indices.length == values.length): + "indices and values must have the same number of elements"; + assert (indices == null || size >= indices.length): + "feature dimensionality must be greater than or equal to the number of indices"; + this.label = label; + this.size = size; + this.indices = indices; + this.values = values; + this.weight = weight; + this.group = group; + this.baseMargin = baseMargin; + } + + /** + * @param label Label of this point. + * @param size Feature dimensionality + * @param indices Feature indices of this point or `null` if the data is dense. + * @param values Feature values of this point. + */ + public LabeledPoint( + float label, int size, int[] indices, float[] values + ) { + this(label, size, indices, values, 1.0f, -1, Float.NaN); + } + + /** + * @param label Label of this point. + * @param size Feature dimensionality + * @param indices Feature indices of this point or `null` if the data is dense. + * @param values Feature values of this point. + * @param weight Weight of this point. + */ + public LabeledPoint( + float label, int size, int[] indices, float[] values, float weight + ) { + this(label, size, indices, values, weight, -1, Float.NaN); + } + + /** + * @param label Label of this point. + * @param size Feature dimensionality + * @param indices Feature indices of this point or `null` if the data is dense.
+ * @param values Feature values of this point. + * @param weight Weight of this point. + * @param group Group of this point (used for ranking) or -1. + */ + public LabeledPoint( + float label, int size, int[] indices, float[] values, float weight, + int group + ) { + this(label, size, indices, values, weight, group, Float.NaN); + } + + + @Override + public int hashCode() { + return Objects.hash(this.label, this.size, Arrays.hashCode(this.indices), + Arrays.hashCode(this.values), this.weight, this.group, this.baseMargin); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } else if (!(obj instanceof LabeledPoint)) { + return false; + } else { + LabeledPoint other = (LabeledPoint) obj; + return Objects.equals(label, other.label) + && Objects.equals(size, other.size) + && Arrays.equals(indices, other.indices) + && Arrays.equals(values, other.values) + && Objects.equals(weight, other.weight) + && Objects.equals(group, other.group) + && Objects.equals(baseMargin, other.baseMargin); + } + } + + public float label() { return this.label; } + public int size() { return this.size; } + public int[] indices() { return this.indices; } + public float[] values() { return this.values; } + public float weight() { return this.weight; } + public int group() { return this.group; } + public float baseMargin() { return this.baseMargin; } +} diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java index 22ed6dc82166..6310f2b7b082 100644 --- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java +++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java @@ -31,7 +31,7 @@ import org.apache.commons.logging.LogFactory; /** - * Booster for xgboost, this is a model API that support interactive build of a XGBoost Model + * Booster for xgboost, this is a model API that supports interactive building of an XGBoost model */ public class Booster implements Serializable, KryoSerializable { public static final String DEFAULT_FORMAT = "ubj"; @@ -39,7 +39,9 @@ public class Booster implements Serializable, KryoSerializable { // handle to the booster. private long handle = 0; private int version = 0; + /** + * This enumeration defines the type of prediction to be made and is used for inplace predictions. * Type of prediction, used for inplace_predict. */ public enum PredictionType { @@ -58,9 +60,9 @@ public Integer getPType() { /** * Create a new Booster with empty stage. * - * @param params Model parameters - * @param cacheMats Cached DMatrix entries, - * the prediction of these DMatrices will become faster than not-cached data. + * @param params Model parameters that are used to build the Booster + * @param cacheMats Cached DMatrix entries that help increase the speed of Booster prediction + * * @throws XGBoostError native error */ Booster(Map params, DMatrix[] cacheMats) throws XGBoostError { @@ -70,7 +72,7 @@ public Integer getPType() { /** * Load a new Booster model from modelPath - * @param modelPath The path to the model. + * @param modelPath The path to the model file. * @return The created Booster. * @throws XGBoostError */ @@ -89,7 +91,7 @@ static Booster loadModel(String modelPath) throws XGBoostError { * This can be used to load existing booster models saved by other xgboost bindings. * * @param buffer The byte contents of the booster. - * @return The created boosted + * @return The created booster.
* @throws XGBoostError */ static Booster loadModel(byte[] buffer) throws XGBoostError { @@ -140,7 +142,7 @@ public final Map getAttrs() throws XGBoostError { } /** - * Get attribute from the Booster. + * Get attribute value from the Booster based on the key provided. * * @param key attribute key * @return attribute value @@ -153,7 +155,7 @@ public final String getAttr(String key) throws XGBoostError { } /** - * Set attribute to the Booster. + * Set an attribute key-value pair to the Booster. * * @param key attribute key * @param value attribute value @@ -164,7 +166,7 @@ public final void setAttr(String key, String value) throws XGBoostError { } /** - * Set attributes to the Booster. + * Set multiple attribute key-value pairs to the Booster. * * @param attrs attributes key-value map * @throws XGBoostError native error @@ -178,8 +180,8 @@ public void setAttrs(Map attrs) throws XGBoostError { } /** - * Get feature names from the Booster. - * @return + * Get all the feature names from the Booster. + * @return An array of all the feature names. * @throws XGBoostError */ public final String[] getFeatureNames() throws XGBoostError { @@ -192,7 +194,7 @@ public final String[] getFeatureNames() throws XGBoostError { /** * Set feature names to the Booster. * - * @param featureNames + * @param featureNames An array of all the feature names. * @throws XGBoostError */ public void setFeatureNames(String[] featureNames) throws XGBoostError { @@ -202,7 +204,7 @@ public void setFeatureNames(String[] featureNames) throws XGBoostError { /** * Get feature types from the Booster. - * @return + * @return An array of all the feature types. * @throws XGBoostError */ public final String[] getFeatureTypes() throws XGBoostError { @@ -214,7 +216,7 @@ public final String[] getFeatureTypes() throws XGBoostError { /** * Set feature types to the Booster. - * @param featureTypes + * @param featureTypes An array of all the feature types. * @throws XGBoostError */ public void setFeatureTypes(String[] featureTypes) throws XGBoostError { @@ -241,7 +243,7 @@ public void update(DMatrix dtrain, IObjective obj) throws XGBoostError { } /** - * Update with customize obj func + * Update with a customized objective function * * @param dtrain training data * @param iter The current training iteration. @@ -278,12 +280,12 @@ public void boost(DMatrix dtrain, int iter, float[] grad, float[] hess) throws X } /** - * evaluate with given dmatrixs. + * Evaluate the Booster model with the given DMatrices. * * @param evalMatrixs dmatrixs for evaluation * @param evalNames name for eval dmatrixs, used for check results * @param iter current eval iteration - * @return eval information + * @return eval information containing the evaluation results * @throws XGBoostError native error */ public String evalSet(DMatrix[] evalMatrixs, String[] evalNames, int iter) throws XGBoostError { @@ -295,13 +297,13 @@ public String evalSet(DMatrix[] evalMatrixs, String[] evalNames, int iter) throw } /** - * evaluate with given dmatrixs. + * Evaluate the Booster model with the given DMatrices.
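+ * (Illustration only: the returned string typically looks like {@code [1] train-error:0.04 test-error:0.06}, + * where the names come from evalNames and the metric depends on the booster configuration.)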
* * @param evalMatrixs dmatrixs for evaluation * @param evalNames name for eval dmatrixs, used for check results * @param iter current eval iteration * @param metricsOut output array containing the evaluation metrics for each evalMatrix - * @return eval information + * @return eval information containing the evaluation results * @throws XGBoostError native error */ public String evalSet(DMatrix[] evalMatrixs, String[] evalNames, int iter, float[] metricsOut) @@ -322,12 +324,12 @@ public String evalSet(DMatrix[] evalMatrixs, String[] evalNames, int iter, float } /** - * evaluate with given customized Evaluation class + * Evaluate the Booster model with a given customized Evaluation class * * @param evalMatrixs evaluation matrix * @param evalNames evaluation names * @param eval custom evaluator - * @return eval information + * @return eval information containing the evaluation results * @throws XGBoostError native error */ public String evalSet(DMatrix[] evalMatrixs, String[] evalNames, IEvaluation eval) @@ -351,20 +353,20 @@ public String evalSet(DMatrix[] evalMatrixs, String[] evalNames, IEvaluation eva } /** - * Advanced predict function with all the options. + * An advanced prediction function with all the options. * - * @param data data + * @param data the test data for which predictions are to be made * @param outputMargin output margin * @param treeLimit limit number of trees, 0 means all trees. * @param predLeaf prediction minimum to keep leafs * @param predContribs prediction feature contributions - * @return predict results + * @return a two-dimensional array of results, where each row corresponds to a prediction. */ - private synchronized float[][] predict(DMatrix data, - boolean outputMargin, - int treeLimit, - boolean predLeaf, - boolean predContribs) throws XGBoostError { + private float[][] predict(DMatrix data, + boolean outputMargin, + int treeLimit, + boolean predLeaf, + boolean predContribs) throws XGBoostError { int optionMask = 0; if (outputMargin) { optionMask = 1; @@ -506,10 +508,10 @@ public float[][] predictContrib(DMatrix data, int treeLimit) throws XGBoostError } /** - * Predict with data + * Make a prediction with test data in a DMatrix format. * - * @param data dmatrix storing the input - * @return predict result + * @param data dmatrix storing the test input on which predictions are to be made + * @return The results of the prediction, where each row corresponds to a prediction. * @throws XGBoostError native error */ public float[][] predict(DMatrix data) throws XGBoostError { @@ -517,11 +519,11 @@ public float[][] predict(DMatrix data) throws XGBoostError { } /** - * Predict with data + * Make a prediction with test data in a DMatrix format and output margin. * - * @param data data + * @param data dmatrix storing the test input on which predictions are to be made * @param outputMargin output margin - * @return predict results + * @return The results of the prediction, where each row corresponds to a prediction. */ public float[][] predict(DMatrix data, boolean outputMargin) throws XGBoostError { return this.predict(data, outputMargin, 0, false, false); @@ -530,10 +532,10 @@ public float[][] predict(DMatrix data, boolean outputMargin) throws XGBoostError /** * Advanced predict function with all the options. * - * @param data data + * @param data dmatrix storing the test input on which predictions are to be made * @param outputMargin output margin * @param treeLimit limit number of trees, 0 means all trees. 
- * @return predict results + * @return The results of the prediction, where each row corresponds to a prediction. */ public float[][] predict(DMatrix data, boolean outputMargin, int treeLimit) throws XGBoostError { return this.predict(data, outputMargin, treeLimit, false, false); @@ -579,14 +581,25 @@ public void saveModel(OutputStream out, String format) throws XGBoostError, IOEx /** * Get the dump of the model as a string array * + * @param featureMap A string containing the path to a feature map. * @param withStats Controls whether the split statistics are output. - * @return dumped model information + * @return The dumped model information * @throws XGBoostError native error */ public String[] getModelDump(String featureMap, boolean withStats) throws XGBoostError { return getModelDump(featureMap, withStats, "text"); } + /** + * Get the dump of the model as a string array with the specified feature map, stats setting, + * and format. + * + * @param featureMap A string containing the path to a feature map. + * @param withStats Controls whether the split statistics are output. + * @param format The format in which the model is dumped (text, json, ubj). + * @return The dumped model information + * @throws XGBoostError + */ public String[] getModelDump(String featureMap, boolean withStats, String format) throws XGBoostError { int statsFlag = 0; @@ -616,6 +629,16 @@ public String[] getModelDump(String[] featureNames, boolean withStats) throws XG return getModelDump(featureNames, withStats, "text"); } + /** + * Get the dump of the model as a string array with the specified feature names, stats setting, + * and format. + * + * @param featureNames An array of strings containing the feature names. + * @param withStats Controls whether the split statistics are output. + * @param format The format in which the model is dumped (text, json, ubj). + * @return The dumped model information + * @throws XGBoostError + */ public String[] getModelDump(String[] featureNames, boolean withStats, String format) throws XGBoostError { int statsFlag = 0; diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/ExternalCheckpointManager.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/ExternalCheckpointManager.java index d5b8e8b9cdea..a5d79b86044a 100644 --- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/ExternalCheckpointManager.java +++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/ExternalCheckpointManager.java @@ -26,6 +26,12 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +/** + * This class contains the methods that are required for managing the state of the training + * process. The training state is stored in a distributed file system as + * UBJ (Universal Binary JSON) model files. + * The class provides methods for saving, loading and cleaning up checkpoints. + */ public class ExternalCheckpointManager { private Log logger = LogFactory.getLog("ExternalCheckpointManager"); @@ -33,6 +39,14 @@ public class ExternalCheckpointManager { private Path checkpointPath; // directory for checkpoints private FileSystem fs; + /** + * This constructor creates a new External Checkpoint Manager at the specified path in the + * specified file system. + * + * @param checkpointPath The directory path where checkpoints will be stored. + * @param fs The file system to use for storing checkpoints. + * @throws XGBoostError the error that is thrown if the checkpoint path is null or empty. 
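+ * <p>Typical usage (the path is illustrative): create the manager with a Hadoop FileSystem and a + * directory such as hdfs:///tmp/xgboost-checkpoints, call loadCheckpointAsBooster() when + * (re)starting training, and updateCheckpoint(booster) at each checkpointing round.</p>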
+ */ public ExternalCheckpointManager(String checkpointPath, FileSystem fs) throws XGBoostError { if (checkpointPath == null || checkpointPath.isEmpty()) { throw new XGBoostError("cannot create ExternalCheckpointManager with null or" + @@ -65,10 +79,24 @@ private Integer latest(List versions) { .max(Comparator.comparing(Integer::valueOf)).get(); } + /** + * This method cleans all the directories and files that are present in the checkpoint path. + * @throws IOException exception that is thrown when there is an error deleting the + * checkpoint path. + */ public void cleanPath() throws IOException { fs.delete(checkpointPath, true); } + /** + * Read the checkpoint from the checkpoint path. Once the checkpoint path is read, we get + * the latest version of the checkpoint from all the checkpoint versions and load it + * into the booster for the purpose of making predictions. + * + * @return The booster object that is used for making predictions. + * @throws IOException Any exception that occurs when reading the checkpoint path. + * @throws XGBoostError Any exception that occurs when loading the model into the booster. + */ public Booster loadCheckpointAsBooster() throws IOException, XGBoostError { List versions = getExistingVersions(); if (versions.size() > 0) { @@ -83,6 +111,15 @@ public Booster loadCheckpointAsBooster() throws IOException, XGBoostError { } } + /** + * This method updates the booster checkpoint to the latest or current + * version and deletes all the previous versions of the checkpoint. + * @param boosterToCheckpoint The booster object that is to be checkpointed and + * saved as a model file. + * @throws IOException Any exception that occurs when writing the model file to the + * checkpoint path. + * @throws XGBoostError Any exception that occurs when saving the model from the booster. + */ public void updateCheckpoint(Booster boosterToCheckpoint) throws IOException, XGBoostError { List prevModelPaths = getExistingVersions().stream() .map(this::getPath).collect(Collectors.toList()); @@ -105,6 +142,13 @@ public void updateCheckpoint(Booster boosterToCheckpoint) throws IOException, XG } } + /** + * This method cleans up all the checkpoint versions that are higher than the current round. + * This is useful when multiple training instances are running and we want to make sure that + * only the checkpoints from the current training instance are retained. + * @param currentRound The current round of training. + * @throws IOException Any exception that occurs when deleting the checkpoint files. + */ public void cleanUpHigherVersions(int currentRound) throws IOException { getExistingVersions().stream().filter(v -> v > currentRound).forEach(v -> { try { @@ -114,7 +158,15 @@ public void cleanUpHigherVersions(int currentRound) throws IOException { } }); } - // Get a list of iterations that need checkpointing. + + /** + * Get a list of iterations that need checkpointing. + * @param firstRound The first round of training. + * @param checkpointInterval The interval at which checkpoints are to be saved. + * @param numOfRounds The number of rounds to be trained. + * @return A list of integer rounds that need checkpointing. + * @throws IOException Any exception that occurs when getting the list of rounds. 
+ */ public List getCheckpointRounds( int firstRound, int checkpointInterval, int numOfRounds) throws IOException { diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/TrackerProperties.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/TrackerProperties.java deleted file mode 100644 index 45a6b1e062a7..000000000000 --- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/TrackerProperties.java +++ /dev/null @@ -1,56 +0,0 @@ -package ml.dmlc.xgboost4j.java; - -import java.io.*; -import java.net.URL; -import java.util.Properties; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -public class TrackerProperties { - private static String PROPERTIES_FILENAME = "xgboost-tracker.properties"; - private static String HOST_IP = "host-ip"; - - private static final Log logger = LogFactory.getLog(TrackerProperties.class); - private static TrackerProperties instance = new TrackerProperties(); - - private Properties properties; - - private TrackerProperties() { - this.properties = new Properties(); - - InputStream inputStream = null; - - try { - URL propertiesFileURL = - Thread.currentThread().getContextClassLoader().getResource(PROPERTIES_FILENAME); - if (propertiesFileURL != null){ - inputStream = propertiesFileURL.openStream(); - } - } catch (IOException e) { - logger.warn("Could not load " + PROPERTIES_FILENAME + " file. ", e); - } - - if(inputStream != null){ - try { - properties.load(inputStream); - logger.debug("Loaded properties from external source"); - } catch (IOException e) { - logger.error("Error loading tracker properties file. Skipping and using defaults. ", e); - } - try { - inputStream.close(); - } catch (IOException e) { - // ignore exception - } - } - } - - public static TrackerProperties getInstance() { - return instance; - } - - public String getHostIp(){ - return this.properties.getProperty(HOST_IP); - } -} diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java index 71b4ff3f2873..f94ab78588e6 100644 --- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java +++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoost.java @@ -386,6 +386,16 @@ private static String getMetricNameFromlog(String evalInfo, String[] evalNames) } // visiable for testing + + /** + * Decides whether the evaluation metrics are to be maximized or not. + * + * @param evalInfo The evaluation log string from which the metric name is inferred. + * @param evalNames The names of the evaluation matrices. + * @param params The parameters that contain information regarding whether the + * evaluation metrics are to be maximized or not. + * @return True if the evaluation metrics are to be maximized, false otherwise. + */ public static boolean isMaximizeEvaluation(String evalInfo, String[] evalNames, Map params) { diff --git a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/LabeledPoint.scala b/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/LabeledPoint.scala deleted file mode 100644 index ccdedbaa3704..000000000000 --- a/jvm-packages/xgboost4j/src/main/scala/ml/dmlc/xgboost4j/LabeledPoint.scala +++ /dev/null @@ -1,48 +0,0 @@ -/* - Copyright (c) 2014 by Contributors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - */ - -package ml.dmlc.xgboost4j - -/** - * Labeled training data point. - * - * @param label Label of this point. - * @param size Feature dimensionality - * @param indices Feature indices of this point or `null` if the data is dense. - * @param values Feature values of this point. - * @param weight Weight of this point. - * @param group Group of this point (used for ranking) or -1. - * @param baseMargin Initial prediction on this point or `Float.NaN` - */ -case class LabeledPoint( - label: Float, - size: Int, - indices: Array[Int], - values: Array[Float], - weight: Float = 1f, - group: Int = -1, - baseMargin: Float = Float.NaN) extends Serializable { - require(indices == null || indices.length == values.length, - "indices and values must have the same number of elements") - - require(indices == null || size >= indices.length, - "feature dimensionality must be greater equal than size of indices") - - def this(label: Float, size: Int, indices: Array[Int], values: Array[Float]) = { - // [[weight]] default duplicated to disambiguate the constructor call. - this(label, size, indices, values, 1.0f) - } -} diff --git a/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java b/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java index b686ddbed858..5b55e0a6342e 100644 --- a/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java +++ b/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java @@ -294,7 +294,6 @@ public void saveLoadModelWithPath() throws XGBoostError, IOException { Booster bst2 = XGBoost.loadModel(temp.getAbsolutePath()); assert (Arrays.equals(bst2.toByteArray("ubj"), booster.toByteArray("ubj"))); assert (Arrays.equals(bst2.toByteArray("json"), booster.toByteArray("json"))); - assert (Arrays.equals(bst2.toByteArray("deprecated"), booster.toByteArray("deprecated"))); float[][] predicts2 = bst2.predict(testMat, true, 0); TestCase.assertTrue(eval.eval(predicts2, testMat) < 0.1f); } @@ -327,7 +326,6 @@ public void saveLoadModelWithFeaturesWithPath() throws XGBoostError, IOException Booster bst2 = XGBoost.loadModel(temp.getAbsolutePath()); assert (Arrays.equals(bst2.toByteArray("ubj"), booster.toByteArray("ubj"))); assert (Arrays.equals(bst2.toByteArray("json"), booster.toByteArray("json"))); - assert (Arrays.equals(bst2.toByteArray("deprecated"), booster.toByteArray("deprecated"))); float[][] predicts2 = bst2.predict(testMat, true, 0); TestCase.assertTrue(eval.eval(predicts2, testMat) < 0.1f); } @@ -899,4 +897,34 @@ public void testGetNumFeature() throws XGBoostError { Booster booster = trainBooster(trainMat, testMat); TestCase.assertEquals(booster.getNumFeature(), 126); } + + @Test + public void testConcurrentPredict() throws InterruptedException, XGBoostError, ExecutionException, TimeoutException { + DMatrix trainMat = new DMatrix(this.train_uri); + DMatrix testMat = new DMatrix(this.test_uri); + Booster booster = trainBooster(trainMat, testMat); + + float[][] expectedPredictions = booster.predict(testMat); + + ExecutorService executor = Executors.newFixedThreadPool(10); + List<CompletableFuture<Void>>
futures = new ArrayList<>(); + + //10 threads - each calling predict 50 times + for (int t = 0; t < 10; t++) { + futures.add(CompletableFuture.runAsync(() -> { + try { + for (int i = 0; i < 50; i++) { + float[][] predictions = booster.predict(testMat); + assertArrayEquals(expectedPredictions, predictions); + } + } catch (XGBoostError e) { + throw new RuntimeException(e); + } + }, executor)); + } + + CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])) + .get(30, TimeUnit.SECONDS); + executor.shutdown(); + } } diff --git a/ops/conda_env/aarch64_test.yml b/ops/conda_env/aarch64_test.yml index 14305ebbf090..02e0e97a3eb8 100644 --- a/ops/conda_env/aarch64_test.yml +++ b/ops/conda_env/aarch64_test.yml @@ -21,12 +21,10 @@ dependencies: - cmake - ninja - boto3 -- jsonschema -- boto3 - awscli - numba - llvmlite -- loky +- loky>=3.5.1 - pyarrow - pyspark>=3.4.0 - cloudpickle diff --git a/ops/conda_env/linux_cpu_test.yml b/ops/conda_env/linux_cpu_test.yml index e4c0b507c8e2..9a298ad399ed 100644 --- a/ops/conda_env/linux_cpu_test.yml +++ b/ops/conda_env/linux_cpu_test.yml @@ -3,7 +3,7 @@ channels: - conda-forge dependencies: - python=3.10 -- cmake +- cmake>=3.26.4 - c-compiler - cxx-compiler - ninja @@ -30,11 +30,10 @@ dependencies: - pytest-cov - python-kubernetes - urllib3 -- jsonschema - boto3 - awscli - py-ubjson -- loky +- loky>=3.5.1 - pyarrow - protobuf - cloudpickle diff --git a/ops/conda_env/linux_sycl_test.yml b/ops/conda_env/linux_sycl_test.yml index 1761787662ee..c9390ff22a0b 100644 --- a/ops/conda_env/linux_sycl_test.yml +++ b/ops/conda_env/linux_sycl_test.yml @@ -4,9 +4,10 @@ channels: - https://software.repos.intel.com/python/conda/ dependencies: - python=3.10 -- cmake +- cmake>=3.26.4 - c-compiler - cxx-compiler +- gtest - pip - wheel - numpy @@ -19,6 +20,6 @@ dependencies: - pytest-cov - dask=2024.11 - ninja -- dpcpp_linux-64 +- dpcpp_linux-64>=2024.2.1 - onedpl-devel - intel-openmp diff --git a/ops/conda_env/macos_cpu_test.yml b/ops/conda_env/macos_cpu_test.yml index 29ff99e3504f..2518aa5b2157 100644 --- a/ops/conda_env/macos_cpu_test.yml +++ b/ops/conda_env/macos_cpu_test.yml @@ -6,8 +6,6 @@ dependencies: - pip - wheel - pyyaml -- cpplint -- pylint - numpy - scipy - llvm-openmp @@ -20,22 +18,16 @@ dependencies: - python-graphviz - hypothesis - astroid -- sphinx - sh -- recommonmark -- mock -- breathe - pytest - pytest-cov +- pytest-timeout - python-kubernetes - urllib3 -- jsonschema - boto3 - awscli -- loky +- loky>=3.5.1 - pyarrow -- pyspark>=3.4.0 - cloudpickle - pip: - setuptools - - sphinx_rtd_theme diff --git a/ops/conda_env/win64_test.yml b/ops/conda_env/win64_test.yml index 32b9339e6fc0..57e8ad0656c3 100644 --- a/ops/conda_env/win64_test.yml +++ b/ops/conda_env/win64_test.yml @@ -11,10 +11,9 @@ dependencies: - pytest - boto3 - hypothesis -- jsonschema - cupy>=13.2 - python-graphviz - pip - py-ubjson -- loky +- loky>=3.5.1 - pyarrow diff --git a/ops/docker_run.py b/ops/docker_run.py index ba6c8e8c98c0..949f7fb7807d 100644 --- a/ops/docker_run.py +++ b/ops/docker_run.py @@ -70,7 +70,6 @@ def docker_run( docker_run_cli_args.extend( itertools.chain.from_iterable([["-e", f"{k}={v}"] for k, v in user_ids.items()]) ) - docker_run_cli_args.extend(["-e", "NCCL_RAS_ENABLE=0"]) docker_run_cli_args.extend(extra_args) docker_run_cli_args.append(image_uri) docker_run_cli_args.extend(command_args) diff --git a/ops/pipeline/build-cpu-impl.sh b/ops/pipeline/build-cpu-impl.sh index 55e205d3edfa..0ed2ee5a5e8e 100755 --- a/ops/pipeline/build-cpu-impl.sh +++ 
b/ops/pipeline/build-cpu-impl.sh @@ -24,7 +24,6 @@ case "${suite}" in -DUSE_DMLC_GTEST=ON \ -DENABLE_ALL_WARNINGS=ON \ -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF \ - -DBUILD_DEPRECATED_CLI=ON \ -DCMAKE_PREFIX_PATH='/opt/grpc' \ -DPLUGIN_FEDERATED=ON time ninja -v @@ -40,7 +39,6 @@ case "${suite}" in -DUSE_DMLC_GTEST=ON \ -DENABLE_ALL_WARNINGS=ON \ -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF \ - -DBUILD_DEPRECATED_CLI=ON \ -DUSE_SANITIZER=ON \ -DENABLED_SANITIZERS="address;leak;undefined" \ -DCMAKE_BUILD_TYPE=Debug \ diff --git a/ops/pipeline/build-cuda-impl.sh b/ops/pipeline/build-cuda-impl.sh index 75cbaae03afe..473e239388a6 100755 --- a/ops/pipeline/build-cuda-impl.sh +++ b/ops/pipeline/build-cuda-impl.sh @@ -11,7 +11,14 @@ else cmake_args='' fi -if [[ "${USE_RMM:-}" == 1 ]] +if [[ "${USE_FEDERATED:-0}" == 1 ]] +then + cmake_args="${cmake_args} -DPLUGIN_FEDERATED=ON" +else + cmake_args="${cmake_args} -DPLUGIN_FEDERATED=OFF" +fi + +if [[ "${USE_RMM:-0}" == 1 ]] then cmake_prefix_path='/opt/grpc;/opt/rmm;/opt/rmm/lib64/rapids/cmake' cmake_args="${cmake_args} -DPLUGIN_RMM=ON" @@ -30,7 +37,6 @@ cmake .. \ -DUSE_CUDA=ON \ -DUSE_OPENMP=ON \ -DHIDE_CXX_SYMBOLS=ON \ - -DPLUGIN_FEDERATED=ON \ -DUSE_NCCL=ON \ -DUSE_NCCL_LIB_PATH=ON \ -DNCCL_INCLUDE_DIR=/usr/include \ @@ -39,7 +45,6 @@ cmake .. \ -DUSE_DMLC_GTEST=ON \ -DENABLE_ALL_WARNINGS=ON \ -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF \ - -DBUILD_DEPRECATED_CLI=ON \ ${cmake_args} time ninja -v popd diff --git a/ops/pipeline/build-cuda.sh b/ops/pipeline/build-cuda.sh index 02b8d7ecd9ea..3458719bf090 100755 --- a/ops/pipeline/build-cuda.sh +++ b/ops/pipeline/build-cuda.sh @@ -16,6 +16,7 @@ then fi image_repo="$1" rmm_flag="$2" +export USE_FEDERATED=1 # Validate RMM flag case "${rmm_flag}" in @@ -52,7 +53,7 @@ set -x python3 ops/docker_run.py \ --image-uri ${BUILD_IMAGE_URI} \ - --run-args='-e BUILD_ONLY_SM75 -e USE_RMM' \ + --run-args='-e BUILD_ONLY_SM75 -e USE_RMM -e USE_FEDERATED' \ -- ops/pipeline/build-cuda-impl.sh echo "--- Audit binary wheel to ensure it's compliant with ${WHEEL_TAG} standard" diff --git a/ops/pipeline/build-cuda13.sh b/ops/pipeline/build-cuda13.sh new file mode 100755 index 000000000000..8e24e8147b70 --- /dev/null +++ b/ops/pipeline/build-cuda13.sh @@ -0,0 +1,65 @@ +#!/bin/bash +## Build XGBoost with CUDA 13 + +set -euo pipefail + +if [[ -z "${GITHUB_SHA:-}" ]] +then + echo "Make sure to set environment variable GITHUB_SHA" + exit 1 +fi + +IMAGE_REPO="xgb-ci.gpu_build_cuda13_rockylinux8" +export USE_RMM=0 +export USE_FEDERATED=0 + +source ops/pipeline/classify-git-branch.sh +source ops/pipeline/get-docker-registry-details.sh +source ops/pipeline/get-image-tag.sh + +WHEEL_TAG=manylinux_2_28_x86_64 +BUILD_IMAGE_URI="${DOCKER_REGISTRY_URL}/${IMAGE_REPO}:${IMAGE_TAG}" +MANYLINUX_IMAGE_URI="${DOCKER_REGISTRY_URL}/xgb-ci.${WHEEL_TAG}:${IMAGE_TAG}" + +echo "--- Build with CUDA" + +if [[ ($is_pull_request == 1) || ($is_release_branch == 0) ]] +then + export BUILD_ONLY_SM75=1 +else + export BUILD_ONLY_SM75=0 +fi + +set -x + +python3 ops/script/pypi_variants.py --use-suffix=cu13 --require-nccl-dep=cu13 + +python3 ops/docker_run.py \ + --image-uri ${BUILD_IMAGE_URI} \ + --run-args='-e BUILD_ONLY_SM75 -e USE_RMM -e USE_FEDERATED' \ + -- ops/pipeline/build-cuda-impl.sh + +echo "--- Audit binary wheel to ensure it's compliant with ${WHEEL_TAG} standard" +python3 ops/docker_run.py \ + --image-uri ${MANYLINUX_IMAGE_URI} \ + -- auditwheel repair --only-plat \ + --plat ${WHEEL_TAG} python-package/dist/*.whl +python3 -m wheel tags --python-tag py3 --abi-tag none 
--platform ${WHEEL_TAG} --remove \ + wheelhouse/*.whl +mv -v wheelhouse/*.whl python-package/dist/ +if ! unzip -l ./python-package/dist/*.whl | grep libgomp > /dev/null; then + echo "error: libgomp.so was not vendored in the wheel" + exit -1 +fi + +# Check size of wheel +pydistcheck --config python-package/pyproject.toml python-package/dist/*.whl + +echo "--- Upload Python wheel" +if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] +then + python3 ops/pipeline/manage-artifacts.py upload \ + --s3-bucket xgboost-nightly-builds \ + --prefix ${BRANCH_NAME}/${GITHUB_SHA} --make-public \ + python-package/dist/*.whl +fi diff --git a/ops/pipeline/build-cpu-arm64-impl.sh b/ops/pipeline/build-python-wheels-arm64-impl.sh similarity index 89% rename from ops/pipeline/build-cpu-arm64-impl.sh rename to ops/pipeline/build-python-wheels-arm64-impl.sh index ae0aa7d5b4ce..263df67f1bd2 100755 --- a/ops/pipeline/build-cpu-arm64-impl.sh +++ b/ops/pipeline/build-python-wheels-arm64-impl.sh @@ -9,6 +9,7 @@ source activate aarch64_test echo "--- Build libxgboost from the source" mkdir -p build pushd build + cmake .. \ -GNinja \ -DCMAKE_PREFIX_PATH="${CONDA_PREFIX}" \ @@ -17,8 +18,7 @@ cmake .. \ -DGOOGLE_TEST=ON \ -DUSE_DMLC_GTEST=ON \ -DENABLE_ALL_WARNINGS=ON \ - -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF \ - -DBUILD_DEPRECATED_CLI=ON + -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF time ninja -v echo "--- Run Google Test" diff --git a/ops/pipeline/build-cpu-arm64.sh b/ops/pipeline/build-python-wheels-arm64.sh similarity index 83% rename from ops/pipeline/build-cpu-arm64.sh rename to ops/pipeline/build-python-wheels-arm64.sh index 7630996cebf0..ff38ceee13de 100755 --- a/ops/pipeline/build-cpu-arm64.sh +++ b/ops/pipeline/build-python-wheels-arm64.sh @@ -1,5 +1,5 @@ #!/bin/bash -## Build and test XGBoost with ARM64 CPU +## Build and test XGBoost with ARM64 CPU (no GPU, no federated learning) set -euo pipefail @@ -14,13 +14,15 @@ source ops/pipeline/get-docker-registry-details.sh source ops/pipeline/get-image-tag.sh WHEEL_TAG=manylinux_2_28_aarch64 -IMAGE_URI=${DOCKER_REGISTRY_URL}/xgb-ci.aarch64:${IMAGE_TAG} +IMAGE_URI=${DOCKER_REGISTRY_URL}/xgb-ci.${WHEEL_TAG}:${IMAGE_TAG} echo "--- Build CPU code targeting ARM64" set -x + +python3 ops/script/pypi_variants.py --use-suffix=na --require-nccl-dep=na python3 ops/docker_run.py \ --image-uri ${IMAGE_URI} \ - -- ops/pipeline/build-cpu-arm64-impl.sh + -- ops/pipeline/build-python-wheels-arm64-impl.sh echo "--- Audit binary wheel to ensure it's compliant with ${WHEEL_TAG} standard" python3 ops/docker_run.py \ diff --git a/ops/pipeline/build-manylinux2014.sh b/ops/pipeline/build-python-wheels-cpu.sh old mode 100755 new mode 100644 similarity index 59% rename from ops/pipeline/build-manylinux2014.sh rename to ops/pipeline/build-python-wheels-cpu.sh index 03d2f525a4bc..6f1418996749 --- a/ops/pipeline/build-manylinux2014.sh +++ b/ops/pipeline/build-python-wheels-cpu.sh @@ -1,4 +1,5 @@ #!/bin/bash +# Build Python wheels, CPU variant (no federated learning) set -euo pipefail @@ -8,53 +9,33 @@ then exit 1 fi -if [[ "$#" -lt 1 ]] +if [[ "$#" -lt 2 ]] then - echo "Usage: $0 {x86_64,aarch64}" + echo "Usage: $0 {manylinux2014,manylinux_2_28} {x86_64,aarch64}" exit 1 fi -arch="$1" +manylinux_target="$1" +arch="$2" source ops/pipeline/classify-git-branch.sh source ops/pipeline/get-docker-registry-details.sh source ops/pipeline/get-image-tag.sh -WHEEL_TAG="manylinux2014_${arch}" +WHEEL_TAG="${manylinux_target}_${arch}" IMAGE_REPO="xgb-ci.${WHEEL_TAG}" 
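# Illustrative example (editor's assumption, derived from the usage message above): invoking "build-python-wheels-cpu.sh manylinux_2_28 x86_64" yields WHEEL_TAG=manylinux_2_28_x86_64 and hence IMAGE_REPO=xgb-ci.manylinux_2_28_x86_64.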
IMAGE_URI="${DOCKER_REGISTRY_URL}/${IMAGE_REPO}:${IMAGE_TAG}" PYTHON_BIN="/opt/python/cp310-cp310/bin/python" -echo "--- Build binary wheel for ${WHEEL_TAG}" +echo "--- Build binary wheel for ${WHEEL_TAG} (CPU only)" set -x -python3 ops/script/pypi_variants.py --variant=manylinux2014 -python3 ops/docker_run.py \ - --image-uri "${IMAGE_URI}" \ - -- bash -c \ - "cd python-package && ${PYTHON_BIN} -m pip wheel --no-deps -v . --wheel-dir dist/" -# discard the patch -python3 ops/script/pypi_variants.py --variant=default - -python3 ops/docker_run.py \ - --image-uri "${IMAGE_URI}" \ - -- auditwheel repair --only-plat \ - --plat ${WHEEL_TAG} python-package/dist/*.whl -python3 -m wheel tags --python-tag py3 --abi-tag none --platform ${WHEEL_TAG} --remove \ - wheelhouse/*.whl -rm -rf python-package/dist/ -mkdir python-package/dist/ -mv -v wheelhouse/*.whl python-package/dist/ - -echo "--- Build binary wheel for ${WHEEL_TAG} (CPU only)" # Patch to rename pkg to xgboost-cpu -python3 ops/script/pypi_variants.py --variant=cpu +python3 ops/script/pypi_variants.py --use-suffix=cpu --require-nccl-dep=na python3 ops/docker_run.py \ --image-uri "${IMAGE_URI}" \ -- bash -c \ "cd python-package && ${PYTHON_BIN} -m pip wheel --no-deps -v . --wheel-dir dist/" -# discard the patch -python3 ops/script/pypi_variants.py --variant=default python3 ops/docker_run.py \ --image-uri "${IMAGE_URI}" \ @@ -65,6 +46,14 @@ python3 -m wheel tags --python-tag py3 --abi-tag none --platform ${WHEEL_TAG} -- rm -v python-package/dist/xgboost_cpu-*.whl mv -v wheelhouse/xgboost_cpu-*.whl python-package/dist/ +if ! unzip -l ./python-package/dist/*.whl | grep libgomp > /dev/null; then + echo "error: libgomp.so was not vendored in the wheel" + exit -1 +fi + +# Check size of wheel +pydistcheck --config python-package/pyproject.toml python-package/dist/*.whl + if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] then python3 ops/pipeline/manage-artifacts.py upload \ diff --git a/ops/pipeline/build-python-wheels-macos.sh b/ops/pipeline/build-python-wheels-macos.sh index 18ee2bd5b56b..9c04032d8587 100755 --- a/ops/pipeline/build-python-wheels-macos.sh +++ b/ops/pipeline/build-python-wheels-macos.sh @@ -1,4 +1,5 @@ #!/bin/bash +# Build Python wheels targeting MacOS (no federated learning) set -euox pipefail diff --git a/ops/pipeline/build-r-docs.sh b/ops/pipeline/build-r-docs.sh index 32af3a60fb43..28477c3e91b4 100755 --- a/ops/pipeline/build-r-docs.sh +++ b/ops/pipeline/build-r-docs.sh @@ -10,7 +10,8 @@ fi source ops/pipeline/get-docker-registry-details.sh -IMAGE_URI=${DOCKER_REGISTRY_URL}/xgb-ci.cpu_build_r_doc:main +source ops/pipeline/get-image-tag.sh +IMAGE_URI=${DOCKER_REGISTRY_URL}/xgb-ci.cpu_build_r_doc:${IMAGE_TAG} echo "--- Build R package doc" set -x diff --git a/ops/pipeline/build-test-cpu-nonomp.sh b/ops/pipeline/build-test-cpu-nonomp.sh index 5bd6fa7f9d32..ee61c7c9cf76 100755 --- a/ops/pipeline/build-test-cpu-nonomp.sh +++ b/ops/pipeline/build-test-cpu-nonomp.sh @@ -12,8 +12,7 @@ cmake .. \ -DGOOGLE_TEST=ON \ -DUSE_DMLC_GTEST=ON \ -DENABLE_ALL_WARNINGS=ON \ - -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF \ - -DBUILD_DEPRECATED_CLI=ON + -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF time ninja -v ctest --extra-verbose popd diff --git a/ops/pipeline/build-test-sycl.sh b/ops/pipeline/build-test-sycl.sh index f3b651b18cf9..ee3a4f70049c 100755 --- a/ops/pipeline/build-test-sycl.sh +++ b/ops/pipeline/build-test-sycl.sh @@ -13,7 +13,7 @@ suite="$1" mkdir build pushd build -cmake .. 
-DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DPLUGIN_SYCL=ON -DCMAKE_CXX_COMPILER=g++ \ +cmake .. -DGOOGLE_TEST=ON -DPLUGIN_SYCL=ON -DCMAKE_CXX_COMPILER=g++ \ -DCMAKE_C_COMPILER=gcc -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX \ -DCMAKE_PREFIX_PATH=$CONDA_PREFIX -GNinja ninja diff --git a/ops/pipeline/build-variant-wheels-impl.sh b/ops/pipeline/build-variant-wheels-impl.sh new file mode 100755 index 000000000000..b033fc2f8571 --- /dev/null +++ b/ops/pipeline/build-variant-wheels-impl.sh @@ -0,0 +1,24 @@ +#!/bin/bash +## Build Python wheels using Wheel Variant prototype (WheelNext) +## Companion script for ops/pipeline/build-variant-wheels.sh + +set -eo pipefail + +set -x +gosu root chown -R $(id -u):$(id -g) /opt/miniforge/envs /opt/miniforge/pkgs/cache +gosu root chown $(id -u):$(id -g) /opt/miniforge/pkgs +set +x + +mamba create -y -n wheelnext python=3.13 python-build + +source activate wheelnext + +# Cannot set -u before Conda env activation +set -xu + +python -m pip install "variantlib[cli] @ git+https://github.com/wheelnext/variantlib.git@main" +python -m pip install "nvidia-variant-provider @ git+https://github.com/wheelnext/nvidia-variant-provider.git@master" +variantlib make-variant --no-isolation -f python-package/dist/xgboost-*.whl \ + -p "nvidia :: cuda_version_lower_bound :: 12.0" \ + -p "nvidia :: cuda_version_upper_bound :: 13" \ + -o . --pyproject-toml python-package/pyproject.toml diff --git a/ops/pipeline/build-variant-wheels.sh b/ops/pipeline/build-variant-wheels.sh new file mode 100755 index 000000000000..25aaa048b2e0 --- /dev/null +++ b/ops/pipeline/build-variant-wheels.sh @@ -0,0 +1,52 @@ +#!/bin/bash +## Build Python wheels using Wheel Variant prototype (WheelNext) + +set -euo pipefail + +if [[ -z "${GITHUB_SHA:-}" ]] +then + echo "Make sure to set environment variable GITHUB_SHA" + exit 1 +fi + +image_repo='xgb-ci.gpu_build_rockylinux8' +export USE_RMM=0 +export USE_FEDERATED=0 + +source ops/pipeline/classify-git-branch.sh +source ops/pipeline/get-docker-registry-details.sh +source ops/pipeline/get-image-tag.sh + +WHEEL_TAG=manylinux_2_28_x86_64 +BUILD_IMAGE_URI="${DOCKER_REGISTRY_URL}/${image_repo}:${IMAGE_TAG}" +MANYLINUX_IMAGE_URI="${DOCKER_REGISTRY_URL}/xgb-ci.${WHEEL_TAG}:${IMAGE_TAG}" + +echo "--- Build with CUDA" + +if [[ ($is_pull_request == 1) || ($is_release_branch == 0) ]] +then + export BUILD_ONLY_SM75=1 +else + export BUILD_ONLY_SM75=0 +fi + +set -x + +python3 ops/docker_run.py \ + --image-uri ${BUILD_IMAGE_URI} \ + --run-args='-e BUILD_ONLY_SM75 -e USE_RMM -e USE_FEDERATED' \ + -- ops/pipeline/build-cuda-impl.sh + +echo "--- Audit binary wheel to ensure it's compliant with ${WHEEL_TAG} standard" +python3 ops/docker_run.py \ + --image-uri ${MANYLINUX_IMAGE_URI} \ + -- auditwheel repair --only-plat \ + --plat ${WHEEL_TAG} python-package/dist/*.whl +python3 -m wheel tags --python-tag py3 --abi-tag none --platform ${WHEEL_TAG} --remove \ + wheelhouse/*.whl +mv -v wheelhouse/*.whl python-package/dist/ + +echo "--- Convert Python wheel to variant wheel" +python3 ops/docker_run.py \ + --image-uri ${BUILD_IMAGE_URI} \ + -- ops/pipeline/build-variant-wheels-impl.sh diff --git a/ops/pipeline/build-win64-cpu.ps1 b/ops/pipeline/build-win64-cpu.ps1 index f80a12a0d891..52583358e98a 100644 --- a/ops/pipeline/build-win64-cpu.ps1 +++ b/ops/pipeline/build-win64-cpu.ps1 @@ -18,7 +18,7 @@ Write-Host "--- Build binary wheel" cd .. 
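# Illustrative note (based on the pypi_variants.py options shown later in this diff): the suffix and the NCCL requirement are now independent knobs, so "--use-suffix=cpu" renames the package to xgboost-cpu while "--require-nccl-dep=na" drops the nvidia-nccl wheel dependency.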
# Patch to rename pkg to xgboost-cpu conda activate -python ops/script/pypi_variants.py --variant=cpu +python ops/script/pypi_variants.py --use-suffix=cpu --require-nccl-dep=na if ($LASTEXITCODE -ne 0) { throw "Last command failed" } cd python-package diff --git a/ops/pipeline/build-win64-gpu.ps1 b/ops/pipeline/build-win64-gpu.ps1 index dd99f77aa8a9..e73d7352ce93 100644 --- a/ops/pipeline/build-win64-gpu.ps1 +++ b/ops/pipeline/build-win64-gpu.ps1 @@ -19,7 +19,7 @@ git clone https://github.com/NVIDIA/cccl.git -b v2.6.1 --quiet mkdir build cd build cmake .. -G"Visual Studio 17 2022" -A x64 -DUSE_CUDA=ON ` - -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DBUILD_DEPRECATED_CLI=ON ` + -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON ` -DCMAKE_PREFIX_PATH="$(Get-Location)/../cccl" ${arch_flag} if ($LASTEXITCODE -ne 0) { throw "Last command failed" } cmake --build . --config Release -- /m /nodeReuse:false ` diff --git a/ops/pipeline/test-cpp-cuda13.sh b/ops/pipeline/test-cpp-cuda13.sh new file mode 100755 index 000000000000..2ccd7bea6abc --- /dev/null +++ b/ops/pipeline/test-cpp-cuda13.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +set -euox pipefail + +source ops/pipeline/get-docker-registry-details.sh +source ops/pipeline/get-image-tag.sh + +IMAGE_REPO='xgb-ci.gpu_build_cuda13_rockylinux8' +IMAGE_URI=${DOCKER_REGISTRY_URL}/${IMAGE_REPO}:${IMAGE_TAG} + +echo "--- Run Google Tests, using a single GPU, CUDA 13" +python3 ops/docker_run.py --image-uri ${IMAGE_URI} --use-gpus \ + --run-args='--privileged' \ + -- build/testxgboost diff --git a/ops/pipeline/test-python-macos.sh b/ops/pipeline/test-python-macos.sh index 63b5690d1312..5c69fc4b7277 100755 --- a/ops/pipeline/test-python-macos.sh +++ b/ops/pipeline/test-python-macos.sh @@ -10,7 +10,7 @@ pushd build # Set prefix, to use OpenMP library from Conda env # See https://github.com/dmlc/xgboost/issues/7039#issuecomment-1025038228 # to learn why we don't use libomp from Homebrew. -cmake .. -GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX -DBUILD_DEPRECATED_CLI=ON +cmake .. 
-GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX ninja popd diff --git a/ops/pipeline/test-python-wheel-cuda13-impl.sh b/ops/pipeline/test-python-wheel-cuda13-impl.sh new file mode 100755 index 000000000000..4660a8b7f46a --- /dev/null +++ b/ops/pipeline/test-python-wheel-cuda13-impl.sh @@ -0,0 +1,20 @@ +#!/bin/bash +## Companion script for ops/pipeline/test-python-wheel-cuda13.sh + +set -eo pipefail +# Cannot set -u before Conda env activation + +# Set up Conda env +gosu root chown -R $(id -u):$(id -g) /opt/miniforge/envs /opt/miniforge/pkgs/cache +gosu root chown $(id -u):$(id -g) /opt/miniforge/pkgs +mamba create -y -n gpu_test python=3.12 pytest cupy scipy numpy pandas scikit-learn joblib hypothesis + +source activate gpu_test + +set -xu + +pip install -v ./wheelhouse/*.whl + +echo "-- Run Python tests, using a single GPU, CUDA 13" +python -c 'from cupy.cuda import jitify; jitify._init_module()' +pytest -v -s -rxXs --durations=0 -m 'not mgpu' tests/python-gpu diff --git a/ops/pipeline/test-python-wheel-cuda13.sh b/ops/pipeline/test-python-wheel-cuda13.sh new file mode 100755 index 000000000000..279411779927 --- /dev/null +++ b/ops/pipeline/test-python-wheel-cuda13.sh @@ -0,0 +1,15 @@ +#!/bin/bash +## Test XGBoost Python wheel on the Linux platform, CUDA 13 + +set -euo pipefail + +source ops/pipeline/get-docker-registry-details.sh +source ops/pipeline/get-image-tag.sh + +IMAGE_REPO="xgb-ci.gpu_build_cuda13_rockylinux8" +IMAGE_URI="${DOCKER_REGISTRY_URL}/${IMAGE_REPO}:${IMAGE_TAG}" + +set -x +python3 ops/docker_run.py --image-uri "${IMAGE_URI}" --use-gpus \ + --run-args='--shm-size=4g --privileged' \ + -- bash ops/pipeline/test-python-wheel-cuda13-impl.sh diff --git a/ops/pipeline/test-python-wheel-impl.sh b/ops/pipeline/test-python-wheel-impl.sh index 4620e6ebf7fc..5c24e31210d2 100755 --- a/ops/pipeline/test-python-wheel-impl.sh +++ b/ops/pipeline/test-python-wheel-impl.sh @@ -45,7 +45,6 @@ case "$suite" in mgpu) echo "-- Run Python tests, using multiple GPUs" python -c 'from cupy.cuda import jitify; jitify._init_module()' - export NCCL_RAS_ENABLE=0 pytest -v -s -rxXs --durations=0 -m 'mgpu' tests/python-gpu pytest -v -s -rxXs --durations=0 tests/test_distributed/test_gpu_with_dask pytest -v -s -rxXs --durations=0 tests/test_distributed/test_gpu_with_spark diff --git a/ops/script/change_scala_version.py b/ops/script/change_scala_version.py index ed475a1f9582..4dfdd788c167 100644 --- a/ops/script/change_scala_version.py +++ b/ops/script/change_scala_version.py @@ -20,9 +20,10 @@ def main(args: argparse.Namespace) -> None: if target.is_dir(): print(f"Removing {target}...") shutil.rmtree(target) - for target in pathlib.Path("jvm-packages/").glob("**/*.so"): - print(f"Removing {target}...") - target.unlink() + for ext in ["so", "dll", "dylib"]: + for target in pathlib.Path("jvm-packages/").glob(f"**/*.{ext}"): + print(f"Removing {target}...") + target.unlink() # Update pom.xml for pom in pathlib.Path("jvm-packages/").glob("**/pom.xml"): diff --git a/ops/script/change_version.py b/ops/script/change_version.py index c982ee7e4550..82752a05e5f7 100644 --- a/ops/script/change_version.py +++ b/ops/script/change_version.py @@ -52,16 +52,17 @@ def pypkg( with open(pyver_path, "w") as fd: fd.write(pyver + "\n") - pyprj_path = os.path.join("pyproject.toml.in") - with open(pyprj_path, "r") as fd: - pyprj = fd.read() - matched = re.search('version = "' + r"([0-9]+\.[0-9]+\.[0-9]+.*)" + '"', pyprj) - assert matched, "Couldn't find version string in pyproject.toml." 
- pyprj = pyprj[: matched.start(1)] + pyver + pyprj[matched.end(1) :] - with open(pyprj_path, "w") as fd: - fd.write(pyprj) + for pyprj_file in ["pyproject.toml.in", "pyproject.toml.stub.in"]: + pyprj_path = os.path.join(pyprj_file) + with open(pyprj_path, "r") as fd: + pyprj = fd.read() + matched = re.search('version = "' + r"([0-9]+\.[0-9]+\.[0-9]+.*)" + '"', pyprj) + assert matched, f"Couldn't find version string in {pyprj_path}." + pyprj = pyprj[: matched.start(1)] + pyver + pyprj[matched.end(1) :] + with open(pyprj_path, "w") as fd: + fd.write(pyprj) - make_pyproject("default") + make_pyproject(use_suffix="na", require_nccl_dep="cu12") @cd(R_PACKAGE) @@ -152,9 +153,9 @@ def main(args: argparse.Namespace) -> None: if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--major", type=int) - parser.add_argument("--minor", type=int) - parser.add_argument("--patch", type=int) + parser.add_argument("--major", type=int, required=True) + parser.add_argument("--minor", type=int, required=True) + parser.add_argument("--patch", type=int, required=True) parser.add_argument("--rc", type=int, default=0) parser.add_argument("--is-rc", action="store_true") parser.add_argument("--is-dev", action="store_true") diff --git a/ops/script/changelog.py b/ops/script/changelog.py new file mode 100644 index 000000000000..552a82f2e49d --- /dev/null +++ b/ops/script/changelog.py @@ -0,0 +1,32 @@ +"""Helper script for creating links to PRs for the changelog. This should be used with the +`sphinx-issues` extension. + +""" + +import argparse +import os +import re + +from test_utils import ROOT + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--version", + type=str, + required=True, + help="Major version of the changelog, e.g., 3.0.0.", + ) + args = parser.parse_args() + version = args.version + + fname = os.path.join(ROOT, f"doc/changes/v{version}.rst") + + with open(fname) as fd: + note = fd.read() + + # E.g. #11285 -> :pr:`11285`.
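+    # For example (illustrative): a note line "Improve the tracker. (#11285)" becomes + # "Improve the tracker. (:pr:`11285`)", which sphinx-issues renders as a link to the PR.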
+ regex = re.compile(r"(#)(\d+)") + note = re.sub(regex, r":pr:`\2`", note) + with open(fname, "w") as fd: + fd.write(note) diff --git a/ops/script/inject_jvm_lib.sh b/ops/script/inject_jvm_lib.sh index 82584aeaca92..ac84e98d8980 100755 --- a/ops/script/inject_jvm_lib.sh +++ b/ops/script/inject_jvm_lib.sh @@ -16,16 +16,15 @@ mkdir -p jvm-packages/xgboost4j/src/test/resources mkdir -p jvm-packages/xgboost4j-spark/src/test/resources mkdir -p jvm-packages/xgboost4j-spark-gpu/src/test/resources -# Generate machine.txt.* files from the CLI regression demo -# TODO(hcho3): Remove once CLI is removed -pushd demo/CLI/regression +# Generate machine.txt.* files from the regression demo +pushd demo/data/regression python3 mapfeat.py python3 mknfold.py machine.txt 1 popd cp -v demo/data/agaricus.* \ jvm-packages/xgboost4j/src/test/resources -cp -v demo/CLI/regression/machine.txt.t* demo/data/agaricus.* \ +cp -v demo/data/regression/machine.txt.t* demo/data/agaricus.* \ jvm-packages/xgboost4j-spark/src/test/resources cp -v demo/data/veterans_lung_cancer.csv \ jvm-packages/xgboost4j-spark/src/test/resources/rank.train.csv \ diff --git a/ops/script/lint_cpp.py b/ops/script/lint_cpp.py index 2d00b219ceab..b89f29c6edbe 100644 --- a/ops/script/lint_cpp.py +++ b/ops/script/lint_cpp.py @@ -71,7 +71,6 @@ def __init__(self) -> None: "-build/include,", "+build/namespaces", "+build/include_what_you_use", - "+build/include_order", ] ) ) diff --git a/ops/script/lint_python.py b/ops/script/lint_python.py index c4267accebbb..96b476d19471 100644 --- a/ops/script/lint_python.py +++ b/ops/script/lint_python.py @@ -17,6 +17,8 @@ class LintersPaths: # core "python-package/", # tests + "tests/python/generate_models.py", + "tests/python/test_model_compatibility.py", "tests/python/test_config.py", "tests/python/test_callback.py", "tests/python/test_collective.py", @@ -29,14 +31,19 @@ class LintersPaths: "tests/python/test_objectives.py", "tests/python/test_ordinal.py", "tests/python/test_predict.py", + "tests/python/test_pickling.py", + "tests/python/test_plotting.py", + "tests/python/test_parse_tree.py", "tests/python/test_quantile_dmatrix.py", "tests/python/test_tracker.py", "tests/python/test_tree_regularization.py", "tests/python/test_training_continuation.py", "tests/python/test_shap.py", "tests/python/test_updaters.py", + "tests/python/test_interaction_constraints.py", "tests/python/test_model_io.py", "tests/python/test_with_pandas.py", + "tests/python/test_with_polars.py", "tests/python-gpu/", "tests/python-sycl/", "tests/test_distributed/test_federated/", @@ -54,6 +61,7 @@ class LintersPaths: "demo/guide-python/categorical.py", "demo/guide-python/cat_pipeline.py", "demo/guide-python/cross_validation.py", + "demo/guide-python/custom_softmax.py", "demo/guide-python/feature_weights.py", "demo/guide-python/model_parser.py", "demo/guide-python/sklearn_parallel.py", @@ -93,40 +101,26 @@ class LintersPaths: # core "python-package/", # tests + "tests/python/generate_models.py", + "tests/python/test_model_compatibility.py", "tests/python/test_collective.py", "tests/python/test_demos.py", "tests/python/test_data_iterator.py", "tests/python/test_multi_target.py", "tests/python/test_objectives.py", - "tests/python-gpu/test_gpu_data_iterator.py", - "tests/python-gpu/load_pickle.py", - "tests/python-gpu/test_gpu_training_continuation.py", "tests/python/test_model_io.py", "tests/python/test_ordinal.py", + "tests/python/test_interaction_constraints.py", + "tests/python-gpu/", "tests/test_distributed/test_federated/", 
"tests/test_distributed/test_gpu_federated/", - "tests/test_distributed/test_with_dask/test_ranking.py", - "tests/test_distributed/test_with_dask/test_external_memory.py", + "tests/test_distributed/test_with_dask/", "tests/test_distributed/test_with_spark/test_data.py", "tests/test_distributed/test_gpu_with_spark/test_data.py", "tests/test_distributed/test_gpu_with_dask/", # demo "demo/dask/", - "demo/guide-python/external_memory.py", - "demo/guide-python/distributed_extmem_basic.py", - "demo/guide-python/sklearn_examples.py", - "demo/guide-python/continuation.py", - "demo/guide-python/callbacks.py", - "demo/guide-python/cat_in_the_dat.py", - "demo/guide-python/categorical.py", - "demo/guide-python/cat_pipeline.py", - "demo/guide-python/feature_weights.py", - "demo/guide-python/model_parser.py", - "demo/guide-python/individual_trees.py", - "demo/guide-python/quantile_regression.py", - "demo/guide-python/quantile_data_iterator.py", - "demo/guide-python/multioutput_regression.py", - "demo/guide-python/learning_to_rank.py", + "demo/guide-python/", "demo/aft_survival/aft_survival_viz_demo.py", # CI "ops/", diff --git a/ops/script/pypi_variants.py b/ops/script/pypi_variants.py index d13b7d62c8ae..22717f5c1cd1 100644 --- a/ops/script/pypi_variants.py +++ b/ops/script/pypi_variants.py @@ -2,19 +2,21 @@ import argparse import os +import tomllib +from packaging.version import Version from test_utils import PY_PACKAGE IN_PATH = os.path.join(PY_PACKAGE, "pyproject.toml.in") +STUB_IN_PATH = os.path.join(PY_PACKAGE, "pyproject.toml.stub.in") OUT_PATH = os.path.join(PY_PACKAGE, "pyproject.toml") -CHOICES = ["default", "cpu", "manylinux2014"] - - -NCCL_WHL = """ \"nvidia-nccl-cu12 ; platform_system == 'Linux' and platform_machine != 'aarch64'\",""" +NCCL_WHL = """ \"nvidia-nccl-{0} ; platform_system == 'Linux' and platform_machine != 'aarch64'\",""" NAME = "{{ name }}" NCCL = "{{ nccl }}" +VERSION = "{{ version }}" +CUDA_VARIANTS = ["cu12", "cu13"] def copyfile(src: str, dst: str) -> None: @@ -24,26 +26,59 @@ def copyfile(src: str, dst: str) -> None: fd.write(content) -def make_pyproject(variant: str) -> None: - assert variant in CHOICES - - with open(IN_PATH) as fd: +def make_pyproject( + *, use_suffix: str, require_nccl_dep: str, create_stub: bool = False +) -> None: + if use_suffix == "cpu" and require_nccl_dep != "na": + raise ValueError( + "xgboost-cpu cannot require NCCL dependency. " + "When setting --use-suffix='cpu', you must also set --require-nccl-dep='na'." + ) + if ( + use_suffix in CUDA_VARIANTS + and require_nccl_dep in CUDA_VARIANTS + and use_suffix != require_nccl_dep + ): + raise ValueError( + "Inconsistent choices for --use-suffix and --require-nccl-dep. 
" + "When --use-suffix is set to one of {{{0}}}, --require-nccl-dep must be " + "set to identical value as --use-suffix.".format(",".join(CUDA_VARIANTS)) + ) + if create_stub: + if use_suffix == "na": + raise ValueError("To create a stub package, --use-suffix must not be 'na'") + if require_nccl_dep != "na": + raise ValueError( + "To create a stub package, --require-nccl-dep must be 'na'" + ) + + with open(STUB_IN_PATH if create_stub else IN_PATH) as fd: pyproject = fd.read() readme_dft = os.path.join(PY_PACKAGE, "README.dft.rst") readme_cpu = os.path.join(PY_PACKAGE, "README.cpu.rst") + readme_stub = os.path.join(PY_PACKAGE, "README.stub.rst") readme = os.path.join(PY_PACKAGE, "README.rst") - if variant == "cpu": - pyproject = pyproject.replace(NAME, "xgboost-cpu").replace(NCCL, "") + pyproject = pyproject.replace( + NAME, f"xgboost-{use_suffix}" if use_suffix != "na" else "xgboost" + ) + if create_stub: + copyfile(readme_stub, readme) + pyproject_parsed = tomllib.loads(pyproject) + pyproject = pyproject.replace( + VERSION, str(Version(pyproject_parsed["project"]["version"])) + ) + elif use_suffix == "cpu": copyfile(readme_cpu, readme) - elif variant == "manylinux2014": - pyproject = pyproject.replace(NAME, "xgboost").replace(NCCL, "") - copyfile(readme_dft, readme) else: - pyproject = pyproject.replace(NAME, "xgboost").replace(NCCL, NCCL_WHL) copyfile(readme_dft, readme) + pyproject = pyproject.replace( + NCCL, NCCL_WHL.format(require_nccl_dep) if require_nccl_dep != "na" else "" + ) pyproject = ( - f"# Generated by `{os.path.basename(__file__)}`, don't edit.\n" + pyproject + f"# Generated by `{os.path.basename(__file__)}`, don't edit: " + f"'--use-suffix={use_suffix} --require-nccl-dep={require_nccl_dep} " + f"--create-stub={create_stub}'\n" + pyproject ) with open(OUT_PATH, "w") as fd: @@ -53,10 +88,30 @@ def make_pyproject(variant: str) -> None: if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( - "--variant", + "--use-suffix", type=str, - choices=CHOICES, - default="default", + choices=["na", "cpu"] + CUDA_VARIANTS, + default="na", + help=( + "When using this option, rename the package name to xgboost-[suffix]. 
" + "Set to 'na' to disable" + ), + ) + parser.add_argument( + "--require-nccl-dep", + type=str, + choices=["na"] + CUDA_VARIANTS, + required=True, + help="Which NCCL dependency to use; select 'na' to remove NCCL dependency", + ) + parser.add_argument( + "--create-stub", + action="https://github.com/store_true", + help="Create a stub package that redirects users to install `xgboost`", ) args = parser.parse_args() - make_pyproject(args.variant) + make_pyproject( + use_suffix=args.use_suffix, + require_nccl_dep=args.require_nccl_dep, + create_stub=args.create_stub, + ) diff --git a/dev/release-artifacts.py b/ops/script/release_artifacts.py similarity index 87% rename from dev/release-artifacts.py rename to ops/script/release_artifacts.py index fc6c0f3b1307..ef05a71420ac 100644 --- a/dev/release-artifacts.py +++ b/ops/script/release_artifacts.py @@ -5,39 +5,30 @@ """ import argparse -import os import shutil import subprocess import tarfile import tempfile from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Dict, List, Optional, Tuple from urllib.request import urlretrieve import tqdm from packaging import version +from pypi_variants import make_pyproject from sh.contrib import git +from test_utils import PY_PACKAGE +from test_utils import ROOT as root_path +from test_utils import DirectoryExcursion # S3 bucket hosting the release artifacts S3_BUCKET_URL = "https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds" -ROOT = Path(__file__).absolute().parent.parent -DIST = ROOT / "python-package" / "dist" +DIST = Path(PY_PACKAGE) / "dist" +ROOT = Path(root_path) pbar = None -class DirectoryExcursion: - def __init__(self, path: Path) -> None: - self.path = path - self.curdir = Path.cwd().resolve() - - def __enter__(self) -> None: - os.chdir(self.path) - - def __exit__(self, *args: Any) -> None: - os.chdir(self.curdir) - - def show_progress(block_num: int, block_size: int, total_size: int) -> None: """Show file download progress.""" global pbar @@ -118,49 +109,57 @@ def make_python_sdist( dist_dir = outdir / "dist" dist_dir.mkdir(exist_ok=True) - # Apply patch to remove NCCL dependency - # Save the original content of pyproject.toml so that we can restore it later + # Build sdist for `xgboost-cpu`, `xgboost`. + for suffix, nccl_dep in [("cpu", "na"), ("na", "na")]: + with DirectoryExcursion(ROOT): + make_pyproject(use_suffix=suffix, require_nccl_dep=nccl_dep) + with DirectoryExcursion(ROOT / "python-package"): + subprocess.run(["python", "-m", "build", "--sdist"], check=True) + pkg_name = "xgboost" if suffix == "na" else f"xgboost_{suffix}" + sdist_name = ( + f"{pkg_name}-{release}{rc}{rc_ver}.tar.gz" + if rc + else f"{pkg_name}-{release}.tar.gz" + ) + src = DIST / sdist_name + subprocess.run(["twine", "check", str(src)], check=True) + dest = dist_dir / sdist_name + shutil.move(src, dest) + + # Build stub package `xgboost-cu12`. 
with DirectoryExcursion(ROOT): - with open("python-package/pyproject.toml", "r") as f: - orig_pyproj_lines = f.read() - with open("ops/patch/remove_nccl_dep.patch", "r") as f: - patch_lines = f.read() - subprocess.run( - ["patch", "-p0"], input=patch_lines, check=True, text=True, encoding="utf-8" - ) + make_pyproject(use_suffix="cu12", require_nccl_dep="na", create_stub=True) with DirectoryExcursion(ROOT / "python-package"): subprocess.run(["python", "-m", "build", "--sdist"], check=True) sdist_name = ( - f"xgboost-{release}{rc}{rc_ver}.tar.gz" + f"xgboost_cu12-{release}{rc}{rc_ver}.tar.gz" if rc - else f"xgboost-{release}.tar.gz" + else f"xgboost_cu12-{release}.tar.gz" ) src = DIST / sdist_name subprocess.run(["twine", "check", str(src)], check=True) dest = dist_dir / sdist_name shutil.move(src, dest) - with DirectoryExcursion(ROOT): - with open("python-package/pyproject.toml", "w") as f: - f.write(orig_pyproj_lines) - def download_python_wheels(branch: str, commit_hash: str, outdir: Path) -> None: """Download all Python binary wheels for the specified branch.""" full_platforms = [ "win_amd64", - "manylinux2014_x86_64", - "manylinux2014_aarch64", "manylinux_2_28_x86_64", "manylinux_2_28_aarch64", "macosx_10_15_x86_64", "macosx_12_0_arm64", ] + cu13_platforms = [ + "manylinux_2_28_x86_64", + ] minimal_platforms = [ "win_amd64", - "manylinux2014_x86_64", - "manylinux2014_aarch64", + "win_arm64", + "manylinux_2_28_x86_64", + "manylinux_2_28_aarch64", ] # https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds/release_3.0.0/4bfd4bf60d32e2d62426cc4070ccb5a5ba1ed078/xgboost-3.0.0rc1-py3-none-manylinux_2_28_x86_64.whl @@ -169,6 +168,7 @@ def download_python_wheels(branch: str, commit_hash: str, outdir: Path) -> None: for pkg_name, platforms in [ ("xgboost", full_platforms), ("xgboost_cpu", minimal_platforms), + ("xgboost_cu13", cu13_platforms), ]: src_filename_prefix = f"{pkg_name}-{args.release}-py3-none-" target_filename_prefix = f"{pkg_name}-{args.release}-py3-none-" @@ -318,6 +318,7 @@ def main(args: argparse.Namespace) -> None: rc_ver: Optional[int] = None else: # RC release + assert release_parsed.pre is not None rc, rc_ver = release_parsed.pre if rc != "rc": raise ValueError( diff --git a/plugin/example/custom_obj.cc b/plugin/example/custom_obj.cc index 5d61e812ac9b..86f941945518 100644 --- a/plugin/example/custom_obj.cc +++ b/plugin/example/custom_obj.cc @@ -1,13 +1,14 @@ /** - * Copyright 2015-2023, XGBoost Contributors + * Copyright 2015-2025, XGBoost Contributors * \file custom_metric.cc * \brief This is an example to define plugin of xgboost. * This plugin defines the additional metric function. 
*/ #include -#include -#include #include +#include // for Vector +#include +#include namespace xgboost::obj { // This is a helpful data structure to define parameters @@ -62,9 +63,12 @@ class MyLogistic : public ObjFunction { pred = 1.0f / (1.0f + std::exp(-pred)); } } - [[nodiscard]] float ProbToMargin(float base_score) const override { + void ProbToMargin(linalg::Vector* base_score) const override { // transform probability to margin value - return -std::log(1.0f / base_score - 1.0f); + auto h_intercept = base_score->HostView(); + for (std::size_t i = 0, n = h_intercept.Size(); i < n; ++i) { + h_intercept(i) = -std::log(1.0f / h_intercept(i) - 1.0f); + } } void SaveConfig(Json* p_out) const override { diff --git a/plugin/federated/federated_comm.cuh b/plugin/federated/federated_comm.cuh index 85cecb3eb331..b9474a46a96e 100644 --- a/plugin/federated/federated_comm.cuh +++ b/plugin/federated/federated_comm.cuh @@ -1,18 +1,18 @@ /** - * Copyright 2023-2024, XGBoost Contributors + * Copyright 2023-2025, XGBoost Contributors */ #pragma once #include // for shared_ptr #include "../../src/collective/coll.h" // for Coll -#include "../../src/common/device_helpers.cuh" // for CUDAStreamView +#include "../../src/common/cuda_stream.h" // for StreamRef #include "federated_comm.h" // for FederatedComm #include "xgboost/context.h" // for Context namespace xgboost::collective { class CUDAFederatedComm : public FederatedComm { - dh::CUDAStreamView stream_; + curt::StreamRef stream_; public: explicit CUDAFederatedComm(Context const* ctx, std::shared_ptr impl); diff --git a/plugin/sycl/common/hist_util.cc b/plugin/sycl/common/hist_util.cc index 8f572e54c442..37567b8e75b3 100644 --- a/plugin/sycl/common/hist_util.cc +++ b/plugin/sycl/common/hist_util.cc @@ -7,6 +7,7 @@ #include #include "../data/gradient_index.h" +#include "../tree/hist_dispatcher.h" #include "hist_util.h" #include @@ -91,28 +92,27 @@ template ::sycl::event SubtractionHist(::sycl::queue* qu, const GHistRow& src2, size_t size, ::sycl::event event_priv); -inline auto GetBlocksParameters(::sycl::queue* qu, size_t size, size_t max_nblocks) { - struct _ { - size_t block_size, nblocks; - }; +template +::sycl::event ReduceHist(::sycl::queue* qu, GradientPairT* hist_data, + GradientPairT* hist_buffer_data, + size_t nblocks, size_t nbins, + const ::sycl::event& event_main) { + auto event_save = qu->submit([&](::sycl::handler& cgh) { + cgh.depends_on(event_main); + cgh.parallel_for<>(::sycl::range<1>(nbins), [=](::sycl::item<1> pid) { + size_t idx_bin = pid.get_id(0); - const size_t min_block_size = 32; - const size_t max_compute_units = - qu->get_device().get_info<::sycl::info::device::max_compute_units>(); + GradientPairT gpair = {0, 0}; - size_t nblocks = max_compute_units; + for (size_t j = 0; j < nblocks; ++j) { + gpair += hist_buffer_data[j * nbins + idx_bin]; + } - size_t block_size = size / nblocks + !!(size % nblocks); - if (block_size > (1u << 12)) { - nblocks = max_nblocks; - block_size = size / nblocks + !!(size % nblocks); - } - if (block_size < min_block_size) { - block_size = min_block_size; - nblocks = size / block_size + !!(size % block_size); - } + hist_data[idx_bin] = gpair; + }); + }); - return _{block_size, nblocks}; + return event_save; } // Kernel with buffer using @@ -123,6 +123,7 @@ ::sycl::event BuildHistKernel(::sycl::queue* qu, const GHistIndexMatrix& gmat, GHistRow* hist, GHistRow* hist_buffer, + const tree::HistDispatcher& dispatcher, ::sycl::event event_priv) { using GradientPairT = 
xgboost::detail::GradientPairInternal; const size_t size = row_indices.Size(); @@ -133,18 +134,13 @@ ::sycl::event BuildHistKernel(::sycl::queue* qu, const uint32_t* offsets = gmat.cut.cut_ptrs_.ConstDevicePointer(); const size_t nbins = gmat.nbins; - const size_t max_work_group_size = - qu->get_device().get_info<::sycl::info::device::max_work_group_size>(); - const size_t work_group_size = n_columns < max_work_group_size ? n_columns : max_work_group_size; - - // Captured structured bindings are a C++20 extension - const auto block_params = GetBlocksParameters(qu, size, hist_buffer->Size() / (nbins * 2)); - const size_t block_size = block_params.block_size; - const size_t nblocks = block_params.nblocks; + const size_t work_group_size = dispatcher.work_group_size; + const size_t block_size = dispatcher.block.size; + const size_t nblocks = dispatcher.block.nblocks; GradientPairT* hist_buffer_data = hist_buffer->Data(); auto event_fill = qu->fill(hist_buffer_data, GradientPairT(0, 0), - nblocks * nbins * 2, event_priv); + nblocks * nbins, event_priv); auto event_main = qu->submit([&](::sycl::handler& cgh) { cgh.depends_on(event_fill); cgh.parallel_for<>(::sycl::nd_range<2>(::sycl::range<2>(nblocks, work_group_size), @@ -179,20 +175,84 @@ ::sycl::event BuildHistKernel(::sycl::queue* qu, }); GradientPairT* hist_data = hist->Data(); - auto event_save = qu->submit([&](::sycl::handler& cgh) { - cgh.depends_on(event_main); - cgh.parallel_for<>(::sycl::range<1>(nbins), [=](::sycl::item<1> pid) { - size_t idx_bin = pid.get_id(0); + auto event_save = ReduceHist(qu, hist_data, hist_buffer_data, nblocks, + nbins, event_main); - GradientPairT gpair = {0, 0}; + return event_save; +} - for (size_t j = 0; j < nblocks; ++j) { - gpair += hist_buffer_data[j * nbins + idx_bin]; - } +// Kernel with buffer and local hist using +template +::sycl::event BuildHistKernelLocal(::sycl::queue* qu, + const HostDeviceVector& gpair, + const RowSetCollection::Elem& row_indices, + const GHistIndexMatrix& gmat, + GHistRow* hist, + GHistRow* hist_buffer, + const tree::HistDispatcher& dispatcher, + ::sycl::event event_priv) { + constexpr int kMaxNumBins = tree::HistDispatcher::KMaxNumBins; + using GradientPairT = xgboost::detail::GradientPairInternal; + const size_t size = row_indices.Size(); + const size_t* rid = row_indices.begin; + const size_t n_columns = gmat.nfeatures; + const auto* pgh = gpair.ConstDevicePointer(); + const BinIdxType* gradient_index = gmat.index.data(); + const uint32_t* offsets = gmat.cut.cut_ptrs_.ConstDevicePointer(); + const size_t nbins = gmat.nbins; - const size_t work_group_size = dispatcher.work_group_size; + const size_t block_size = dispatcher.block.size; + const size_t nblocks = dispatcher.block.nblocks; + + GradientPairT* hist_buffer_data = hist_buffer->Data(); + + auto event_main = qu->submit([&](::sycl::handler& cgh) { + cgh.depends_on(event_priv); + cgh.parallel_for<>(::sycl::nd_range<2>(::sycl::range<2>(nblocks, work_group_size), + ::sycl::range<2>(1, work_group_size)), + [=](::sycl::nd_item<2> pid) { + size_t block = pid.get_global_id(0); + size_t feat = pid.get_global_id(1); + + // This buffer will be kept in L1/registers + GradientPairT hist_fast[kMaxNumBins]; + + GradientPairT* hist_local = hist_buffer_data + block * nbins; + for (size_t fid = feat; fid < n_columns; fid += work_group_size) { + size_t n_bins_feature = offsets[fid+1] - offsets[fid]; + + // Not all elements of hist_fast are actually used: n_bins_feature <= kMaxNumBins + // We initialize
only the required elements to prevent the unused ones from going to cache. + for (int bin = 0; bin < n_bins_feature; ++bin) { + hist_fast[bin] = {0, 0}; + } + + for (size_t idx = 0; idx < block_size; ++idx) { + size_t i = block * block_size + idx; + if (i < size) { + size_t row_id = rid[i]; + + const size_t icol_start = n_columns * row_id; + const GradientPairT pgh_row(pgh[row_id].GetGrad(), + pgh[row_id].GetHess()); + + const BinIdxType* gr_index_local = gradient_index + icol_start; + uint32_t idx_bin = gr_index_local[fid]; + + hist_fast[idx_bin] += pgh_row; + } + } + for (int bin = 0; bin < n_bins_feature; ++bin) { + hist_local[bin + offsets[fid]] = hist_fast[bin]; + } + } }); }); + + GradientPairT* hist_data = hist->Data(); + auto event_save = ReduceHist(qu, hist_data, hist_buffer_data, nblocks, + nbins, event_main); return event_save; } @@ -203,6 +263,7 @@ ::sycl::event BuildHistKernel(::sycl::queue* qu, const RowSetCollection::Elem& row_indices, const GHistIndexMatrix& gmat, GHistRow* hist, + const tree::HistDispatcher& dispatcher, ::sycl::event event_priv) { const size_t size = row_indices.Size(); const size_t* rid = row_indices.begin; @@ -214,7 +275,7 @@ ::sycl::event BuildHistKernel(::sycl::queue* qu, FPType* hist_data = reinterpret_cast(hist->Data()); const size_t nbins = gmat.nbins; - constexpr size_t work_group_size = 32; + size_t work_group_size = dispatcher.work_group_size; const size_t n_work_groups = n_columns / work_group_size + (n_columns % work_group_size > 0); auto event_fill = qu->fill(hist_data, FPType(0), nbins * 2, event_priv); @@ -260,34 +321,47 @@ ::sycl::event BuildHistDispatchKernel( GHistRow* hist, bool isDense, GHistRow* hist_buffer, + const DeviceProperties& device_prop, ::sycl::event events_priv, bool force_atomic_use) { const size_t size = row_indices.Size(); const size_t n_columns = isDense ?
gmat.nfeatures : gmat.row_stride; const size_t nbins = gmat.nbins; + const size_t max_num_bins = gmat.max_num_bins; + const size_t min_num_bins = gmat.min_num_bins; - // TODO(razdoburdin): replace the add-hock dispatching criteria by more sutable one - bool use_atomic = (size < nbins) || (gmat.max_num_bins == gmat.nbins / n_columns); + size_t max_n_blocks = hist_buffer->Size() / nbins; + auto dispatcher = tree::HistDispatcher + (device_prop, isDense, size, max_n_blocks, nbins, + n_columns, max_num_bins, min_num_bins); // force_atomic_use flag is used only for testing - use_atomic = use_atomic || force_atomic_use; + bool use_atomic = dispatcher.use_atomics || force_atomic_use; if (!use_atomic) { if (isDense) { - return BuildHistKernel(qu, gpair, row_indices, - gmat, hist, hist_buffer, - events_priv); + if (dispatcher.use_local_hist) { + return BuildHistKernelLocal(qu, gpair, row_indices, + gmat, hist, hist_buffer, + dispatcher, events_priv); + } else { + return BuildHistKernel(qu, gpair, row_indices, + gmat, hist, hist_buffer, + dispatcher, events_priv); + } } else { return BuildHistKernel(qu, gpair, row_indices, gmat, hist, hist_buffer, - events_priv); + dispatcher, events_priv); } } else { if (isDense) { return BuildHistKernel(qu, gpair, row_indices, - gmat, hist, events_priv); + gmat, hist, + dispatcher, events_priv); } else { return BuildHistKernel(qu, gpair, row_indices, - gmat, hist, events_priv); + gmat, hist, + dispatcher, events_priv); } } } @@ -299,6 +373,7 @@ ::sycl::event BuildHistKernel(::sycl::queue* qu, const GHistIndexMatrix& gmat, const bool isDense, GHistRow* hist, GHistRow* hist_buffer, + const DeviceProperties& device_prop, ::sycl::event event_priv, bool force_atomic_use) { const bool is_dense = isDense; @@ -306,16 +381,19 @@ ::sycl::event BuildHistKernel(::sycl::queue* qu, case BinTypeSize::kUint8BinsTypeSize: return BuildHistDispatchKernel(qu, gpair, row_indices, gmat, hist, is_dense, hist_buffer, + device_prop, event_priv, force_atomic_use); break; case BinTypeSize::kUint16BinsTypeSize: return BuildHistDispatchKernel(qu, gpair, row_indices, gmat, hist, is_dense, hist_buffer, + device_prop, event_priv, force_atomic_use); break; case BinTypeSize::kUint32BinsTypeSize: return BuildHistDispatchKernel(qu, gpair, row_indices, gmat, hist, is_dense, hist_buffer, + device_prop, event_priv, force_atomic_use); break; default: @@ -331,10 +409,12 @@ ::sycl::event GHistBuilder::BuildHist( GHistRowT* hist, bool isDense, GHistRowT* hist_buffer, + const DeviceProperties& device_prop, ::sycl::event event_priv, bool force_atomic_use) { return BuildHistKernel(qu_, gpair, row_indices, gmat, - isDense, hist, hist_buffer, event_priv, + isDense, hist, hist_buffer, + device_prop, event_priv, force_atomic_use); } @@ -346,6 +426,7 @@ ::sycl::event GHistBuilder::BuildHist( GHistRow* hist, bool isDense, GHistRow* hist_buffer, + const DeviceProperties& device_prop, ::sycl::event event_priv, bool force_atomic_use); template @@ -356,6 +437,7 @@ ::sycl::event GHistBuilder::BuildHist( GHistRow* hist, bool isDense, GHistRow* hist_buffer, + const DeviceProperties& device_prop, ::sycl::event event_priv, bool force_atomic_use); diff --git a/plugin/sycl/common/hist_util.h b/plugin/sycl/common/hist_util.h index d09678d4b662..3c71a7be20d9 100644 --- a/plugin/sycl/common/hist_util.h +++ b/plugin/sycl/common/hist_util.h @@ -14,6 +14,7 @@ #include "../../src/common/hist_util.h" #include "../data/gradient_index.h" +#include "../tree/hist_dispatcher.h" #include @@ -123,7 +124,7 @@ class ParallelGHistBuilder { } 
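// Sizing note (editor's illustration): the device buffer now holds nblocks * nbins_ GradientPairT entries, one per bin per block, matching the reduced qu->fill(..., nblocks * nbins, ...) call in hist_util.cc; the removed "* 2" factor reserved twice that.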
void Reset(size_t nblocks) { - hist_device_buffer_.Resize(qu_, nblocks * nbins_ * 2); + hist_device_buffer_.Resize(qu_, nblocks * nbins_); } GHistRowT& GetDeviceBuffer() { @@ -161,6 +162,7 @@ class GHistBuilder { GHistRowT* HistCollection, bool isDense, GHistRowT* hist_buffer, + const DeviceProperties& device_prop, ::sycl::event event, bool force_atomic_use = false); diff --git a/plugin/sycl/common/host_device_vector.cc b/plugin/sycl/common/host_device_vector.cc index 6e4756ec35bd..0a32fae40279 100644 --- a/plugin/sycl/common/host_device_vector.cc +++ b/plugin/sycl/common/host_device_vector.cc @@ -16,6 +16,7 @@ #include "../device_manager.h" #include "../data.h" +#include "../predictor/node.h" namespace xgboost { template @@ -398,13 +399,17 @@ template class HostDeviceVector; template class HostDeviceVector; template class HostDeviceVector; template class HostDeviceVector; -template class HostDeviceVector; // bst_node_t -template class HostDeviceVector; -template class HostDeviceVector; +template class HostDeviceVector; // bst_node_t +template class HostDeviceVector; +template class HostDeviceVector; template class HostDeviceVector; template class HostDeviceVector; template class HostDeviceVector; -template class HostDeviceVector; // bst_feature_t +template class HostDeviceVector; // bst_feature_t +template class HostDeviceVector; +template class HostDeviceVector; +template class HostDeviceVector; +template class HostDeviceVector; } // namespace xgboost diff --git a/plugin/sycl/common/linalg_op.cc b/plugin/sycl/common/linalg_op.cc new file mode 100644 index 000000000000..387b01baa5c9 --- /dev/null +++ b/plugin/sycl/common/linalg_op.cc @@ -0,0 +1,32 @@ +/** + * Copyright 2021-2025, XGBoost Contributors + * \file linalg_op.cc + */ + +#include "../data.h" +#include "../device_manager.h" + +#include "../../../src/common/optional_weight.h" // for OptionalWeights +#include "xgboost/context.h" // for Context + +#include + +namespace xgboost::sycl::linalg { +void SmallHistogram(Context const* ctx, xgboost::linalg::MatrixView indices, + xgboost::common::OptionalWeights const& weights, + xgboost::linalg::VectorView bins) { + sycl::DeviceManager device_manager; + auto* qu = device_manager.GetQueue(ctx->Device()); + + qu->submit([&](::sycl::handler& cgh) { + cgh.parallel_for<>(::sycl::range<1>(indices.Size()), + [=](::sycl::id<1> pid) { + const size_t i = pid[0]; + auto y = indices(i); + auto w = weights[i]; + AtomicRef bin_val(const_cast(bins(static_cast(y)))); + bin_val += w; + }); + }).wait(); +} +} // namespace xgboost::sycl::linalg diff --git a/plugin/sycl/common/linalg_op.h b/plugin/sycl/common/linalg_op.h index 1439408093be..e246b73265d5 100644 --- a/plugin/sycl/common/linalg_op.h +++ b/plugin/sycl/common/linalg_op.h @@ -8,8 +8,6 @@ #include #include -#include "../../../src/common/linalg_op.h" - #include "../data.h" #include "../device_manager.h" @@ -99,17 +97,5 @@ bool Validate(DeviceOrd device, TensorView t, Fn&& fn) { } // namespace linalg } // namespace sycl - -namespace linalg { -template -void ElementWiseKernel(Context const* ctx, TensorView t, Fn&& fn) { - if (ctx->IsSycl()) { - sycl::linalg::ElementWiseKernel(t, fn); - } else { - ElementWiseKernelHost(t, ctx->Threads(), fn); - } -} - -} // namespace linalg } // namespace xgboost #endif // PLUGIN_SYCL_COMMON_LINALG_OP_H_ diff --git a/plugin/sycl/common/optional_weight.cc b/plugin/sycl/common/optional_weight.cc new file mode 100644 index 000000000000..aa984a152dc3 --- /dev/null +++ b/plugin/sycl/common/optional_weight.cc @@ -0,0 +1,31 @@
+/*! + * Copyright by Contributors 2017-2025 + */ +#include + +#include "../../../src/common/optional_weight.h" + +#include "../device_manager.h" + +namespace xgboost::common::sycl_impl { +double SumOptionalWeights(Context const* ctx, OptionalWeights const& weights) { + sycl::DeviceManager device_manager; + auto* qu = device_manager.GetQueue(ctx->Device()); + + const auto* data = weights.Data(); + double result = 0; + { + ::sycl::buffer buff(&result, 1); + qu->submit([&](::sycl::handler& cgh) { + auto reduction = ::sycl::reduction(buff, cgh, ::sycl::plus<>()); + cgh.parallel_for<>(::sycl::range<1>(weights.Size()), reduction, + [=](::sycl::id<1> pid, auto& sum) { + size_t i = pid[0]; + sum += data[i]; + }); + }).wait_and_throw(); + } + + return result; +} +} // namespace xgboost::common::sycl_impl diff --git a/plugin/sycl/context_helper.cc b/plugin/sycl/context_helper.cc new file mode 100644 index 000000000000..d5ced146187c --- /dev/null +++ b/plugin/sycl/context_helper.cc @@ -0,0 +1,26 @@ +/*! + * Copyright 2017-2025 by Contributors + * \file context_helper.cc + */ + +#include + + +#include "device_manager.h" +#include "context_helper.h" + +namespace xgboost { +namespace sycl { + +DeviceOrd DeviceFP64(const DeviceOrd& device) { + DeviceManager device_manager; + bool support_fp64 = device_manager.GetQueue(device)->get_device().has(::sycl::aspect::fp64); + if (support_fp64) { + return device; + } else { + LOG(WARNING) << "Current device doesn't support fp64"; + return DeviceOrd::CPU(); + } +} +} // namespace sycl +} // namespace xgboost diff --git a/plugin/sycl/context_helper.h b/plugin/sycl/context_helper.h new file mode 100644 index 000000000000..09d9a419fc5f --- /dev/null +++ b/plugin/sycl/context_helper.h @@ -0,0 +1,17 @@ +/** + * Copyright 2021-2025, XGBoost Contributors + * \file context_helper.h + */ +#ifndef PLUGIN_SYCL_CONTEXT_HELPER_H_ +#define PLUGIN_SYCL_CONTEXT_HELPER_H_ + +#include + +namespace xgboost { +namespace sycl { + +DeviceOrd DeviceFP64(const DeviceOrd& device); + +} // namespace sycl +} // namespace xgboost +#endif // PLUGIN_SYCL_CONTEXT_HELPER_H_ diff --git a/plugin/sycl/data/gradient_index.cc b/plugin/sycl/data/gradient_index.cc index e6182e07b976..4f29f2d28fc6 100644 --- a/plugin/sycl/data/gradient_index.cc +++ b/plugin/sycl/data/gradient_index.cc @@ -50,10 +50,9 @@ void mergeSort(BinIdxType* begin, BinIdxType* end, BinIdxType* buf) { template void GHistIndexMatrix::SetIndexData(::sycl::queue* qu, + Context const * ctx, BinIdxType* index_data, - DMatrix *dmat, - size_t nbins, - size_t row_stride) { + DMatrix *dmat) { if (nbins == 0) return; const bst_float* cut_values = cut.cut_values_.ConstDevicePointer(); const uint32_t* cut_ptrs = cut.cut_ptrs_.ConstDevicePointer(); @@ -61,17 +60,18 @@ void GHistIndexMatrix::SetIndexData(::sycl::queue* qu, BinIdxType* sort_data = reinterpret_cast(sort_buff.Data()); - ::sycl::event event; for (auto &batch : dmat->GetBatches()) { - for (auto &batch : dmat->GetBatches()) { - const xgboost::Entry *data_ptr = batch.data.ConstDevicePointer(); - const bst_idx_t *offset_vec = batch.offset.ConstDevicePointer(); - size_t batch_size = batch.Size(); - if (batch_size > 0) { - const auto base_rowid = batch.base_rowid; - event = qu->submit([&](::sycl::handler& cgh) { - cgh.depends_on(event); - cgh.parallel_for<>(::sycl::range<1>(batch_size), [=](::sycl::item<1> pid) { + batch.data.SetDevice(ctx->Device()); + batch.offset.SetDevice(ctx->Device()); + const xgboost::Entry *data_ptr = batch.data.ConstDevicePointer(); + const bst_idx_t *offset_vec 
= batch.offset.ConstDevicePointer(); + size_t batch_size = batch.Size(); + if (batch_size > 0) { + const auto base_rowid = batch.base_rowid; + size_t row_stride = this->row_stride; + size_t nbins = this->nbins; + qu->submit([&](::sycl::handler& cgh) { + cgh.parallel_for<>(::sycl::range<1>(batch_size), [=](::sycl::item<1> pid) { const size_t i = pid.get_id(0); const size_t ibegin = offset_vec[i]; const size_t iend = offset_vec[i + 1]; @@ -92,23 +92,22 @@ void GHistIndexMatrix::SetIndexData(::sycl::queue* qu, } }); }); - } } } qu->wait(); } -void GHistIndexMatrix::ResizeIndex(size_t n_index, bool isDense) { - if ((max_num_bins - 1 <= static_cast<size_t>(std::numeric_limits<uint8_t>::max())) && isDense) { +void GHistIndexMatrix::ResizeIndex(::sycl::queue* qu, size_t n_index) { + if ((max_num_bins - 1 <= static_cast<size_t>(std::numeric_limits<uint8_t>::max())) && isDense_) { index.SetBinTypeSize(BinTypeSize::kUint8BinsTypeSize); - index.Resize((sizeof(uint8_t)) * n_index); + index.Resize(qu, (sizeof(uint8_t)) * n_index); } else if ((max_num_bins - 1 > static_cast<size_t>(std::numeric_limits<uint8_t>::max()) && - max_num_bins - 1 <= static_cast<size_t>(std::numeric_limits<uint16_t>::max())) && isDense) { + max_num_bins - 1 <= static_cast<size_t>(std::numeric_limits<uint16_t>::max())) && isDense_) { index.SetBinTypeSize(BinTypeSize::kUint16BinsTypeSize); - index.Resize((sizeof(uint16_t)) * n_index); + index.Resize(qu, (sizeof(uint16_t)) * n_index); } else { index.SetBinTypeSize(BinTypeSize::kUint32BinsTypeSize); - index.Resize((sizeof(uint32_t)) * n_index); + index.Resize(qu, (sizeof(uint32_t)) * n_index); } } @@ -122,52 +121,58 @@ void GHistIndexMatrix::Init(::sycl::queue* qu, cut.SetDevice(ctx->Device()); max_num_bins = max_bins; - const uint32_t nbins = cut.Ptrs().back(); - this->nbins = nbins; + nbins = cut.Ptrs().back(); + + min_num_bins = nbins; + const size_t n_offsets = cut.cut_ptrs_.Size() - 1; + for (unsigned fid = 0; fid < n_offsets; ++fid) { + auto ibegin = cut.cut_ptrs_.ConstHostVector()[fid]; + auto iend = cut.cut_ptrs_.ConstHostVector()[fid + 1]; + min_num_bins = std::min(min_num_bins, iend - ibegin); + } hit_count.SetDevice(ctx->Device()); hit_count.Resize(nbins, 0); - this->p_fmat = dmat; const bool isDense = dmat->IsDense(); this->isDense_ = isDense; - index.setQueue(qu); - row_stride = 0; size_t n_rows = 0; - for (const auto& batch : dmat->GetBatches<SparsePage>()) { - const auto& row_offset = batch.offset.ConstHostVector(); - batch.data.SetDevice(ctx->Device()); - batch.offset.SetDevice(ctx->Device()); - n_rows += batch.Size(); - for (auto i = 1ull; i < row_offset.size(); i++) { - row_stride = std::max(row_stride, static_cast<size_t>(row_offset[i] - row_offset[i - 1])); + if (!isDense) { + for (const auto& batch : dmat->GetBatches<SparsePage>()) { + const auto& row_offset = batch.offset.ConstHostVector(); + n_rows += batch.Size(); + for (auto i = 1ull; i < row_offset.size(); i++) { + row_stride = std::max(row_stride, static_cast<size_t>(row_offset[i] - row_offset[i - 1])); + } } + } else { + row_stride = nfeatures; + n_rows = dmat->Info().num_row_; } - const size_t n_offsets = cut.cut_ptrs_.Size() - 1; const size_t n_index = n_rows * row_stride; - ResizeIndex(n_index, isDense); + ResizeIndex(qu, n_index); CHECK_GT(cut.cut_values_.Size(), 0U); if (isDense) { BinTypeSize curent_bin_size = index.GetBinTypeSize(); if (curent_bin_size == BinTypeSize::kUint8BinsTypeSize) { - SetIndexData<uint8_t>(qu, index.data(), dmat, nbins, row_stride); + SetIndexData<uint8_t>(qu, ctx, index.data(), dmat); } else if (curent_bin_size == BinTypeSize::kUint16BinsTypeSize) { - SetIndexData<uint16_t>(qu, index.data(), dmat, nbins, row_stride); + SetIndexData<uint16_t>(qu, ctx, index.data(), dmat); } else { CHECK_EQ(curent_bin_size, BinTypeSize::kUint32BinsTypeSize); - SetIndexData<uint32_t>(qu, index.data(), dmat, nbins, row_stride); + SetIndexData<uint32_t>(qu, ctx, index.data(), dmat); } /* For sparse DMatrix we have to store index of feature for each bin in index field to choose the right offset. So offset is nullptr and index is not reduced */ } else { sort_buff.Resize(qu, n_rows * row_stride * sizeof(uint32_t)); - SetIndexData<uint32_t>(qu, index.data(), dmat, nbins, row_stride); + SetIndexData<uint32_t>(qu, ctx, index.data(), dmat); } } diff --git a/plugin/sycl/data/gradient_index.h b/plugin/sycl/data/gradient_index.h index b88f2a8015ce..967ac9a87f9e 100644 --- a/plugin/sycl/data/gradient_index.h +++ b/plugin/sycl/data/gradient_index.h @@ -31,21 +31,9 @@ struct Index { Index& operator=(Index&& i) = delete; void SetBinTypeSize(BinTypeSize binTypeSize) { binTypeSize_ = binTypeSize; - switch (binTypeSize) { - case BinTypeSize::kUint8BinsTypeSize: - func_ = &GetValueFromUint8; - break; - case BinTypeSize::kUint16BinsTypeSize: - func_ = &GetValueFromUint16; - break; - case BinTypeSize::kUint32BinsTypeSize: - func_ = &GetValueFromUint32; - break; - default: - CHECK(binTypeSize == BinTypeSize::kUint8BinsTypeSize || - binTypeSize == BinTypeSize::kUint16BinsTypeSize || - binTypeSize == BinTypeSize::kUint32BinsTypeSize); - } + CHECK(binTypeSize == BinTypeSize::kUint8BinsTypeSize || + binTypeSize == BinTypeSize::kUint16BinsTypeSize || + binTypeSize == BinTypeSize::kUint32BinsTypeSize); } BinTypeSize GetBinTypeSize() const { return binTypeSize_; @@ -65,8 +53,8 @@ struct Index { return data_.Size() / (binTypeSize_); } - void Resize(const size_t nBytesData) { - data_.Resize(qu_, nBytesData); + void Resize(::sycl::queue* qu, const size_t nBytesData) { + data_.Resize(qu, nBytesData); } uint8_t* begin() const { @@ -77,28 +65,9 @@ struct Index { return data_.End(); } - void setQueue(::sycl::queue* qu) { - qu_ = qu; - } - private: - static uint32_t GetValueFromUint8(const uint8_t* t, size_t i) { - return reinterpret_cast<const uint8_t*>(t)[i]; - } - static uint32_t GetValueFromUint16(const uint8_t* t, size_t i) { - return reinterpret_cast<const uint16_t*>(t)[i]; - } - static uint32_t GetValueFromUint32(const uint8_t* t, size_t i) { - return reinterpret_cast<const uint32_t*>(t)[i]; - } - - using Func = uint32_t (*)(const uint8_t*, size_t); - USMVector<uint8_t> data_; BinTypeSize binTypeSize_ {BinTypeSize::kUint8BinsTypeSize}; - Func func_; - - ::sycl::queue* qu_; }; /*! @@ -116,8 +85,8 @@ struct GHistIndexMatrix { USMVector sort_buff; /*! \brief The corresponding cuts */ xgboost::common::HistogramCuts cut; - DMatrix* p_fmat; size_t max_num_bins; + size_t min_num_bins; size_t nbins; size_t nfeatures; size_t row_stride; @@ -127,11 +96,10 @@ struct GHistIndexMatrix { DMatrix *dmat, int max_num_bins); template <typename BinIdxType> - void SetIndexData(::sycl::queue* qu, BinIdxType* index_data, - DMatrix *dmat, - size_t nbins, size_t row_stride); + void SetIndexData(::sycl::queue* qu, Context const * ctx, BinIdxType* index_data, + DMatrix *dmat); - void ResizeIndex(size_t n_index, bool isDense); + void ResizeIndex(::sycl::queue* qu, size_t n_index); inline void GetFeatureCounts(size_t* counts) const { auto nfeature = cut.cut_ptrs_.Size() - 1; diff --git a/plugin/sycl/device_properties.h b/plugin/sycl/device_properties.h new file mode 100644 index 000000000000..96f258737c2b --- /dev/null +++ b/plugin/sycl/device_properties.h @@ -0,0 +1,69 @@ +/*!
+ * Copyright 2017-2025 by Contributors + * \file device_properties.h + */ +#ifndef PLUGIN_SYCL_DEVICE_PROPERTIES_H_ +#define PLUGIN_SYCL_DEVICE_PROPERTIES_H_ + +#include +#include +#include "../../src/common/common.h" // for HumanMemUnit + +namespace xgboost { +namespace sycl { + +class DeviceProperties { + void GetL2Size(const ::sycl::device& device) { + l2_size = device.get_info<::sycl::info::device::global_mem_cache_size>(); + LOG(INFO) << "Detected L2 Size = " << ::xgboost::common::HumanMemUnit(l2_size); + l2_size_per_eu = static_cast<float>(l2_size) / max_compute_units; + } + + void GetSRAMSize(const ::sycl::device& device) { + auto arch = + device.get_info<::sycl::ext::oneapi::experimental::info::device::architecture>(); + size_t eu_per_core = + device.get_info<::sycl::ext::intel::info::device::gpu_eu_count_per_subslice>(); + switch (arch) { + case ::sycl::ext::oneapi::experimental::architecture::intel_gpu_pvc: { + LOG(INFO) << "Xe-HPC (Ponte Vecchio) Architecture. L1 friendly optimization enabled."; + size_t l1_size = 512 * 1024; + size_t registers_size = 64 * 1024; + sram_size_per_eu = l1_size / eu_per_core + registers_size; + break; + } + default: + sram_size_per_eu = 0; + } + } + + public: + bool is_gpu; + bool usm_host_allocations; + size_t max_compute_units; + size_t max_work_group_size; + size_t sub_group_size; + float sram_size_per_eu = 0; + size_t l2_size = 0; + float l2_size_per_eu = 0; + + DeviceProperties(): + is_gpu(false) {} + + explicit DeviceProperties(const ::sycl::device& device): + is_gpu(device.is_gpu()), + usm_host_allocations(device.has(::sycl::aspect::usm_host_allocations)), + max_compute_units(device.get_info<::sycl::info::device::max_compute_units>()), + max_work_group_size(device.get_info<::sycl::info::device::max_work_group_size>()), + sub_group_size(device.get_info<::sycl::info::device::sub_group_sizes>().back()) { + GetL2Size(device); + if (is_gpu) { + GetSRAMSize(device); + } + } +}; + +} // namespace sycl +} // namespace xgboost + +#endif // PLUGIN_SYCL_DEVICE_PROPERTIES_H_ diff --git a/plugin/sycl/predictor/node.h b/plugin/sycl/predictor/node.h new file mode 100644 index 000000000000..feed8b3123dd --- /dev/null +++ b/plugin/sycl/predictor/node.h @@ -0,0 +1,69 @@ +/*!
+ * Copyright by Contributors 2017-2025 + * \file node.h + */ +#ifndef PLUGIN_SYCL_PREDICTOR_NODE_H_ +#define PLUGIN_SYCL_PREDICTOR_NODE_H_ + +#include "../../src/gbm/gbtree_model.h" + +namespace xgboost { +namespace sycl { +namespace predictor { + +union NodeValue { + float leaf_weight; + float fvalue; +}; + +class Node { + int fidx; + int left_child_idx; + int right_child_idx; + NodeValue val; + + public: + Node() = default; + + explicit Node(const RegTree::Node& n) { + left_child_idx = n.LeftChild(); + right_child_idx = n.RightChild(); + fidx = n.SplitIndex(); + if (n.DefaultLeft()) { + fidx |= (1U << 31); + } + + if (n.IsLeaf()) { + val.leaf_weight = n.LeafValue(); + } else { + val.fvalue = n.SplitCond(); + } + } + + int LeftChildIdx() const {return left_child_idx; } + + int RightChildIdx() const {return right_child_idx; } + + bool IsLeaf() const { return left_child_idx == -1; } + + int GetFidx() const { return fidx & ((1U << 31) - 1U); } + + bool MissingLeft() const { return (fidx >> 31) != 0; } + + int MissingIdx() const { + if (MissingLeft()) { + return left_child_idx; + } else { + return right_child_idx; + } + } + + float GetFvalue() const { return val.fvalue; } + + float GetWeight() const { return val.leaf_weight; } +}; + +} // namespace predictor +} // namespace sycl +} // namespace xgboost +#endif // PLUGIN_SYCL_PREDICTOR_NODE_H_ diff --git a/plugin/sycl/predictor/predictor.cc b/plugin/sycl/predictor/predictor.cc index 43356f64eb0b..1b4f8f9ee2d6 100755 --- a/plugin/sycl/predictor/predictor.cc +++ b/plugin/sycl/predictor/predictor.cc @@ -1,5 +1,5 @@ /*! - * Copyright by Contributors 2017-2023 + * Copyright by Contributors 2017-2025 */ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wtautological-constant-compare" @@ -29,6 +29,24 @@ #include "../../src/gbm/gbtree_model.h" #include "../device_manager.h" +#include "../device_properties.h" +#include "node.h" + +namespace xgboost::sycl_impl { +void InitOutPredictions(Context const* ctx, linalg::VectorView base_score, + linalg::MatrixView predt) { + sycl::DeviceManager device_manager; + auto* qu = device_manager.GetQueue(predt.Device()); + qu->submit([&](::sycl::handler& cgh) { + cgh.parallel_for<>(::sycl::range<1>(predt.Size()), + [=](::sycl::id<1> pid) { + size_t k = pid[0]; + auto [i, j] = xgboost::linalg::UnravelIndex(k, predt.Shape()); + const_cast(predt(i, j)) = base_score(j); + }); + }).wait_and_throw(); +} +} // namespace xgboost::sycl_impl namespace xgboost { namespace sycl { @@ -36,68 +54,19 @@ namespace predictor { DMLC_REGISTRY_FILE_TAG(predictor_sycl); -union NodeValue { - float leaf_weight; - float fvalue; -}; - -class Node { - int fidx; - int left_child_idx; - int right_child_idx; - NodeValue val; - - public: - explicit Node(const RegTree::Node& n) { - left_child_idx = n.LeftChild(); - right_child_idx = n.RightChild(); - fidx = n.SplitIndex(); - if (n.DefaultLeft()) { - fidx |= (1U << 31); - } - - if (n.IsLeaf()) { - val.leaf_weight = n.LeafValue(); - } else { - val.fvalue = n.SplitCond(); - } - } - - int LeftChildIdx() const {return left_child_idx; } - - int RightChildIdx() const {return right_child_idx; } - - bool IsLeaf() const { return left_child_idx == -1; } - - int GetFidx() const { return fidx & ((1U << 31) - 1U); } - - bool MissingLeft() const { return (fidx >> 31) != 0; } - - int MissingIdx() const { - if (MissingLeft()) { - return left_child_idx; - } else { - return right_child_idx; - } - } - - float GetFvalue() const { return val.fvalue; } - - float GetWeight() const { return val.leaf_weight; } 
-}; - class DeviceModel { public: - USMVector<Node> nodes; + HostDeviceVector<Node> nodes; HostDeviceVector<size_t> first_node_position; HostDeviceVector<int> tree_group; void SetDevice(DeviceOrd device) { + nodes.SetDevice(device); first_node_position.SetDevice(device); tree_group.SetDevice(device); } - void Init(::sycl::queue* qu, const gbm::GBTreeModel& model, size_t tree_begin, size_t tree_end) { + void Init(const gbm::GBTreeModel& model, size_t tree_begin, size_t tree_end) { int n_nodes = 0; first_node_position.Resize((tree_end - tree_begin) + 1); auto& first_node_position_host = first_node_position.HostVector(); @@ -106,43 +75,88 @@ class DeviceModel { if (model.trees[tree_idx]->HasCategoricalSplit()) { LOG(FATAL) << "Categorical features are not yet supported by sycl"; } - n_nodes += model.trees[tree_idx]->GetNodes().size(); + n_nodes += model.trees[tree_idx]->Size(); first_node_position_host[tree_idx - tree_begin + 1] = n_nodes; } - nodes.Resize(qu, n_nodes); + nodes.Resize(n_nodes); for (int tree_idx = tree_begin; tree_idx < tree_end; tree_idx++) { - auto& src_nodes = model.trees[tree_idx]->GetNodes(); + auto const& src_nodes = model.trees[tree_idx]->GetNodes(DeviceOrd::CPU()); size_t n_nodes_shift = first_node_position_host[tree_idx - tree_begin]; for (size_t node_idx = 0; node_idx < src_nodes.size(); node_idx++) { - nodes[node_idx + n_nodes_shift] = static_cast<Node>(src_nodes[node_idx]); + nodes.HostVector()[node_idx + n_nodes_shift] = static_cast<Node>(src_nodes[node_idx]); } } int num_group = model.learner_model_param->num_output_group; if (num_group > 1) { - tree_group.Resize(model.tree_info.size()); + tree_group.Resize(model.tree_info.Size()); auto& tree_group_host = tree_group.HostVector(); - for (size_t tree_idx = 0; tree_idx < model.tree_info.size(); tree_idx++) - tree_group_host[tree_idx] = model.tree_info[tree_idx]; + auto const& tree_group_in = model.tree_info.ConstHostVector(); + for (size_t tree_idx = 0; tree_idx < tree_group_in.size(); tree_idx++) + tree_group_host[tree_idx] = tree_group_in[tree_idx]; } } }; -float GetLeafWeight(const Node* nodes, const float* fval_buff, const uint8_t* miss_buff) { - const Node* node = nodes; - while (!node->IsLeaf()) { - if (miss_buff[node->GetFidx()] == 1) { - node = nodes + node->MissingIdx(); +// Binary search +float BinarySearch(const Entry* begin_ptr, const Entry* end_ptr, + size_t col_idx, size_t num_features) { + const size_t n_elems = end_ptr - begin_ptr; + if (n_elems == num_features) { + return (begin_ptr + col_idx)->fvalue; + } + + // Since indexes are in range [0: num_features), + // we can squeeze the search window from [0: n_elems) to [offset_left: offset_right) + const size_t shift = (num_features - 1) - col_idx; + const size_t offset_left = shift > n_elems - 1 ? 
0 : std::max(0, (n_elems - 1) - shift); + const size_t offset_right = std::min(col_idx + 1, n_elems); + + end_ptr = begin_ptr + offset_right; + begin_ptr += offset_left; + const Entry* previous_middle = nullptr; + while (end_ptr != begin_ptr) { + const Entry* middle = begin_ptr + (end_ptr - begin_ptr) / 2; + if (middle == previous_middle) { + break; } else { - const float fvalue = fval_buff[node->GetFidx()]; - if (fvalue < node->GetFvalue()) { - node = nodes + node->LeftChildIdx(); - } else { - node = nodes + node->RightChildIdx(); - } + previous_middle = middle; + } + if (middle->index == col_idx) { + return middle->fvalue; + } else if (middle->index < col_idx) { + begin_ptr = middle + 1; + } else { + end_ptr = middle; + } + } + return std::numeric_limits::quiet_NaN(); +} + +size_t NextNodeIdx(float fvalue, const Node& node) { + if (std::isnan(fvalue)) { + return node.MissingIdx(); + } else { + if (fvalue < node.GetFvalue()) { + return node.LeftChildIdx(); + } else { + return node.RightChildIdx(); } } +} + +float GetLeafWeight(const Node* nodes, const Entry* first_entry, + const Entry* last_entry, size_t num_features) { + size_t is_dense = (last_entry - first_entry == num_features); + + const Node* node = nodes; + while (!node->IsLeaf()) { + const float fvalue = is_dense ? + (first_entry + node->GetFidx())->fvalue : + BinarySearch(first_entry, last_entry, node->GetFidx(), num_features); + node = nodes + NextNodeIdx(fvalue, *node); + } return node->GetWeight(); } @@ -150,60 +164,28 @@ float GetLeafWeight(const Node* nodes, const float* fval_buff) { const Node* node = nodes; while (!node->IsLeaf()) { const float fvalue = fval_buff[node->GetFidx()]; - if (fvalue < node->GetFvalue()) { - node = nodes + node->LeftChildIdx(); - } else { - node = nodes + node->RightChildIdx(); - } + node = nodes + NextNodeIdx(fvalue, *node); } return node->GetWeight(); } class Predictor : public xgboost::Predictor { public: - void InitOutPredictions(const MetaInfo& info, - HostDeviceVector* out_preds, - const gbm::GBTreeModel& model) const override { - device_model.SetDevice(ctx_->Device()); - CHECK_NE(model.learner_model_param->num_output_group, 0); - size_t n = model.learner_model_param->num_output_group * info.num_row_; - size_t base_margin_size = info.base_margin_.Data()->Size(); - out_preds->Resize(n); - if (base_margin_size == n) { - CHECK_EQ(out_preds->Size(), n); - out_preds->Copy(*(info.base_margin_.Data())); - } else { - auto base_score = model.learner_model_param->BaseScore(ctx_)(0); - if (base_margin_size > 0) { - std::ostringstream oss; - oss << "Ignoring the base margin, since it has incorrect length. " - << "The base margin must be an array of length "; - if (model.learner_model_param->num_output_group > 1) { - oss << "[num_class] * [number of data points], i.e. " - << model.learner_model_param->num_output_group << " * " << info.num_row_ - << " = " << n << ". "; - } else { - oss << "[number of data points], i.e. " << info.num_row_ << ". 
"; - } - oss << "Instead, all data points will use " - << "base_score = " << base_score; - LOG(WARNING) << oss.str(); - } - out_preds->Fill(base_score); - } - needs_buffer_update = true; - } - explicit Predictor(Context const* context) : xgboost::Predictor::Predictor{context}, - cpu_predictor(xgboost::Predictor::Create("cpu_predictor", context)) { - qu_ = device_manager.GetQueue(ctx_->Device()); - } + cpu_predictor(xgboost::Predictor::Create("cpu_predictor", context)) {} void PredictBatch(DMatrix *dmat, PredictionCacheEntry *predts, const gbm::GBTreeModel &model, bst_tree_t tree_begin, bst_tree_t tree_end = 0) const override { auto* out_preds = &predts->predictions; + device_model.SetDevice(ctx_->Device()); + qu_ = device_manager.GetQueue(ctx_->Device()); + if (device_ != ctx_->Device()) { + device_ = ctx_->Device(); + device_prop_ = DeviceProperties(qu_->get_device()); + } + out_preds->SetDevice(ctx_->Device()); if (tree_end == 0) { tree_end = model.trees.size(); @@ -254,7 +236,117 @@ class Predictor : public xgboost::Predictor { } private: - template + // 8KB fits EU registers + static constexpr int kMaxFeatureBufferSize = 2048; + + // Relative cost of reading and writing for discrete and integrated devices. + static constexpr float kCostCalibrationIntegrated = 64; + static constexpr float kCostCalibrationDescrete = 4; + + template + void PredictKernelBufferDispatch(::sycl::event* event, + const Entry* data, + float* out_predictions, + const size_t* row_ptr, + size_t num_rows, + size_t num_features, + size_t num_group, + size_t tree_begin, + size_t tree_end, + float sparsity) const { + if constexpr (kFeatureBufferSize > kMaxFeatureBufferSize) { + LOG(FATAL) << "Unreachable"; + } else { + if (num_features > kFeatureBufferSize) { + PredictKernelBufferDispatch + (event, data, out_predictions, row_ptr, num_rows, + num_features, num_group, tree_begin, tree_end, sparsity); + } else { + PredictKernelBuffer + (event, data, out_predictions, row_ptr, num_rows, + num_features, num_group, tree_begin, tree_end, sparsity); + } + } + } + + size_t GetBlockSize(size_t n_nodes, size_t num_features, size_t num_rows, float sparsity) const { + size_t max_compute_units = device_prop_.max_compute_units; + size_t l2_size = device_prop_.l2_size; + size_t sub_group_size = device_prop_.sub_group_size; + size_t nodes_bytes = n_nodes * sizeof(Node); + bool nodes_fit_l2 = l2_size > 2 * nodes_bytes; + size_t block_size = nodes_fit_l2 + // nodes and data fit L2 + ? 
0.8 * (l2_size - nodes_bytes) / (sparsity * num_features * sizeof(Entry)) + // only data fit L2 + : 0.8 * (l2_size) / (sparsity * num_features * sizeof(Entry)); + block_size = (block_size / sub_group_size) * sub_group_size; + if (block_size < max_compute_units * sub_group_size) { + block_size = max_compute_units * sub_group_size; + } + + if (block_size > num_rows) block_size = num_rows; + return block_size; + } + + template + void PredictKernelBuffer(::sycl::event* event, + const Entry* data, + float* out_predictions, + const size_t* row_ptr, + size_t num_rows, + size_t num_features, + size_t num_group, + size_t tree_begin, + size_t tree_end, + float sparsity) const { + const Node* nodes = device_model.nodes.ConstDevicePointer(); + const size_t* first_node_position = device_model.first_node_position.ConstDevicePointer(); + const int* tree_group = device_model.tree_group.ConstDevicePointer(); + + size_t block_size = GetBlockSize(device_model.nodes.Size(), + num_features, num_rows, sparsity); + size_t n_blocks = num_rows / block_size + (num_rows % block_size > 0); + + for (size_t block = 0; block < n_blocks; ++block) { + *event = qu_->submit([&](::sycl::handler& cgh) { + cgh.depends_on(*event); + cgh.parallel_for<>(::sycl::range<1>(block_size), [=](::sycl::id<1> pid) { + int row_idx = block * block_size + pid[0]; + if (row_idx < num_rows) { + const Entry* first_entry = data + row_ptr[row_idx]; + const Entry* last_entry = data + row_ptr[row_idx + 1]; + + float fvalues[kFeatureBufferSize]; + if constexpr (any_missing) { + for (size_t fid = 0; fid < num_features; ++fid) { + fvalues[fid] = std::numeric_limits::quiet_NaN(); + } + } + + for (const Entry* entry = first_entry; entry < last_entry; entry += 1) { + fvalues[entry->index] = entry->fvalue; + } + if (num_group == 1) { + float& sum = out_predictions[row_idx]; + for (int tree_idx = tree_begin; tree_idx < tree_end; tree_idx++) { + const Node* first_node = nodes + first_node_position[tree_idx - tree_begin]; + sum += GetLeafWeight(first_node, fvalues); + } + } else { + for (int tree_idx = tree_begin; tree_idx < tree_end; tree_idx++) { + const Node* first_node = nodes + first_node_position[tree_idx - tree_begin]; + int out_prediction_idx = row_idx * num_group + tree_group[tree_idx]; + out_predictions[out_prediction_idx] += + GetLeafWeight(first_node, fvalues); + } + } + } + }); + }); + } + } + void PredictKernel(::sycl::event* event, const Entry* data, float* out_predictions, @@ -263,59 +355,70 @@ class Predictor : public xgboost::Predictor { size_t num_features, size_t num_group, size_t tree_begin, - size_t tree_end) const { - const Node* nodes = device_model.nodes.DataConst(); + size_t tree_end, + float sparsity) const { + const Node* nodes = device_model.nodes.ConstDevicePointer(); const size_t* first_node_position = device_model.first_node_position.ConstDevicePointer(); const int* tree_group = device_model.tree_group.ConstDevicePointer(); - float* fval_buff_ptr = fval_buff.Data(); - uint8_t* miss_buff_ptr = miss_buff.Data(); - bool needs_buffer_update = this->needs_buffer_update; - - *event = qu_->submit([&](::sycl::handler& cgh) { - cgh.depends_on(*event); - cgh.parallel_for<>(::sycl::range<1>(num_rows), [=](::sycl::id<1> pid) { - int row_idx = pid[0]; - auto* fval_buff_row_ptr = fval_buff_ptr + num_features * row_idx; - auto* miss_buff_row_ptr = miss_buff_ptr + num_features * row_idx; - - if (needs_buffer_update) { - const Entry* first_entry = data + row_ptr[row_idx]; - const Entry* last_entry = data + row_ptr[row_idx + 1]; - for (const 
Entry* entry = first_entry; entry < last_entry; entry += 1) { - fval_buff_row_ptr[entry->index] = entry->fvalue; - if constexpr (any_missing) { - miss_buff_row_ptr[entry->index] = 0; - } - } - } - - if (num_group == 1) { - float sum = 0.0; - for (int tree_idx = tree_begin; tree_idx < tree_end; tree_idx++) { - const Node* first_node = nodes + first_node_position[tree_idx - tree_begin]; - if constexpr (any_missing) { - sum += GetLeafWeight(first_node, fval_buff_row_ptr, miss_buff_row_ptr); - } else { - sum += GetLeafWeight(first_node, fval_buff_row_ptr); - } - } - out_predictions[row_idx] += sum; - } else { - for (int tree_idx = tree_begin; tree_idx < tree_end; tree_idx++) { - const Node* first_node = nodes + first_node_position[tree_idx - tree_begin]; - int out_prediction_idx = row_idx * num_group + tree_group[tree_idx]; - if constexpr (any_missing) { - out_predictions[out_prediction_idx] += - GetLeafWeight(first_node, fval_buff_row_ptr, miss_buff_row_ptr); + size_t block_size = GetBlockSize(device_model.nodes.Size(), + num_features, num_rows, sparsity); + size_t n_blocks = num_rows / block_size + (num_rows % block_size > 0); + + for (size_t block = 0; block < n_blocks; ++block) { + *event = qu_->submit([&](::sycl::handler& cgh) { + cgh.depends_on(*event); + cgh.parallel_for<>(::sycl::range<1>(block_size), [=](::sycl::id<1> pid) { + int row_idx = block * block_size + pid[0]; + if (row_idx < num_rows) { + const Entry* first_entry = data + row_ptr[row_idx]; + const Entry* last_entry = data + row_ptr[row_idx + 1]; + + if (num_group == 1) { + float& sum = out_predictions[row_idx]; + for (int tree_idx = tree_begin; tree_idx < tree_end; tree_idx++) { + const Node* first_node = nodes + first_node_position[tree_idx - tree_begin]; + sum += GetLeafWeight(first_node, first_entry, last_entry, num_features); + } } else { - out_predictions[out_prediction_idx] += - GetLeafWeight(first_node, fval_buff_row_ptr); + for (int tree_idx = tree_begin; tree_idx < tree_end; tree_idx++) { + const Node* first_node = nodes + first_node_position[tree_idx - tree_begin]; + int out_prediction_idx = row_idx * num_group + tree_group[tree_idx]; + out_predictions[out_prediction_idx] += + GetLeafWeight(first_node, first_entry, last_entry, num_features); + } } } - } + }); }); - }); + } + } + + template <bool any_missing> + bool UseFvalueBuffer(size_t tree_begin, + size_t tree_end, + int num_features) const { + size_t n_nodes = device_model.nodes.Size(); + size_t n_trees = tree_end - tree_begin; + float av_depth = std::log2(static_cast<float>(n_nodes) / n_trees); + // the last node on each path is a leaf + float av_nodes_per_traversal = av_depth - 1; + // number of reads in the no-buffer case + float n_reads = av_nodes_per_traversal * n_trees; + if (any_missing) { + // we use binary search for sparse data + n_reads *= std::log2(static_cast<float>(num_features)); + } + + float cost_calibration = device_prop_.usm_host_allocations + ? kCostCalibrationIntegrated + : kCostCalibrationDescrete; + + // number of writes in local memory.
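+ // Buffering pays off only when the per-row traversal reads outweigh the cost-adjusted + // buffer writes: av_nodes_per_traversal * n_trees (times log2(num_features) for sparse + // data) must exceed cost_calibration * num_features; see the condition below.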
+ float n_writes = num_features; + bool use_fvalue_buffer = (num_features <= kMaxFeatureBufferSize) && + (n_reads > cost_calibration * n_writes); + return use_fvalue_buffer; } template @@ -327,7 +430,7 @@ class Predictor : public xgboost::Predictor { if (tree_end - tree_begin == 0) return; if (out_preds->Size() == 0) return; - device_model.Init(qu_, model, tree_begin, tree_end); + device_model.Init(model, tree_begin, tree_end); int num_group = model.learner_model_param->num_output_group; int num_features = dmat->Info().num_col_; @@ -343,30 +446,30 @@ if (batch_size > 0) { const auto base_rowid = batch.base_rowid; - if (needs_buffer_update) { - fval_buff.ResizeNoCopy(qu_, num_features * batch_size); - if constexpr (any_missing) { - miss_buff.ResizeAndFill(qu_, num_features * batch_size, 1, &event); - } - } + float sparsity = static_cast<float>(batch.data.Size()) / (batch_size * num_features); - PredictKernel(&event, data, out_predictions + base_rowid, - row_ptr, batch_size, num_features, - num_group, tree_begin, tree_end); - needs_buffer_update = (batch_size != out_preds->Size()); + if (UseFvalueBuffer<any_missing>(tree_begin, tree_end, num_features)) { + PredictKernelBufferDispatch(&event, data, + out_predictions + base_rowid * num_group, + row_ptr, batch_size, num_features, + num_group, tree_begin, tree_end, sparsity); + } else { + PredictKernel(&event, data, + out_predictions + base_rowid * num_group, + row_ptr, batch_size, num_features, + num_group, tree_begin, tree_end, sparsity); + } } } qu_->wait(); } - mutable USMVector fval_buff; - mutable USMVector miss_buff; + mutable xgboost::DeviceOrd device_; mutable DeviceModel device_model; - mutable bool needs_buffer_update = true; + DeviceManager device_manager; mutable ::sycl::queue* qu_ = nullptr; - - DeviceManager device_manager; + mutable DeviceProperties device_prop_; std::unique_ptr<xgboost::Predictor> cpu_predictor; }; diff --git a/plugin/sycl/tree/expand_entry.h b/plugin/sycl/tree/expand_entry.h index 2520ff95db5a..807b27d44275 100644 --- a/plugin/sycl/tree/expand_entry.h +++ b/plugin/sycl/tree/expand_entry.h @@ -1,6 +1,5 @@ -/*! - * Copyright 2017-2024 by Contributors - * \file expand_entry.h +/** + * Copyright 2017-2025, XGBoost Contributors */ #ifndef PLUGIN_SYCL_TREE_EXPAND_ENTRY_H_ #define PLUGIN_SYCL_TREE_EXPAND_ENTRY_H_ @@ -10,6 +9,7 @@ #include "../../src/tree/constraints.h" #pragma GCC diagnostic pop #include "../../src/tree/hist/expand_entry.h" +#include "../../src/tree/tree_view.h" namespace xgboost { namespace sycl { @@ -22,15 +22,14 @@ struct ExpandEntry : public xgboost::tree::ExpandEntryImpl { ExpandEntry(int nid, int depth) : ExpandEntryImpl{nid, depth} {} - inline bst_node_t GetSiblingId(const xgboost::RegTree* p_tree) const { - CHECK_EQ((*p_tree)[nid].IsRoot(), false); - const size_t parent_id = (*p_tree)[nid].Parent(); - return GetSiblingId(p_tree, parent_id); + bst_node_t GetSiblingId(::xgboost::tree::ScalarTreeView const& tree) const { + CHECK_EQ(tree.IsRoot(nid), false); + const size_t parent_id = tree.Parent(nid); + return GetSiblingId(tree, parent_id); } - inline bst_node_t GetSiblingId(const xgboost::RegTree* p_tree, size_t parent_id) const { - return p_tree->IsLeftChild(nid) ? p_tree->RightChild(parent_id) - : p_tree->LeftChild(parent_id); + bst_node_t GetSiblingId(::xgboost::tree::ScalarTreeView const& tree, size_t parent_id) const { + return tree.IsLeftChild(nid) ? 
tree.RightChild(parent_id) : tree.LeftChild(parent_id); } bool IsValidImpl(xgboost::tree::TrainParam const &param, int32_t num_leaves) const { diff --git a/plugin/sycl/tree/hist_dispatcher.h b/plugin/sycl/tree/hist_dispatcher.h new file mode 100644 index 000000000000..fe3874a90656 --- /dev/null +++ b/plugin/sycl/tree/hist_dispatcher.h @@ -0,0 +1,170 @@ +/*! + * Copyright 2017-2025 by Contributors + * \file hist_dispatcher.h + */ +#ifndef PLUGIN_SYCL_TREE_HIST_DISPATCHER_H_ +#define PLUGIN_SYCL_TREE_HIST_DISPATCHER_H_ + +#include +#include + +#include "../device_properties.h" + +namespace xgboost { +namespace sycl { +namespace tree { + +struct BlockParams { size_t size, nblocks; }; + +template <typename FPType> +class HistDispatcher { + public: + // Max n_blocks/max_compute_units ratio. + // Higher -> better GPU utilisation with higher memory overhead. + constexpr static int kMaxGPUUtilisation = 4; + // Minimal block size for buffer-based hist building + constexpr static size_t KMinBlockSize = 32; + // Maximal effective block size; above it, more blocks are used instead + constexpr static size_t KMaxEffectiveBlockSize = 1u << 11; + // Maximal number of bins acceptable for local histograms + constexpr static size_t KMaxNumBins = 256; + // Amount of SRAM for a local-histogram kernel launch + constexpr static float KLocalHistSRAM = 32. * 1024; + // Max work-group size used by atomic-based hist building + constexpr static size_t kMaxWorkGroupSizeAtomic = 32; + // Max work-group size used for local histograms + constexpr static size_t kMaxWorkGroupSizeLocal = 256; + // Atomic efficiency normalization + constexpr static float kAtomicEfficiencyNormalization = 16 * 1024; + // Block kernel launch penalty normalization + constexpr static float kBlockPenaltyNormalization = 32 * 1024; + // Relative weight of the quadratic term in the atomic penalty model + constexpr static float kAtomicQuadraticWeight = 1.0 / 8.0; + // Minimal value of the GPU-load threshold + constexpr static float kMinTh = 1.0 / 16.0; + + bool use_local_hist = false; + bool use_atomics = false; + size_t work_group_size; + BlockParams block; + + inline BlockParams GetBlocksParameters(size_t size, size_t max_nblocks, + size_t max_compute_units) const { + if (max_nblocks == 0) return {0, 0}; + size_t nblocks = max_compute_units; + + size_t block_size = size / nblocks + !!(size % nblocks); + while (block_size > KMaxEffectiveBlockSize) { + nblocks *= 2; + if (nblocks >= max_nblocks) { + nblocks = max_nblocks; + block_size = size / nblocks + !!(size % nblocks); + break; + } + block_size = size / nblocks + !!(size % nblocks); + } + + if (block_size < KMinBlockSize) { + block_size = KMinBlockSize; + nblocks = size / block_size + !!(size % block_size); + } + + return {block_size, nblocks}; + } + + HistDispatcher(const DeviceProperties& device_prop, bool isDense, size_t size, + size_t max_nblocks, size_t nbins, size_t ncolumns, + size_t max_num_bins, size_t min_num_bins) { + block = GetBlocksParameters(size, max_nblocks, device_prop.max_compute_units); + work_group_size = std::min(ncolumns, device_prop.max_work_group_size); + if (!device_prop.is_gpu) return; + + using GradientPairT = xgboost::detail::GradientPairInternal<FPType>; + /* Check whether a local histogram is possible and beneficial */ + const int buff_size = nbins * sizeof(GradientPairT); + /* block_size writes into an array of size max_num_bins are made; + * if (block_size < max_num_bins), + * most of the buffer isn't used and performance suffers.
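+ * E.g. with max_num_bins = 256, a block of only 32 rows touches at most 32 of the + * 256 slots; hence th_block_size below is set to max_num_bins.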
+ */ + const size_t th_block_size = max_num_bins; + use_local_hist = (buff_size < device_prop.sram_size_per_eu - KLocalHistSRAM) + && isDense + && (max_num_bins <= KMaxNumBins) + && (block.size >= th_block_size); + + /* Predict the penalty from atomic usage and compare it with the one from a block-based build with a buffer */ + // EUs processing different columns do not trigger conflicts. + float wg_per_columns = std::max(1.0f, static_cast<float>(ncolumns) / kMaxWorkGroupSizeAtomic); + /* Rows are processed per execution unit. + * Some EUs process different columns, and don't trigger conflicts. + * We use a worst-case scenario, i.e. use the minimal number of bins per feature + */ + float conflicts_per_bin = (device_prop.max_compute_units / wg_per_columns) / min_num_bins; + + // Atomics resolve conflicts between EUs, so L2 size can be a proxy for atomic efficiency. + float atomic_efficiency = device_prop.l2_size_per_eu / kAtomicEfficiencyNormalization; + // We use a simple quadratic model to predict the atomic penalty + float atomic_penalty = conflicts_per_bin + + kAtomicQuadraticWeight * (conflicts_per_bin * conflicts_per_bin); + + // The block-based builder operates on a buffer of type FPType placed in L2. + float base_block_penalty = kBlockPenaltyNormalization / + device_prop.l2_size_per_eu * (sizeof(FPType) / 4); + + if (block.nblocks >= device_prop.max_compute_units) { + // If the GPU is fully loaded, we can simply compare the penalties. + use_atomics = base_block_penalty > atomic_penalty / atomic_efficiency; + } else { + float blocks_per_eu = static_cast<float>(block.nblocks) / device_prop.max_compute_units; + /* The GPU is not 100% loaded. We need to take this into account in our model: + * block_penalty = base_block_penalty + base_time * (1 - blocks_per_eu); + * + * atomics should be used if: + * block_penalty > atomic_penalty + * + * The normalization is chosen so that: base_time = 1 + * base_block_penalty + 1 - blocks_per_eu > atomic_penalty / atomic_efficiency + * + * blocks_per_eu < 1 + base_block_penalty - atomic_penalty / atomic_efficiency + */ + float th_block_per_eu = 1 + base_block_penalty - atomic_penalty / atomic_efficiency; + + /* We can't trust the decision of the approximate performance model + * if the penalties are close to each other, + * i.e. (1 + base_block_penalty) ~ (atomic_penalty / atomic_efficiency). + * We manually limit the minimal value of th_block_per_eu + * to determine the behaviour in this region. + */ + th_block_per_eu = std::max(kMinTh, th_block_per_eu); + + use_atomics = (blocks_per_eu < th_block_per_eu); + } + + if (use_atomics) { + work_group_size = std::min(kMaxWorkGroupSizeAtomic, + work_group_size); + } else if (use_local_hist) { + work_group_size = std::min(kMaxWorkGroupSizeLocal, + work_group_size); + } + } +}; + +// For some datasets the buffer is not used; we estimate whether this is the case. +template <typename FPType> +size_t GetRequiredBufferSize(const DeviceProperties& device_prop, size_t max_n_rows, size_t nbins, + size_t ncolumns, size_t max_num_bins, size_t min_num_bins) { + size_t max_nblocks = HistDispatcher<FPType>::kMaxGPUUtilisation * device_prop.max_compute_units; + // The buffer size doesn't depend on the isDense flag. + auto build_params = HistDispatcher<FPType> + (device_prop, true, max_n_rows, max_nblocks, nbins, + ncolumns, max_num_bins, min_num_bins); + + return build_params.use_atomics ? 
0 : build_params.block.nblocks; +} + +} // namespace tree +} // namespace sycl +} // namespace xgboost + +#endif // PLUGIN_SYCL_TREE_HIST_DISPATCHER_H_ diff --git a/plugin/sycl/tree/hist_synchronizer.h b/plugin/sycl/tree/hist_synchronizer.h index a6c9a6a83aeb..31d84413c41e 100644 --- a/plugin/sycl/tree/hist_synchronizer.h +++ b/plugin/sycl/tree/hist_synchronizer.h @@ -1,12 +1,12 @@ -/*! - * Copyright 2017-2024 by Contributors - * \file hist_synchronizer.h +/** + * Copyright 2017-2025, XGBoost Contributors */ #ifndef PLUGIN_SYCL_TREE_HIST_SYNCHRONIZER_H_ #define PLUGIN_SYCL_TREE_HIST_SYNCHRONIZER_H_ #include +#include "../../src/tree/tree_view.h" #include "../common/hist_util.h" #include "expand_entry.h" @@ -20,18 +20,17 @@ class HistUpdater; template class HistSynchronizer { public: - virtual void SyncHistograms(HistUpdater* builder, - const std::vector& sync_ids, - RegTree *p_tree) = 0; + virtual void SyncHistograms(HistUpdater* builder, const std::vector& sync_ids, + RegTree const* p_tree) = 0; virtual ~HistSynchronizer() = default; }; template class BatchHistSynchronizer: public HistSynchronizer { public: - void SyncHistograms(HistUpdater* builder, - const std::vector& sync_ids, - RegTree *p_tree) override { + void SyncHistograms(HistUpdater* builder, const std::vector& sync_ids, + RegTree const* p_tree) override { + auto tree = p_tree->HostScView(); builder->builder_monitor_.Start("SyncHistograms"); const size_t nbins = builder->hist_builder_.GetNumBins(); @@ -40,10 +39,10 @@ class BatchHistSynchronizer: public HistSynchronizer { const auto entry = builder->nodes_for_explicit_hist_build_[i]; auto& this_hist = builder->hist_[entry.nid]; - if (!(*p_tree)[entry.nid].IsRoot()) { - const size_t parent_id = (*p_tree)[entry.nid].Parent(); + if (!(tree).IsRoot(entry.nid)) { + const size_t parent_id = tree.Parent(entry.nid); auto& parent_hist = builder->hist_[parent_id]; - auto& sibling_hist = builder->hist_[entry.GetSiblingId(p_tree, parent_id)]; + auto& sibling_hist = builder->hist_[entry.GetSiblingId(tree, parent_id)]; hist_sync_events_[i] = common::SubtractionHist(builder->qu_, &sibling_hist, parent_hist, this_hist, nbins, ::sycl::event()); } @@ -64,9 +63,9 @@ class BatchHistSynchronizer: public HistSynchronizer { template class DistributedHistSynchronizer: public HistSynchronizer { public: - void SyncHistograms(HistUpdater* builder, - const std::vector& sync_ids, - RegTree *p_tree) override { + void SyncHistograms(HistUpdater* builder, const std::vector& sync_ids, + RegTree const* p_tree) override { + auto tree = p_tree->HostScView(); builder->builder_monitor_.Start("SyncHistograms"); const size_t nbins = builder->hist_builder_.GetNumBins(); for (int node = 0; node < builder->nodes_for_explicit_hist_build_.size(); node++) { @@ -76,9 +75,9 @@ class DistributedHistSynchronizer: public HistSynchronizer { auto& this_local = builder->hist_local_worker_[entry.nid]; common::CopyHist(builder->qu_, &this_local, this_hist, nbins); - if (!(*p_tree)[entry.nid].IsRoot()) { - const size_t parent_id = (*p_tree)[entry.nid].Parent(); - auto sibling_nid = entry.GetSiblingId(p_tree, parent_id); + if (!tree.IsRoot(entry.nid)) { + const size_t parent_id = tree.Parent(entry.nid); + auto sibling_nid = entry.GetSiblingId(tree, parent_id); auto& parent_hist = builder->hist_local_worker_[parent_id]; auto& sibling_hist = builder->hist_[sibling_nid]; @@ -102,15 +101,16 @@ class DistributedHistSynchronizer: public HistSynchronizer { const std::vector& nodes, const RegTree * p_tree) { const size_t nbins = 
builder->hist_builder_.GetNumBins(); + auto tree = p_tree->HostScView(); for (int node = 0; node < nodes.size(); node++) { const auto entry = nodes[node]; - if (!((*p_tree)[entry.nid].IsLeftChild())) { + if (!(tree.IsLeftChild(entry.nid))) { auto& this_hist = builder->hist_[entry.nid]; - if (!(*p_tree)[entry.nid].IsRoot()) { - const size_t parent_id = (*p_tree)[entry.nid].Parent(); + if (!tree.IsRoot(entry.nid)) { + const size_t parent_id = tree.Parent(entry.nid); auto& parent_hist = builder->hist_[parent_id]; - auto& sibling_hist = builder->hist_[entry.GetSiblingId(p_tree, parent_id)]; + auto& sibling_hist = builder->hist_[entry.GetSiblingId(tree, parent_id)]; common::SubtractionHist(builder->qu_, &this_hist, parent_hist, sibling_hist, nbins, ::sycl::event()); builder->qu_->wait_and_throw(); diff --git a/plugin/sycl/tree/hist_updater.cc b/plugin/sycl/tree/hist_updater.cc index 7009543dabb6..498f86590338 100644 --- a/plugin/sycl/tree/hist_updater.cc +++ b/plugin/sycl/tree/hist_updater.cc @@ -12,6 +12,7 @@ #include "../../src/tree/common_row_partitioner.h" #include "../common/hist_util.h" +#include "xgboost/linalg.h" #include "../../src/collective/allreduce.h" namespace xgboost { @@ -34,8 +35,8 @@ void HistUpdater::ReduceHists(const std::vector& sync_ids, qu_->memcpy(reduce_buffer_.data() + i * nbins, psrc, nbins*sizeof(GradientPairT)).wait(); } - auto buffer_vec = linalg::MakeVec(reinterpret_cast(reduce_buffer_.data()), - 2 * nbins * sync_ids.size()); + auto buffer_vec = ::xgboost::linalg::MakeVec( + reinterpret_cast(reduce_buffer_.data()), 2 * nbins * sync_ids.size()); auto rc = collective::Allreduce(ctx_, buffer_vec, collective::Op::kSum); SafeColl(rc); @@ -67,9 +68,10 @@ void HistUpdater::BuildHistogramsLossGuide( nodes_for_explicit_hist_build_.clear(); nodes_for_subtraction_trick_.clear(); nodes_for_explicit_hist_build_.push_back(entry); + auto tree = p_tree->HostScView(); - if (!(*p_tree)[entry.nid].IsRoot()) { - auto sibling_id = entry.GetSiblingId(p_tree); + if (!tree.IsRoot(entry.nid)) { + auto sibling_id = entry.GetSiblingId(tree); nodes_for_subtraction_trick_.emplace_back(sibling_id, p_tree->GetDepth(sibling_id)); } @@ -360,10 +362,9 @@ void HistUpdater::Update( builder_monitor_.Stop("Update"); } -template +template bool HistUpdater::UpdatePredictionCache( - const DMatrix* data, - linalg::MatrixView out_preds) { + const DMatrix* data, ::xgboost::linalg::MatrixView out_preds) { CHECK(out_preds.Device().IsSycl()); // p_last_fmat_ is a valid pointer as long as UpdatePredictionCache() is called in // conjunction with Update(). @@ -375,19 +376,20 @@ bool HistUpdater::UpdatePredictionCache( size_t n_nodes = row_set_collection_.Size(); std::vector<::sycl::event> events(n_nodes); + auto tree = p_last_tree_->HostScView(); for (size_t node = 0; node < n_nodes; node++) { const common::RowSetCollection::Elem& rowset = row_set_collection_[node]; if (rowset.begin != nullptr && rowset.end != nullptr && rowset.Size() != 0) { int nid = rowset.node_id; // if a node is marked as deleted by the pruner, traverse upward to locate // a non-deleted leaf. 
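+ // (The first non-deleted ancestor is expected to be the leaf that replaced the + // pruned subtree; the CHECK below asserts this.)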
- if ((*p_last_tree_)[nid].IsDeleted()) { - while ((*p_last_tree_)[nid].IsDeleted()) { - nid = (*p_last_tree_)[nid].Parent(); + if (tree.IsDeleted(nid)) { + while (tree.IsDeleted(nid)) { + nid = tree.Parent(nid); } - CHECK((*p_last_tree_)[nid].IsLeaf()); + CHECK(tree.IsLeaf(nid)); } - bst_float leaf_value = (*p_last_tree_)[nid].LeafValue(); + bst_float leaf_value = tree.LeafValue(nid); const size_t* rid = rowset.begin; const size_t num_rows = rowset.Size(); @@ -500,10 +502,6 @@ void HistUpdater::InitData( hist_.Init(qu_, nbins); hist_local_worker_.Init(qu_, nbins); - hist_buffer_.Init(qu_, nbins); - size_t buffer_size = kBufferSize; - hist_buffer_.Reset(kBufferSize); - // initialize histogram builder hist_builder_ = common::GHistBuilder(qu_, nbins); @@ -613,6 +611,18 @@ void HistUpdater::InitData( qexpand_depth_wise_.clear(); } } + + { + uint32_t nbins = gmat.cut.Ptrs().back(); + hist_buffer_.Init(qu_, nbins); + bool isDense = data_layout_ != kSparseData; + const size_t ncolumns = isDense ? gmat.nfeatures : gmat.row_stride; + size_t buffer_size = GetRequiredBufferSize + (device_properties_, info.num_row_, nbins, ncolumns, + gmat.max_num_bins, gmat.min_num_bins); + hist_buffer_.Reset(buffer_size); + } + builder_monitor_.Stop("InitData"); } @@ -641,7 +651,8 @@ void HistUpdater::ApplySplit( const size_t n_nodes = nodes.size(); std::vector split_conditions(n_nodes); - CommonRowPartitioner::FindSplitConditions(nodes, *p_tree, gmat, &split_conditions); + auto tree = p_tree->HostScView(); + CommonRowPartitioner::FindSplitConditions(nodes, tree, gmat, &split_conditions); partition_builder_.Init(qu_, n_nodes, [&](size_t node_in_set) { const int32_t nid = nodes[node_in_set].nid; @@ -673,8 +684,9 @@ void HistUpdater::InitNewNode(int nid, builder_monitor_.Start("InitNewNode"); snode_host_.resize(tree.NumNodes(), NodeEntry(param_)); + auto sc_tree = tree.HostScView(); { - if (tree[nid].IsRoot()) { + if (sc_tree.IsRoot(nid)) { GradStats grad_stat; if (data_layout_ == kDenseDataZeroBased || data_layout_ == kDenseDataOneBased) { const std::vector& row_ptr = gmat.cut.Ptrs(); @@ -711,13 +723,13 @@ void HistUpdater::InitNewNode(int nid, }).wait_and_throw(); } auto rc = collective::Allreduce( - ctx_, linalg::MakeVec(reinterpret_cast(&grad_stat), 2), - collective::Op::kSum); + ctx_, ::xgboost::linalg::MakeVec(reinterpret_cast(&grad_stat), 2), + collective::Op::kSum); SafeColl(rc); snode_host_[nid].stats = grad_stat; } else { - int parent_id = tree[nid].Parent(); - if (tree[nid].IsLeftChild()) { + int parent_id = sc_tree.Parent(nid); + if (sc_tree.IsLeftChild(nid)) { snode_host_[nid].stats = snode_host_[parent_id].best.left_sum; } else { snode_host_[nid].stats = snode_host_[parent_id].best.right_sum; @@ -728,7 +740,7 @@ void HistUpdater::InitNewNode(int nid, // calculating the weights { auto evaluator = tree_evaluator_.GetEvaluator(); - bst_uint parentid = tree[nid].Parent(); + bst_uint parentid = sc_tree.Parent(nid); snode_host_[nid].weight = evaluator.CalcWeight(parentid, snode_host_[nid].stats); snode_host_[nid].root_gain = evaluator.CalcGain(parentid, snode_host_[nid].stats); } diff --git a/plugin/sycl/tree/hist_updater.h b/plugin/sycl/tree/hist_updater.h index 6828c27a60d6..37ff6a8b3e9b 100644 --- a/plugin/sycl/tree/hist_updater.h +++ b/plugin/sycl/tree/hist_updater.h @@ -1,5 +1,5 @@ /*! 
- * Copyright 2017-2024 by Contributors + * Copyright 2017-2025, XGBoost Contributors * \file hist_updater.h */ #ifndef PLUGIN_SYCL_TREE_HIST_UPDATER_H_ @@ -8,6 +8,7 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wtautological-constant-compare" #pragma GCC diagnostic ignored "-W#pragma-messages" +#include // for MatrixView #include #pragma GCC diagnostic pop @@ -20,6 +21,7 @@ #include "split_evaluator.h" #include "hist_synchronizer.h" #include "hist_row_adder.h" +#include "hist_dispatcher.h" #include "../../src/common/random.h" #include "../data.h" @@ -56,7 +58,7 @@ class HistUpdater { const xgboost::tree::TrainParam& param, FeatureInteractionConstraintHost int_constraints_, DMatrix const* fmat) - : ctx_(ctx), qu_(qu), param_(param), + : ctx_(ctx), qu_(qu), device_properties_(qu->get_device()), param_(param), tree_evaluator_(qu, param, fmat->Info().num_col_), interaction_constraints_{std::move(int_constraints_)}, p_last_tree_(nullptr), p_last_fmat_(fmat) { @@ -79,8 +81,7 @@ class HistUpdater { xgboost::common::Span> out_position, RegTree *p_tree); - bool UpdatePredictionCache(const DMatrix* data, - linalg::MatrixView p_out_preds); + bool UpdatePredictionCache(const DMatrix* data, ::xgboost::linalg::MatrixView p_out_preds); void SetHistSynchronizer(HistSynchronizer* sync); void SetHistRowsAdder(HistRowsAdder* adder); @@ -134,7 +135,8 @@ class HistUpdater { GHistRowT* hist_buffer, ::sycl::event event_priv) { return hist_builder_.BuildHist(gpair, row_indices, gmat, hist, - data_layout_ != kSparseData, hist_buffer, event_priv); + data_layout_ != kSparseData, hist_buffer, + device_properties_, event_priv); } void InitNewNode(int nid, @@ -198,9 +200,12 @@ class HistUpdater { // --data fields-- const Context* ctx_; + ::sycl::queue* qu_; bool has_fp64_support_; size_t sub_group_size_; + DeviceProperties device_properties_; + // the internal row sets common::RowSetCollection row_set_collection_; @@ -230,7 +235,6 @@ class HistUpdater { enum DataLayout { kDenseDataZeroBased, kDenseDataOneBased, kSparseData }; DataLayout data_layout_; - constexpr static size_t kBufferSize = 2048; common::GHistBuilder hist_builder_; common::ParallelGHistBuilder hist_buffer_; /*! \brief culmulative histogram of gradients. 
*/ @@ -263,7 +267,6 @@ class HistUpdater { std::unique_ptr> hist_rows_adder_; std::vector reduce_buffer_; - ::sycl::queue* qu_; }; } // namespace tree diff --git a/plugin/sycl/tree/updater_quantile_hist.cc b/plugin/sycl/tree/updater_quantile_hist.cc index a8fe602e6399..b8207bbaa676 100644 --- a/plugin/sycl/tree/updater_quantile_hist.cc +++ b/plugin/sycl/tree/updater_quantile_hist.cc @@ -8,6 +8,7 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wtautological-constant-compare" #pragma GCC diagnostic ignored "-W#pragma-messages" +#include "xgboost/gradient.h" // for GradientContainer #include "xgboost/tree_updater.h" #pragma GCC diagnostic pop @@ -59,24 +60,22 @@ void QuantileHistMaker::SetPimpl(std::unique_ptr>* pim } } -template -void QuantileHistMaker::CallUpdate( - const std::unique_ptr>& pimpl, - xgboost::tree::TrainParam const *param, - linalg::Matrix *gpair, - DMatrix *dmat, - xgboost::common::Span> out_position, - const std::vector &trees) { +template +void QuantileHistMaker::CallUpdate(const std::unique_ptr> &pimpl, + xgboost::tree::TrainParam const *param, + ::xgboost::linalg::Matrix *gpair, DMatrix *dmat, + xgboost::common::Span> out_position, + const std::vector &trees) { for (auto tree : trees) { pimpl->Update(param, gmat_, *(gpair->Data()), dmat, out_position, tree); } } -void QuantileHistMaker::Update(xgboost::tree::TrainParam const *param, - linalg::Matrix* gpair, +void QuantileHistMaker::Update(xgboost::tree::TrainParam const *param, GradientContainer *in_gpair, DMatrix *dmat, xgboost::common::Span> out_position, const std::vector &trees) { + auto gpair = in_gpair->FullGradOnly(); gpair->Data()->SetDevice(ctx_->Device()); if (dmat != p_last_dmat_ || is_gmat_initialized_ == false) { updater_monitor_.Start("GmatInitialization"); @@ -106,8 +105,8 @@ void QuantileHistMaker::Update(xgboost::tree::TrainParam const *param, p_last_dmat_ = dmat; } -bool QuantileHistMaker::UpdatePredictionCache(const DMatrix* data, - linalg::MatrixView out_preds) { +bool QuantileHistMaker::UpdatePredictionCache(const DMatrix *data, + ::xgboost::linalg::MatrixView out_preds) { if (param_.subsample < 1.0f) return false; if (hist_precision_ == HistPrecision::fp32) { diff --git a/plugin/sycl/tree/updater_quantile_hist.h b/plugin/sycl/tree/updater_quantile_hist.h index e60153fa7d32..b6b2105ff1f8 100644 --- a/plugin/sycl/tree/updater_quantile_hist.h +++ b/plugin/sycl/tree/updater_quantile_hist.h @@ -1,5 +1,5 @@ -/*! 
- * Copyright 2017-2024 by Contributors +/** + * Copyright 2017-2025, XGBoost Contributors * \file updater_quantile_hist.h */ #ifndef PLUGIN_SYCL_TREE_UPDATER_QUANTILE_HIST_H_ @@ -8,21 +8,21 @@ #include #include -#include #include +#include -#include "../data/gradient_index.h" +#include "../../src/common/random.h" +#include "../../src/tree/constraints.h" #include "../common/hist_util.h" -#include "../common/row_set.h" #include "../common/partition_builder.h" -#include "split_evaluator.h" +#include "../common/row_set.h" +#include "../data/gradient_index.h" #include "../device_manager.h" #include "hist_updater.h" +#include "split_evaluator.h" #include "xgboost/data.h" - +#include "xgboost/gradient.h" // for GradientContainer #include "xgboost/json.h" -#include "../../src/tree/constraints.h" -#include "../../src/common/random.h" namespace xgboost { namespace sycl { @@ -48,14 +48,12 @@ class QuantileHistMaker: public TreeUpdater { } void Configure(const Args& args) override; - void Update(xgboost::tree::TrainParam const *param, - linalg::Matrix* gpair, - DMatrix* dmat, + void Update(xgboost::tree::TrainParam const* param, GradientContainer* in_gpair, DMatrix* dmat, xgboost::common::Span> out_position, const std::vector& trees) override; bool UpdatePredictionCache(const DMatrix* data, - linalg::MatrixView out_preds) override; + ::xgboost::linalg::MatrixView out_preds) override; void LoadConfig(Json const& in) override { auto const& config = get(in); @@ -92,7 +90,7 @@ class QuantileHistMaker: public TreeUpdater { template void CallUpdate(const std::unique_ptr>& builder, xgboost::tree::TrainParam const *param, - linalg::Matrix *gpair, + ::xgboost::linalg::Matrix *gpair, DMatrix *dmat, xgboost::common::Span> out_position, const std::vector &trees); diff --git a/python-package/README.stub.rst b/python-package/README.stub.rst new file mode 100644 index 000000000000..8ffbbce80dd8 --- /dev/null +++ b/python-package/README.stub.rst @@ -0,0 +1,5 @@ +====================================== +Placeholder for XGBoost Python Package +====================================== + +This package is a placeholder for the `xgboost` package. diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml index d188319dd4a0..46e95235d76f 100644 --- a/python-package/pyproject.toml +++ b/python-package/pyproject.toml @@ -1,4 +1,4 @@ -# Generated by `pypi_variants.py`, don't edit. 
+# Generated by `pypi_variants.py`, don't edit: '--use-suffix=na --require-nccl-dep=cu12 --create-stub=False' [build-system] requires = [ "hatchling>=1.12.1", @@ -15,7 +15,7 @@ authors = [ { name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu" }, { name = "Jiaming Yuan", email = "jm.yuan@outlook.com" } ] -version = "3.1.0-dev" +version = "3.2.0-dev" requires-python = ">=3.10" license = { text = "Apache-2.0" } classifiers = [ @@ -29,6 +29,7 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", ] dependencies = [ "numpy", @@ -45,7 +46,7 @@ pandas = ["pandas>=1.2"] scikit-learn = ["scikit-learn"] dask = ["dask", "pandas", "distributed"] plotting = ["graphviz", "matplotlib"] -pyspark = ["pyspark", "scikit-learn", "cloudpickle"] +pyspark = ["pyspark>=3.4", "scikit-learn", "cloudpickle"] [tool.hatch.build.targets.wheel.hooks.custom] @@ -59,9 +60,10 @@ follow_imports = "silent" [tool.pylint.main] ignore = ["tests"] -extension-pkg-whitelist = ["numpy"] +extension-pkg-whitelist = ["numpy", "cuda"] disable = [ "import-error", + "invalid-name", "attribute-defined-outside-init", "import-outside-toplevel", "too-few-public-methods", @@ -95,3 +97,10 @@ inspect = true ignore = ["compiled-objects-have-debug-symbols"] max_allowed_size_compressed = '300M' max_allowed_size_uncompressed = '500M' + +[variant.default-priorities] +namespace = ["nvidia"] + +[variant.providers.nvidia] +requires = ["nvidia-variant-provider>=0.0.1,<1.0.0"] +plugin-api = "nvidia_variant_provider.plugin:NvidiaVariantPlugin" diff --git a/python-package/pyproject.toml.in b/python-package/pyproject.toml.in index 035e13a68227..01c7d60e15ca 100644 --- a/python-package/pyproject.toml.in +++ b/python-package/pyproject.toml.in @@ -14,7 +14,7 @@ authors = [ { name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu" }, { name = "Jiaming Yuan", email = "jm.yuan@outlook.com" } ] -version = "3.1.0-dev" +version = "3.2.0-dev" requires-python = ">=3.10" license = { text = "Apache-2.0" } classifiers = [ @@ -28,6 +28,7 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", ] dependencies = [ "numpy", @@ -44,7 +45,7 @@ pandas = ["pandas>=1.2"] scikit-learn = ["scikit-learn"] dask = ["dask", "pandas", "distributed"] plotting = ["graphviz", "matplotlib"] -pyspark = ["pyspark", "scikit-learn", "cloudpickle"] +pyspark = ["pyspark>=3.4", "scikit-learn", "cloudpickle"] [tool.hatch.build.targets.wheel.hooks.custom] @@ -58,9 +59,10 @@ follow_imports = "silent" [tool.pylint.main] ignore = ["tests"] -extension-pkg-whitelist = ["numpy"] +extension-pkg-whitelist = ["numpy", "cuda"] disable = [ "import-error", + "invalid-name", "attribute-defined-outside-init", "import-outside-toplevel", "too-few-public-methods", @@ -94,3 +96,10 @@ inspect = true ignore = ["compiled-objects-have-debug-symbols"] max_allowed_size_compressed = '300M' max_allowed_size_uncompressed = '500M' + +[variant.default-priorities] +namespace = ["nvidia"] + +[variant.providers.nvidia] +requires = ["nvidia-variant-provider>=0.0.1,<1.0.0"] +plugin-api = "nvidia_variant_provider.plugin:NvidiaVariantPlugin" diff --git a/python-package/pyproject.toml.stub.in b/python-package/pyproject.toml.stub.in new file mode 100644 index 000000000000..2a869746cbf8 --- /dev/null +++ b/python-package/pyproject.toml.stub.in @@ -0,0 +1,38 @@ 
+[build-system] +requires = [ + "hatchling>=1.12.1", +] +build-backend = "hatchling.build" + +[project] +name = "{{ name }}" +description = "XGBoost Python Package" +readme = { file = "README.rst", content-type = "text/x-rst" } +authors = [ + { name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu" }, + { name = "Jiaming Yuan", email = "jm.yuan@outlook.com" } +] +version = "3.2.0-dev" +requires-python = ">=3.10" +license = { text = "Apache-2.0" } +classifiers = [ + "License :: OSI Approved :: Apache Software License", + "Development Status :: 5 - Production/Stable", + "Operating System :: OS Independent", + "Typing :: Typed", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", +] +dependencies = [ + "xgboost=={{ version }}", +] + +[tool.hatch.build.targets.sdist] +only-include = ["pyproject.toml"] + +[tool.hatch.build.targets.wheel] +only-include = ["pyproject.toml"] diff --git a/python-package/xgboost/VERSION b/python-package/xgboost/VERSION index 0f9d6b15dc04..df4a76732016 100644 --- a/python-package/xgboost/VERSION +++ b/python-package/xgboost/VERSION @@ -1 +1 @@ -3.1.0-dev +3.2.0-dev diff --git a/python-package/xgboost/_data_utils.py b/python-package/xgboost/_data_utils.py index c651fa03c709..4531848aa782 100644 --- a/python-package/xgboost/_data_utils.py +++ b/python-package/xgboost/_data_utils.py @@ -2,11 +2,13 @@ import copy import ctypes -import functools import json +from abc import ABC, abstractmethod +from functools import cache as fcache from typing import ( TYPE_CHECKING, Any, + Callable, Dict, List, Literal, @@ -14,6 +16,7 @@ Protocol, Tuple, Type, + TypeAlias, TypedDict, TypeGuard, Union, @@ -23,7 +26,14 @@ import numpy as np -from ._typing import CNumericPtr, DataType, NumpyDType, NumpyOrCupy +from ._typing import ( + ArrowCatList, + CNumericPtr, + DataType, + FeatureTypes, + NumpyDType, + NumpyOrCupy, +) from .compat import import_cupy, import_pyarrow, lazy_isinstance if TYPE_CHECKING: @@ -39,18 +49,7 @@ def __array_interface__(self) -> "ArrayInf": ... class _CudaArrayLikeArg(Protocol): @property - def __cuda_array_interface__(self) -> "ArrayInf": ... - - -class TransformedDf(Protocol): - """Protocol class for storing transformed dataframe.""" - - def array_interface(self) -> bytes: - """Get a JSON-encoded list of array interfaces.""" - - @property - def shape(self) -> Tuple[int, int]: - """Return the shape of the dataframe.""" + def __cuda_array_interface__(self) -> "CudaArrayInf": ... 
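For reference, the distinction the new `CudaArrayInf` TypedDict (below) captures: version 3 of the `__cuda_array_interface__` protocol adds a mandatory `stream` field on top of the CPU-side `__array_interface__` layout. A minimal sketch, assuming CuPy and a visible GPU:

```python
import cupy as cp

x = cp.arange(6, dtype=cp.float32).reshape(2, 3)
inf = x.__cuda_array_interface__
# Shared with numpy's __array_interface__: "data", "shape", "typestr",
# "strides", "version". CUDA-specific: "stream", used to synchronize the
# producer's stream with the consumer's.
print(inf["typestr"], inf["shape"], inf.get("stream"))
```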
ArrayInf = TypedDict( @@ -65,7 +64,23 @@ def shape(self) -> Tuple[int, int]: }, ) +CudaArrayInf = TypedDict( + "CudaArrayInf", + { + "data": Tuple[int, bool], + "typestr": str, + "version": Literal[3], + "strides": Optional[Tuple[int, ...]], + "shape": Tuple[int, ...], + "mask": Union["ArrayInf", None, _ArrayLikeArg], + "stream": int, + }, +) + StringArray = TypedDict("StringArray", {"offsets": ArrayInf, "values": ArrayInf}) +CudaStringArray = TypedDict( + "CudaStringArray", {"offsets": CudaArrayInf, "values": CudaArrayInf} +) def array_hasobject(data: DataType) -> bool: @@ -77,14 +92,14 @@ def array_hasobject(data: DataType) -> bool: ) -def cuda_array_interface_dict(data: _CudaArrayLikeArg) -> ArrayInf: +def cuda_array_interface_dict(data: _CudaArrayLikeArg) -> CudaArrayInf: """Returns a dictionary storing the CUDA array interface.""" if array_hasobject(data): raise ValueError("Input data contains `object` dtype. Expecting numeric data.") ainf = data.__cuda_array_interface__ if "mask" in ainf: ainf["mask"] = ainf["mask"].__cuda_array_interface__ # type: ignore - return cast(ArrayInf, ainf) + return ainf def cuda_array_interface(data: _CudaArrayLikeArg) -> bytes: @@ -199,12 +214,18 @@ def codes(self) -> "pd.Series": ... # pylint: disable=missing-function-docstrin @property def dtype(self) -> np.dtype: ... # pylint: disable=missing-function-docstring + @property + def values(self) -> np.ndarray: ... # pylint: disable=missing-function-docstring + def to_arrow( # pylint: disable=missing-function-docstring self, ) -> Union["pa.StringArray", "pa.IntegerArray"]: ... @property - def __cuda_array_interface__(self) -> ArrayInf: ... + def __cuda_array_interface__(self) -> CudaArrayInf: ... + + @property + def _column(self) -> Any: ... def _is_df_cat(data: Any) -> TypeGuard[DfCatAccessor]: @@ -212,37 +233,64 @@ def _is_df_cat(data: Any) -> TypeGuard[DfCatAccessor]: return hasattr(data, "categories") and hasattr(data, "codes") -@functools.cache -def _arrow_typestr() -> Dict["pa.DataType", str]: +@fcache +def _arrow_npdtype() -> Dict[Any, Type[np.number]]: import pyarrow as pa - mapping = { - pa.int8(): " Tuple[np.ndarray, str]: - """Convert a numpy string array to an arrow string array.""" - lenarr = np.vectorize(len) - offsets = np.cumsum(np.concatenate([np.array([0], dtype=np.int64), lenarr(strarr)])) - values = strarr.sum() - assert "\0" not in values # arrow string array doesn't need null terminal - return offsets.astype(np.int32), values +@overload +def _arrow_buf_inf(address: int, typestr: str, size: int, stream: None) -> ArrayInf: ... -def _arrow_cat_inf( # pylint: disable=too-many-locals - cats: "pa.StringArray", - codes: Union[_ArrayLikeArg, _CudaArrayLikeArg, "pa.IntegerArray"], -) -> Tuple[StringArray, ArrayInf, Tuple]: +@overload +def _arrow_buf_inf( + address: int, typestr: str, size: int, stream: int +) -> CudaArrayInf: ... 
+ + +def _arrow_buf_inf( + address: int, typestr: str, size: int, stream: Optional[int] +) -> Union[ArrayInf, CudaArrayInf]: + if stream is not None: + jcuaif: CudaArrayInf = { + "data": (address, True), + "typestr": typestr, + "version": 3, + "strides": None, + "shape": (size,), + "mask": None, + "stream": stream, + } + return jcuaif + + jaif: ArrayInf = { + "data": (address, True), + "typestr": typestr, + "version": 3, + "strides": None, + "shape": (size,), + "mask": None, + } + return jaif + + +def _arrow_cat_names_inf(cats: "pa.StringArray") -> Tuple[StringArray, Any]: if not TYPE_CHECKING: pa = import_pyarrow() @@ -254,50 +302,77 @@ def _arrow_cat_inf( # pylint: disable=too-many-locals assert offset.is_cpu off_len = len(cats) + 1 - if offset.size != off_len * (np.iinfo(np.int32).bits / 8): - raise TypeError("Arrow dictionary type offsets is required to be 32 bit.") - joffset: ArrayInf = { - "data": (offset.address, True), - "typestr": " int: + return off_len * (np.iinfo(typ).bits // 8) + + if offset.size == get_n_bytes(np.int64): + if not isinstance(cats, pa.LargeStringArray): + arrow_str_error = "Expecting a `pyarrow.Array`." + raise TypeError(arrow_str_error + f" Got: {type(cats)}.") + # Convert to 32bit integer, arrow recommends against the use of i64. Also, + # XGBoost cannot handle large number of categories (> 2**31). + i32cats = cats.cast(pa.string()) + mask, offset, data = i32cats.buffers() + + if offset.size != get_n_bytes(np.int32): + raise TypeError( + "Arrow dictionary type offsets is required to be 32-bit integer." + ) + + joffset = _arrow_buf_inf(offset.address, " ArrayInf: - return { - "data": (buf.address, True), - "typestr": typestr, + jnames: StringArray = {"offsets": joffset, "values": jdata} + return jnames, (mask, offset, data) + + +def _arrow_array_inf( + array: "pa.Array", +) -> ArrayInf: + """Helper for handling categorical codes.""" + if not TYPE_CHECKING: + pa = import_pyarrow() + if not isinstance(array, pa.Array): # pylint: disable=E0606 + raise TypeError(f"Invalid input type: {type(array)}") + + mask, data = array.buffers() + jdata = make_array_interface( + data.address, + shape=(len(array),), + dtype=_arrow_npdtype()[array.type], + is_cuda=not data.is_cpu, + ) + + if mask is not None: + jmask: Optional[ArrayInf] = { + "data": (mask.address, True), + "typestr": " Tuple[ArrayInf, Optional[Tuple[pa.Buffer, pa.Buffer]]]: - """Helper for handling categorical codes.""" - # Handle cuDF data - if hasattr(array, "__cuda_array_interface__"): - inf = cuda_array_interface_dict(array) - return inf, None + jdata["mask"] = jmask + return jdata - # Other types (like arrow itself) are not yet supported. - raise TypeError("Invalid input type.") - cats_tmp = (mask, offset, data) - jcodes, codes_tmp = make_array_inf(codes) +def arrow_cat_inf( # pylint: disable=too-many-locals + cats: "pa.StringArray", + codes: Union[_ArrayLikeArg, _CudaArrayLikeArg, "pa.IntegerArray"], +) -> Tuple[StringArray, ArrayInf, Tuple]: + """Get the array interface representation of a string-based category array.""" + jnames, cats_tmp = _arrow_cat_names_inf(cats) + jcodes = _arrow_array_inf(codes) - return jnames, jcodes, (cats_tmp, codes_tmp) + return jnames, jcodes, (cats_tmp, None) def _ensure_np_dtype( @@ -312,74 +387,8 @@ def _ensure_np_dtype( return data, dtype -@overload -def array_interface_dict(data: np.ndarray) -> ArrayInf: ... - - -@overload -def array_interface_dict( - data: DfCatAccessor, -) -> Tuple[StringArray, ArrayInf, Tuple]: ... 
- - -@overload -def array_interface_dict( - data: "pa.DictionaryArray", -) -> Tuple[StringArray, ArrayInf, Tuple]: ... - - -def array_interface_dict( # pylint: disable=too-many-locals - data: Union[np.ndarray, DfCatAccessor], -) -> Union[ArrayInf, Tuple[StringArray, ArrayInf, Optional[Tuple]]]: +def array_interface_dict(data: np.ndarray) -> ArrayInf: """Returns an array interface from the input.""" - # Handle categorical values - if _is_df_cat(data): - cats = data.categories - # pandas uses -1 to represent missing values for categorical features - codes = data.codes.replace(-1, np.nan) - - if np.issubdtype(cats.dtype, np.floating) or np.issubdtype( - cats.dtype, np.integer - ): - # Numeric index type - name_values = cats.values - jarr_values = array_interface_dict(name_values) - code_values = codes.values - jarr_codes = array_interface_dict(code_values) - return jarr_values, jarr_codes, (name_values, code_values) - - # String index type - name_offsets, name_values = npstr_to_arrow_strarr(cats.values) - name_offsets, _ = _ensure_np_dtype(name_offsets, np.int32) - joffsets = array_interface_dict(name_offsets) - bvalues = name_values.encode("utf-8") - - ptr = ctypes.c_void_p.from_buffer(ctypes.c_char_p(bvalues)).value - assert ptr is not None - - jvalues: ArrayInf = { - "data": (ptr, True), - "typestr": "|i1", - "shape": (len(name_values),), - "strides": None, - "version": 3, - "mask": None, - } - jnames: StringArray = {"offsets": joffsets, "values": jvalues} - - code_values = codes.values - jcodes = array_interface_dict(code_values) - - buf = ( - name_offsets, - name_values, - bvalues, - code_values, - ) # store temporary values - return jnames, jcodes, buf - - # Handle numeric values - assert isinstance(data, np.ndarray) if array_hasobject(data): raise ValueError("Input data contains `object` dtype. 
Expecting numeric data.") ainf = data.__array_interface__ @@ -388,6 +397,62 @@ def array_interface_dict( # pylint: disable=too-many-locals return cast(ArrayInf, ainf) +def pd_cat_inf( # pylint: disable=too-many-locals + cats: DfCatAccessor, codes: "pd.Series" +) -> Tuple[Union[StringArray, ArrayInf], ArrayInf, Tuple]: + """Get the array interface representation of pandas category accessor.""" + # pandas uses -1 to represent missing values for categorical features + codes = codes.replace(-1, np.nan) + + if np.issubdtype(cats.dtype, np.floating) or np.issubdtype(cats.dtype, np.integer): + # Numeric index type + name_values_num = cats.values + jarr_values = array_interface_dict(name_values_num) + code_values = codes.values + jarr_codes = array_interface_dict(code_values) + return jarr_values, jarr_codes, (name_values_num, code_values) + + def npstr_to_arrow_strarr(strarr: np.ndarray) -> Tuple[np.ndarray, str]: + """Convert a numpy string array to an arrow string array.""" + lenarr = np.vectorize(len) + offsets = np.cumsum( + np.concatenate([np.array([0], dtype=np.int64), lenarr(strarr)]) + ) + values = strarr.sum() + assert "\0" not in values # arrow string array doesn't need null terminal + return offsets.astype(np.int32), values + + # String index type + name_offsets, name_values = npstr_to_arrow_strarr(cats.values) + name_offsets, _ = _ensure_np_dtype(name_offsets, np.int32) + joffsets = array_interface_dict(name_offsets) + bvalues = name_values.encode("utf-8") + + ptr = ctypes.c_void_p.from_buffer(ctypes.c_char_p(bvalues)).value + assert ptr is not None + + jvalues: ArrayInf = { + "data": (ptr, True), + "typestr": "|i1", + "shape": (len(name_values),), + "strides": None, + "version": 3, + "mask": None, + } + jnames: StringArray = {"offsets": joffsets, "values": jvalues} + + code_values = codes.values + jcodes = array_interface_dict(code_values) + + buf = ( + name_offsets, + name_values, + bvalues, + code_values, + ) # store temporary values + return jnames, jcodes, buf + + def array_interface(data: np.ndarray) -> bytes: """Make array interface str.""" interface = array_interface_dict(data) @@ -404,18 +469,283 @@ def check_cudf_meta(data: _CudaArrayLikeArg, field: str) -> None: raise ValueError(f"Missing value is not allowed for: {field}") -def cudf_cat_inf( +class ArrowSchema(ctypes.Structure): + """The Schema type from arrow C array.""" + + _fields_ = [ + ("format", ctypes.c_char_p), + ("name", ctypes.c_char_p), + ("metadata", ctypes.c_char_p), + ("flags", ctypes.c_int64), + ("n_children", ctypes.c_int64), + ("children", ctypes.POINTER(ctypes.c_void_p)), + ("dictionary", ctypes.c_void_p), + ("release", ctypes.c_void_p), + ("private_data", ctypes.c_void_p), + ] + + +class ArrowArray(ctypes.Structure): + """The Array type from arrow C array.""" + + +ArrowArray._fields_ = [ # pylint: disable=protected-access + ("length", ctypes.c_int64), + ("null_count", ctypes.c_int64), + ("offset", ctypes.c_int64), + ("n_buffers", ctypes.c_int64), + ("n_children", ctypes.c_int64), + ("buffers", ctypes.POINTER(ctypes.c_void_p)), + ("children", ctypes.POINTER(ctypes.POINTER(ArrowArray))), + ("dictionary", ctypes.POINTER(ArrowArray)), + ("release", ctypes.c_void_p), + ("private_data", ctypes.c_void_p), +] + + +class ArrowDeviceArray(ctypes.Structure): + """The Array type from arrow C device array.""" + + _fields_ = [ + ("array", ArrowArray), + ("device_id", ctypes.c_int64), + ("device_type", ctypes.c_int32), + ("sync_event", ctypes.c_void_p), + ("reserved", ctypes.c_int64 * 3), + ] + + +PyCapsule_GetName = 
ctypes.pythonapi.PyCapsule_GetName +PyCapsule_GetName.restype = ctypes.c_char_p +PyCapsule_GetName.argtypes = [ctypes.py_object] + + +PyCapsule_GetPointer = ctypes.pythonapi.PyCapsule_GetPointer +PyCapsule_GetPointer.restype = ctypes.c_void_p +PyCapsule_GetPointer.argtypes = [ctypes.py_object, ctypes.c_char_p] + + +def wait_event(event_hdl: int) -> None: + """Wait for CUDA event exported by arrow.""" + # cuda-python is a dependency of cuDF. + from cuda.bindings import runtime as cudart + + event = ctypes.cast(event_hdl, ctypes.POINTER(ctypes.c_int64)) + (status,) = cudart.cudaStreamWaitEvent( + STREAM_PER_THREAD, + event.contents.value, + cudart.cudaEventWaitDefault, + ) + if status != cudart.cudaError_t.cudaSuccess: + _, msg = cudart.cudaGetErrorString(status) + raise ValueError(msg) + + +def cudf_cat_inf( # pylint: disable=too-many-locals cats: DfCatAccessor, codes: "pd.Series" -) -> Tuple[Union[ArrayInf, StringArray], ArrayInf, Tuple]: +) -> Tuple[Union[CudaArrayInf, CudaStringArray], ArrayInf, Tuple]: """Obtain the cuda array interface for cuDF categories.""" cp = import_cupy() is_num_idx = cp.issubdtype(cats.dtype, cp.floating) or cp.issubdtype( cats.dtype, cp.integer ) if is_num_idx: - cats_ainf = cats.__cuda_array_interface__ + cats_ainf = cuda_array_interface_dict(cats) codes_ainf = cuda_array_interface_dict(codes) return cats_ainf, codes_ainf, (cats, codes) - joffset, jdata, buf = _arrow_cat_inf(cats.to_arrow(), codes) - return joffset, jdata, buf + # pylint: disable=protected-access + arrow_col = cats._column.to_pylibcudf(mode="read") + # Tuple[types.CapsuleType, types.CapsuleType] + schema, array = arrow_col.__arrow_c_device_array__() + + array_ptr = PyCapsule_GetPointer(array, PyCapsule_GetName(array)) + schema_ptr = PyCapsule_GetPointer(schema, PyCapsule_GetName(schema)) + + # Cast to arrow array + arrow_device_array = ctypes.cast( + array_ptr, ctypes.POINTER(ArrowDeviceArray) + ).contents + wait_event(arrow_device_array.sync_event) + assert arrow_device_array.device_type == 2 # 2 is CUDA + + arrow_array = arrow_device_array.array + mask, offset, data = ( + arrow_array.buffers[0], + arrow_array.buffers[1], + arrow_array.buffers[2], + ) + # Categories should not have missing values. + assert mask is None + assert arrow_array.n_children == 0 + assert arrow_array.n_buffers == 3 + assert arrow_array.offset == 0 + + # Cast to ArrowSchema + arrow_schema = ctypes.cast(schema_ptr, ctypes.POINTER(ArrowSchema)).contents + assert arrow_schema.format in (b"u", b"U", b"vu") # utf8, large utf8 + if arrow_schema.format in (b"u", b"vu"): + joffset: CudaArrayInf = _arrow_buf_inf( + offset, " None: + # The handle type is a bundle of the handle and the free call. Otherwise, we + # will have to import the lib and checkcall inside the __del__ method from the + # core module to avoid cyclic model dependency. Importing modules in __del__ can + # result in Python abort if __del__ is called during exception handling + # (interpreter is shutting down). + self._handle, self._free = handle + self._arrow_arrays = arrow_arrays + + def to_arrow(self) -> ArrowCatList: + """Get the categories in the dataset. The results are stored in a list of + (feature name, arrow array) pairs, with one array for each categorical + feature. If a feature is numerical, then the corresponding column in the list is + None. A value error will be raised if this container was created without the + `export_to_arrow` option. 
+ + """ + if self._arrow_arrays is None: + raise ValueError( + "The `export_to_arrow` option of the `get_categories` method" + " is required." + ) + return self._arrow_arrays + + def empty(self) -> bool: + """Returns True if there's no category.""" + return self._handle.value is None + + def get_handle(self) -> int: + """Internal method for retrieving the handle.""" + assert self._handle.value + return self._handle.value + + def __del__(self) -> None: + if self._handle.value is None: + return + self._free() + + +def get_ref_categories( + feature_types: Optional[Union[FeatureTypes, Categories]], +) -> Tuple[Optional[FeatureTypes], Optional[Categories]]: + """Get the optional reference categories from the `feature_types`. This is used by + various `DMatrix` where the `feature_types` is reused for specifying the reference + categories. + + """ + if isinstance(feature_types, Categories): + ref_categories = feature_types + feature_types = None + else: + ref_categories = None + return feature_types, ref_categories + + +# Type schema for storing JSON-encoded array interface +AifType: TypeAlias = List[ + Union[ + # numeric column + Union[ArrayInf, CudaArrayInf], + # categorical column + Tuple[ + # (cuda) numeric index | (cuda) string index + Union[ArrayInf, CudaArrayInf, StringArray, CudaStringArray], + Union[ArrayInf, CudaArrayInf], # codes + ], + ] +] + + +class TransformedDf(ABC): + """Internal class for storing transformed dataframe. + + Parameters + ---------- + ref_categories : + Optional reference categories used for re-coding. + + aitfs : + Array interface for each column. + + """ + + temporary_buffers: List[Tuple] = [] + + def __init__(self, ref_categories: Optional[Categories], aitfs: AifType) -> None: + self.ref_categories = ref_categories + if ref_categories is not None and ref_categories.get_handle() is not None: + aif = ref_categories.get_handle() + self.ref_aif: Optional[int] = aif + else: + self.ref_aif = None + + self.aitfs = aitfs + + def array_interface(self) -> bytes: + """Return a byte string for JSON encoded array interface.""" + if self.ref_categories is not None: + ref_inf: dict = {"ref_categories": self.ref_aif, "columns": self.aitfs} + inf = bytes(json.dumps(ref_inf), "utf-8") + else: + inf = bytes(json.dumps(self.aitfs), "utf-8") + return inf + + @property + @abstractmethod + def shape(self) -> Tuple[int, int]: + """Return the shape of the dataframe.""" diff --git a/python-package/xgboost/_typing.py b/python-package/xgboost/_typing.py index 97536ccfb2cb..b1df93f6f348 100644 --- a/python-package/xgboost/_typing.py +++ b/python-package/xgboost/_typing.py @@ -13,12 +13,11 @@ Sequence, Tuple, Type, + TypeAlias, TypeVar, Union, ) -# os.PathLike/string/numpy.array/scipy.sparse/pd.DataFrame/dt.Frame/ -# cudf.DataFrame/cupy.array/dlpack import numpy as np DataType = Any @@ -30,12 +29,18 @@ ArrayLike = Any if TYPE_CHECKING: + import pyarrow as pa + PathLike = Union[str, os.PathLike[str]] else: PathLike = Union[str, os.PathLike] + +ArrowCatCol: TypeAlias = Optional[Union["pa.StringArray", "pa.NumericArray"]] +ArrowCatList: TypeAlias = List[Tuple[str, Optional[ArrowCatCol]]] + CupyT = ArrayLike # maybe need a stub for cupy arrays NumpyOrCupy = Any -NumpyDType = Union[str, Type[np.number]] # pylint: disable=invalid-name +NumpyDType = Union[str, Type[np.number]] PandasDType = Any # real type is pandas.core.dtypes.base.ExtensionDtype FloatCompatible = Union[float, np.float32, np.float64] @@ -114,3 +119,6 @@ # template parameter _T = TypeVar("_T") _F = TypeVar("_F", bound=Callable[..., 
Any]) + +_ScoreList = Union[List[float], List[Tuple[float, float]]] +EvalsLog: TypeAlias = Dict[str, Dict[str, _ScoreList]] diff --git a/python-package/xgboost/callback.py b/python-package/xgboost/callback.py index c5d4f35580d9..30d8147417f0 100644 --- a/python-package/xgboost/callback.py +++ b/python-package/xgboost/callback.py @@ -15,6 +15,7 @@ Optional, Sequence, Tuple, + TypeAlias, TypeVar, Union, cast, @@ -23,6 +24,7 @@ import numpy from . import collective +from ._typing import EvalsLog, _ScoreList from .core import ( Booster, DMatrix, @@ -41,7 +43,6 @@ ] _Score = Union[float, Tuple[float, float]] -_ScoreList = Union[List[float], List[Tuple[float, float]]] _Model = Any # real type is Union[Booster, CVPack]; need more work @@ -54,7 +55,7 @@ class TrainingCallback(ABC): """ - EvalsLog = Dict[str, Dict[str, _ScoreList]] # pylint: disable=invalid-name + EvalsLog: TypeAlias = EvalsLog def __init__(self) -> None: pass @@ -172,12 +173,12 @@ def __init__( raise TypeError(msg) self.metric = metric - self.history: TrainingCallback.EvalsLog = collections.OrderedDict() + self.history: EvalsLog = collections.OrderedDict() self._output_margin = output_margin self.is_cv = is_cv if self.is_cv: - self.aggregated_cv = None + self.aggregated_cv: Optional[list[tuple[str, float, float]]] = None def before_training(self, model: _Model) -> _Model: """Function called before training.""" @@ -301,9 +302,7 @@ def __init__( self.learning_rates = lambda epoch: cast(Sequence, learning_rates)[epoch] super().__init__() - def after_iteration( - self, model: _Model, epoch: int, evals_log: TrainingCallback.EvalsLog - ) -> bool: + def after_iteration(self, model: _Model, epoch: int, evals_log: EvalsLog) -> bool: model.set_param("learning_rate", self.learning_rates(epoch)) return False @@ -372,7 +371,7 @@ def __init__( self.rounds = rounds self.save_best = save_best self.maximize = maximize - self.stopping_history: TrainingCallback.EvalsLog = {} + self.stopping_history: EvalsLog = {} self._min_delta = min_delta if self._min_delta < 0: raise ValueError("min_delta must be greater or equal to 0.") @@ -386,8 +385,8 @@ def before_training(self, model: _Model) -> _Model: self.starting_round = model.num_boosted_rounds() if not isinstance(model, Booster) and self.save_best: raise ValueError( - "`save_best` is not applicable to the `cv` function as it doesn't return" - " a model." + "`save_best` is not applicable to the `cv` function as it doesn't" + " return a model." ) return model @@ -454,9 +453,7 @@ def minimize(new: _Score, best: _Score) -> bool: return True return False - def after_iteration( - self, model: _Model, epoch: int, evals_log: TrainingCallback.EvalsLog - ) -> bool: + def after_iteration(self, model: _Model, epoch: int, evals_log: EvalsLog) -> bool: epoch += self.starting_round # training continuation msg = "Must have at least 1 validation dataset for early stopping." 
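As an aside on the `EvalsLog` alias these callbacks now share: it is just a nested mapping from dataset name to metric name to per-iteration scores. A minimal sketch with made-up values:

```python
from typing import Dict, List, Tuple, Union

_ScoreList = Union[List[float], List[Tuple[float, float]]]
EvalsLog = Dict[str, Dict[str, _ScoreList]]

# One entry per evaluation dataset and one list per metric; cv() records
# (mean, std) tuples where plain training records floats.
log: EvalsLog = {"validation_0": {"rmse": [0.91, 0.74, 0.62]}}
assert min(log["validation_0"]["rmse"]) == 0.62
```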
if len(evals_log.keys()) < 1: @@ -555,9 +552,7 @@ def _fmt_metric( msg = f"\t{data + '-' + metric}:{score:.5f}" return msg - def after_iteration( - self, model: _Model, epoch: int, evals_log: TrainingCallback.EvalsLog - ) -> bool: + def after_iteration(self, model: _Model, epoch: int, evals_log: EvalsLog) -> bool: if not evals_log: return False @@ -636,9 +631,7 @@ def before_training(self, model: _Model) -> _Model: self._start = model.num_boosted_rounds() return model - def after_iteration( - self, model: _Model, epoch: int, evals_log: TrainingCallback.EvalsLog - ) -> bool: + def after_iteration(self, model: _Model, epoch: int, evals_log: EvalsLog) -> bool: if self._epoch == self._iterations: path = os.path.join( self._path, diff --git a/python-package/xgboost/collective.py b/python-package/xgboost/collective.py index 715853d0ab54..ac75d54c51af 100644 --- a/python-package/xgboost/collective.py +++ b/python-package/xgboost/collective.py @@ -16,6 +16,7 @@ LOGGER = logging.getLogger("[xgboost.collective]") +_Conf: TypeAlias = Dict[str, Union[int, str]] _ArgVals: TypeAlias = Optional[Union[int, str]] _Args: TypeAlias = Dict[str, _ArgVals] @@ -36,7 +37,8 @@ class Config: See `dmlc_timeout` in :py:meth:`init`. This is only used for communicators, not the tracker. They are different parameters since the timeout for tracker limits only the time for starting and finalizing the communication group, whereas the - timeout for communicators limits the time used for collective operations. + timeout for communicators limits the time used for collective operations, like + :py:meth:`allreduce`. tracker_host_ip : See :py:class:`~xgboost.tracker.RabitTracker`. @@ -53,7 +55,7 @@ class Config: tracker_port: Optional[int] = None tracker_timeout: Optional[int] = None - def get_comm_config(self, args: _Args) -> _Args: + def get_comm_config(self, args: _Conf) -> _Conf: """Update the arguments for the communicator.""" if self.retry is not None: args["dmlc_retry"] = self.retry @@ -93,7 +95,8 @@ def init(**args: _ArgVals) -> None: - federated_client_cert: Client certificate file path. Only needed for the SSL mode. - Use upper case for environment variables, use lower case for runtime configuration. + Use upper case for environment variables, use lower case for runtime + configuration. """ _check_call(_LIB.XGCommunicatorInit(make_jcargs(**args))) @@ -121,17 +124,17 @@ def get_world_size() -> int: Returns ------- - n : int + n : Total number of process. """ ret = _LIB.XGCommunicatorGetWorldSize() return ret -def is_distributed() -> int: +def is_distributed() -> bool: """If the collective communicator is distributed.""" is_dist = _LIB.XGCommunicatorIsDistributed() - return is_dist + return bool(is_dist) def communicator_print(msg: Any) -> None: @@ -159,8 +162,8 @@ def get_processor_name() -> str: Returns ------- - name : str - the name of processor(host) + name : + The name of processor(host) """ name_str = ctypes.c_char_p() _check_call(_LIB.XGCommunicatorGetProcessorName(ctypes.byref(name_str))) @@ -253,7 +256,7 @@ class Op(IntEnum): BITWISE_XOR = 5 -def allreduce(data: np.ndarray, op: Op) -> np.ndarray: # pylint:disable=invalid-name +def allreduce(data: np.ndarray, op: Op) -> np.ndarray: """Perform allreduce, return the result. 
Parameters diff --git a/python-package/xgboost/compat.py b/python-package/xgboost/compat.py index 34939805d053..3556d26d08d1 100644 --- a/python-package/xgboost/compat.py +++ b/python-package/xgboost/compat.py @@ -1,15 +1,19 @@ -# pylint: disable=invalid-name,unused-import +# pylint: disable=unused-import """For compatibility and optional dependencies.""" import functools import importlib.util import logging import sys import types -from typing import Any, Sequence, cast +from typing import TYPE_CHECKING, Any, Sequence, TypeGuard, cast import numpy as np -from ._typing import _T +from ._typing import _T, DataType + +if TYPE_CHECKING: + import pandas as pd + import pyarrow as pa assert sys.version_info[0] == 3, "Python 2 is no longer supported." @@ -31,17 +35,6 @@ def lazy_isinstance(instance: Any, module: str, name: str) -> bool: return is_same_module and has_same_name -# pandas -try: - from pandas import DataFrame, Series - - PANDAS_INSTALLED = True -except ImportError: - DataFrame = object - Series = object - PANDAS_INSTALLED = False - - # sklearn try: from sklearn import __version__ as _sklearn_version @@ -139,6 +132,14 @@ def import_pyarrow() -> types.ModuleType: return pa +@functools.cache +def import_pandas() -> types.ModuleType: + """Import pandas with memory cache.""" + import pandas as pd + + return pd + + @functools.cache def import_polars() -> types.ModuleType: """Import polars with memory cache.""" @@ -147,6 +148,14 @@ def import_polars() -> types.ModuleType: return pl +@functools.cache +def is_pandas_available() -> bool: + """Check the pandas package is available or not.""" + if importlib.util.find_spec("pandas") is None: + return False + return True + + try: import scipy.sparse as scipy_sparse from scipy.sparse import csr_matrix as scipy_csr @@ -155,6 +164,84 @@ def import_polars() -> types.ModuleType: scipy_csr = object +def _is_polars_lazyframe(data: DataType) -> bool: + return lazy_isinstance(data, "polars.lazyframe.frame", "LazyFrame") + + +def _is_polars_series(data: DataType) -> bool: + return lazy_isinstance(data, "polars.series.series", "Series") + + +def _is_polars(data: DataType) -> bool: + lf = _is_polars_lazyframe(data) + df = lazy_isinstance(data, "polars.dataframe.frame", "DataFrame") + return lf or df + + +def _is_arrow(data: DataType) -> TypeGuard["pa.Table"]: + return lazy_isinstance(data, "pyarrow.lib", "Table") + + +def _is_cudf_df(data: DataType) -> bool: + return lazy_isinstance(data, "cudf.core.dataframe", "DataFrame") + + +def _is_cudf_ser(data: DataType) -> bool: + return lazy_isinstance(data, "cudf.core.series", "Series") + + +def _is_cudf_pandas(data: DataType) -> bool: + """Must go before both pandas and cudf checks.""" + return (_is_pandas_df(data) or _is_pandas_series(data)) and lazy_isinstance( + type(data), "cudf.pandas.fast_slow_proxy", "_FastSlowProxyMeta" + ) + + +def _is_pandas_df(data: DataType) -> TypeGuard["pd.DataFrame"]: + return lazy_isinstance(data, "pandas.core.frame", "DataFrame") + + +def _is_pandas_series(data: DataType) -> TypeGuard["pd.Series"]: + return lazy_isinstance(data, "pandas.core.series", "Series") + + +def _is_modin_df(data: DataType) -> bool: + return lazy_isinstance(data, "modin.pandas.dataframe", "DataFrame") + + +def _is_modin_series(data: DataType) -> bool: + return lazy_isinstance(data, "modin.pandas.series", "Series") + + +def is_dataframe(data: DataType) -> bool: + """Whether the input is a dataframe. 
Currently supported dataframes: + + - pandas + - cudf + - cudf.pandas + - polars + - pyarrow + - modin + + + """ + return any( + p(data) + for p in ( + _is_polars, + _is_polars_series, + _is_arrow, + _is_cudf_df, + _is_cudf_ser, + _is_cudf_pandas, + _is_pandas_df, + _is_pandas_series, + _is_modin_df, + _is_modin_series, + ) + ) + + def concat(value: Sequence[_T]) -> _T: # pylint: disable=too-many-return-statements """Concatenate row-wise.""" if isinstance(value[0], np.ndarray): @@ -167,7 +254,7 @@ def concat(value: Sequence[_T]) -> _T: # pylint: disable=too-many-return-statem if scipy_sparse and isinstance(value[0], scipy_sparse.spmatrix): # other sparse format will be converted to CSR. return scipy_sparse.vstack(value, format="csr") - if PANDAS_INSTALLED and isinstance(value[0], (DataFrame, Series)): + if _is_pandas_df(value[0]) or _is_pandas_series(value[0]): from pandas import concat as pd_concat return pd_concat(value, axis=0) diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index 2d058365c7ad..58aef079a839 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -1,6 +1,7 @@ -# pylint: disable=too-many-arguments, too-many-branches, invalid-name +# pylint: disable=too-many-arguments, too-many-branches # pylint: disable=too-many-lines, too-many-locals """Core XGBoost Library.""" + import copy import ctypes import json @@ -38,6 +39,7 @@ import scipy.sparse from ._data_utils import ( + Categories, TransformedDf, array_interface, cuda_array_interface, @@ -47,6 +49,7 @@ from ._typing import ( _T, ArrayLike, + ArrowCatList, BoosterParam, CFloatPtr, CNumeric, @@ -67,17 +70,17 @@ c_bst_ulong, ) from .compat import ( - PANDAS_INSTALLED, - DataFrame, import_polars, import_pyarrow, + is_pandas_available, is_pyarrow_available, py_str, ) -from .libpath import find_lib_path, is_sphinx_build +from .libpath import find_lib_path +from .objective import TreeObjective if TYPE_CHECKING: - import pyarrow as pa + from pandas import DataFrame as PdDataFrame class XGBoostError(ValueError): @@ -346,7 +349,7 @@ def _check_distributed_params(kwargs: Dict[str, Any]) -> None: def _validate_feature_info( feature_info: Sequence[str], n_features: int, is_column_split: bool, name: str ) -> List[str]: - if isinstance(feature_info, str) or not isinstance(feature_info, Sequence): + if not isinstance(feature_info, (str, Sequence, Categories)): raise TypeError( f"Expecting a sequence of strings for {name}, got: {type(feature_info)}" ) @@ -377,30 +380,6 @@ def build_info() -> dict: return res -def _check_glibc() -> None: - if is_sphinx_build(): - return - - glibc_ver = build_info().get("GLIBC_VERSION", None) - if glibc_ver is not None and ( - glibc_ver[0] < 2 or glibc_ver[0] == 2 and glibc_ver[1] < 28 - ): - warnings.warn( - "Your system has an old version of glibc (< 2.28). We will stop supporting " - "Linux distros with glibc older than 2.28 after **May 31, 2025**. " - "Please upgrade to a recent Linux distro (with glibc >= 2.28) to use " - "future versions of XGBoost.\n" - "Note: You have installed the 'manylinux2014' variant of XGBoost. Certain " - "features such as GPU algorithms or federated learning are not available. 
" - "To use these features, please upgrade to a recent Linux distro with glibc " - "2.28+, and install the 'manylinux_2_28' variant.", - FutureWarning, - ) - - -_check_glibc() - - def _numpy2ctypes_type(dtype: Type[np.number]) -> Type[CNumeric]: _NUMPY_TO_CTYPES_MAPPING: Dict[Type[np.number], Type[CNumeric]] = { np.float32: ctypes.c_float, @@ -540,7 +519,7 @@ class DataIter(ABC): # pylint: disable=too-many-instance-attributes with GPU-based :py:class:`ExtMemQuantileDMatrix`. When using GPU-based external memory with the data cached in the host memory, XGBoost can concatenate the pages internally to increase the batch size for the GPU. The default page size - is about 1/8 of the total device memory. Users can manually set the value based + is about 1/16 of the total device memory. Users can manually set the value based on the actual hardware and datasets. Set this to 0 to disable page concatenation. @@ -779,6 +758,63 @@ def inner_f(*args: Any, **kwargs: Any) -> _T: _deprecate_positional_args = require_keyword_args(False) +def _get_categories( + cfn: Callable[[ctypes.c_char_p], int], + feature_names: FeatureNames, + n_features: int, +) -> ArrowCatList: + if not is_pyarrow_available(): + raise ImportError( + "`pyarrow` is required for exporting categories to arrow arrays." + ) + + if not TYPE_CHECKING: + pa = import_pyarrow() + else: + import pyarrow as pa + + results: ArrowCatList = [] + + ret = ctypes.c_char_p() + _check_call(cfn(ret)) + if ret.value is None: + results = [(feature_names[i], None) for i in range(n_features)] + return results + + retstr = ret.value.decode() # pylint: disable=no-member + jcats = json.loads(retstr) + assert isinstance(jcats, list) and len(jcats) == n_features + + for fidx in range(n_features): + f_jcats = jcats[fidx] + if f_jcats is None: + # Numeric data + results.append((feature_names[fidx], None)) + continue + + if "offsets" not in f_jcats: + values = from_array_interface(f_jcats) + pa_values = pa.Array.from_pandas(values) + results.append((feature_names[fidx], pa_values)) + continue + + joffsets = f_jcats["offsets"] + jvalues = f_jcats["values"] + offsets = from_array_interface(joffsets) + values = from_array_interface(jvalues) + pa_offsets = pa.array(offsets).buffers() + pa_values = pa.array(values).buffers() + assert ( + pa_offsets[0] is None and pa_values[0] is None + ), "Should not have null mask." + pa_dict = pa.StringArray.from_buffers( + len(offsets) - 1, pa_offsets[1], pa_values[1] + ) + results.append((feature_names[fidx], pa_dict)) + + return results + + @unique class DataSplitMode(IntEnum): """Supported data split mode for DMatrix.""" @@ -807,7 +843,7 @@ def __init__( missing: Optional[float] = None, silent: bool = False, feature_names: Optional[FeatureNames] = None, - feature_types: Optional[FeatureTypes] = None, + feature_types: Optional[Union[FeatureTypes, Categories]] = None, nthread: Optional[int] = None, group: Optional[ArrayLike] = None, qid: Optional[ArrayLike] = None, @@ -850,21 +886,21 @@ def __init__( feature_types : Set types for features. If `data` is a DataFrame type and passing - `enable_categorical=True`, the types will be deduced automatically - from the column types. + `enable_categorical=True`, the types will be deduced automatically from the + column types. 
- Otherwise, one can pass a list-like input with the same length as number - of columns in `data`, with the following possible values: + Otherwise, one can pass a list-like input with the same length as number of + columns in `data`, with the following possible values: - "c", which represents categorical columns. - "q", which represents numeric columns. - "int", which represents integer columns. - "i", which represents boolean columns. - Note that, while categorical types are treated differently from - the rest for model fitting purposes, the other types do not influence - the generated model, but have effects in other functionalities such as - feature importances. + Note that, while categorical types are treated differently from the rest for + model fitting purposes, the other types do not influence the generated + model, but have effects in other functionalities such as feature + importances. For categorical features, the input is assumed to be preprocessed and encoded by the users. The encoding can be done via @@ -872,6 +908,13 @@ def __init__( `.cat.codes` method. This is useful when users want to specify categorical features without having to construct a dataframe as input. + .. versionadded:: 3.1.0 + + Alternatively, users can pass a :py:class:`~xgboost.core.Categories` object + returned from previous training as a reference for re-coding. One can obtain + the reference with the :py:meth:`.get_categories` method from the previous + training DMatrix or the Booster. This feature is experimental. + nthread : Number of threads to use for loading data when parallelization is applicable. If -1, uses maximum threads available on the system. @@ -891,12 +934,13 @@ def __init__( .. note:: This parameter is experimental - Experimental support of specializing for categorical features. + Experimental support of specializing for categorical features. See + :doc:`/tutorials/categorical` for more info. - If passing `True` and `data` is a data frame (from supported libraries such as - Pandas, Modin or cuDF), The DMatrix recognizes categorical columns and - automatically set the `feature_types` parameter. If `data` is not a data - frame, this argument is ignored. + If passing `True` and `data` is a data frame (from supported libraries such + as Pandas, Modin, polars, and cuDF), the DMatrix recognizes categorical + columns and automatically sets the `feature_types` parameter. If `data` is + not a data frame, this argument is ignored. If passing `False` and `data` is a data frame with categorical columns, it will result in an error. @@ -904,7 +948,10 @@ def __init__( See notes in the :py:class:`DataIter` for consistency requirement when the input is an iterator. - JSON/UBJSON serialization format is required for this. + .. versionchanged:: 3.1.0 + + XGBoost can remember the encoding of categories when the input is a + dataframe. """ if group is not None and qid is not None: @@ -1284,69 +1331,45 @@ def get_quantile_cut(self) -> Tuple[np.ndarray, np.ndarray]: assert data.dtype == np.float32 return indptr, data - def get_categories(self) -> Optional[Dict[str, "pa.DictionaryArray"]]: - """Get the categories in the dataset. Return `None` if there's no categorical - features. + def get_categories(self, export_to_arrow: bool = False) -> Categories: + """Get the categories in the dataset. + + .. versionadded:: 3.1.0 .. warning:: - This function is still working in progress. + This function is experimental. - .. 
versionadded:: 3.1.0 + Parameters + ---------- + export_to_arrow : + The returned container will contain a list of ``pyarrow`` arrays for the + categories. See the :py:meth:`~Categories.to_arrow` for more info. """ - if not is_pyarrow_available(): - raise ImportError("`pyarrow` is required for exporting categories.") - - if TYPE_CHECKING: - import pyarrow as pa - else: - pa = import_pyarrow() - - n_features = self.num_col() fnames = self.feature_names + n_features = self.num_col() if fnames is None: fnames = [str(i) for i in range(n_features)] - results: Dict[str, "pa.DictionaryArray"] = {} - - ret = ctypes.c_char_p() - _check_call(_LIB.XGBDMatrixGetCategories(self.handle, ctypes.byref(ret))) - if ret.value is None: - return None - - retstr = ret.value.decode() # pylint: disable=no-member - jcats = json.loads(retstr) - assert isinstance(jcats, list) and len(jcats) == n_features - - for fidx in range(n_features): - f_jcats = jcats[fidx] - if f_jcats is None: - # Numeric data - results[fnames[fidx]] = None - continue - - if "offsets" not in f_jcats: - values = from_array_interface(f_jcats) - pa_values = pa.Array.from_pandas(values) - results[fnames[fidx]] = pa_values - continue - - joffsets = f_jcats["offsets"] - jvalues = f_jcats["values"] - offsets = from_array_interface(joffsets, True) - values = from_array_interface(jvalues, True) - pa_offsets = pa.array(offsets).buffers() - pa_values = pa.array(values).buffers() - assert ( - pa_offsets[0] is None and pa_values[0] is None - ), "Should not have null mask." - pa_dict = pa.StringArray.from_buffers( - len(offsets) - 1, pa_offsets[1], pa_values[1] + hdl = ctypes.c_void_p() + if export_to_arrow: + arrow_arrays = _get_categories( + lambda ret: _LIB.XGDMatrixGetCategoriesExportToArrow( + self.handle, None, ctypes.byref(hdl), ctypes.byref(ret) + ), + fnames, + n_features, + ) + else: + arrow_arrays = None + _check_call( + _LIB.XGDMatrixGetCategories(self.handle, None, ctypes.byref(hdl)) ) - results[fnames[fidx]] = pa_dict - return results + return Categories( + (hdl, lambda: _check_call(_LIB.XGBCategoriesFree(hdl))), arrow_arrays + ) def num_row(self) -> int: """Get the number of rows in the DMatrix.""" @@ -1645,12 +1668,12 @@ class QuantileDMatrix(DMatrix, _RefMixIn): max_quantile_batches : For GPU-based inputs from an iterator, XGBoost handles incoming batches with - multiple growing substreams. This parameter sets the maximum number of batches - before XGBoost can cut the sub-stream and create a new one. This can help bound - the memory usage. By default, XGBoost grows new sub-streams exponentially until - batches are exhausted. Only used for the training dataset and the default is - None (unbounded). Lastly, if the `data` is a single batch instead of an - iterator, this parameter has no effect. + multiple growing sub-streams. This parameter sets the maximum number of batches + before XGBoost can cut a sub-stream and create a new one. This can help bound + the memory usage. By default, XGBoost grows a sub-stream exponentially until + batches are exhausted. This option is only used for the training dataset and the + default is None (unbounded). Lastly, if the `data` is a single batch instead of + an iterator, this parameter has no effect. .. 
versionadded:: 3.0.0 @@ -1816,8 +1839,8 @@ def __init__( # pylint: disable=super-init-not-called max_bin: Optional[int] = None, ref: Optional[DMatrix] = None, enable_categorical: bool = False, - max_num_device_pages: Optional[int] = None, max_quantile_batches: Optional[int] = None, + cache_host_ratio: Optional[float] = None, ) -> None: """ Parameters @@ -1825,18 +1848,20 @@ def __init__( # pylint: disable=super-init-not-called data : A user-defined :py:class:`DataIter` for loading data. - max_num_device_pages : - For a GPU-based validation dataset, XGBoost can optionally cache some pages - in device memory instead of host memory to reduce data transfer. Each cached - page has size of `min_cache_page_bytes`. Set this to 0 if you don't want - pages to be cached in the device memory. This can be useful for preventing - OOM error where there are more than one validation datasets. The default - number of device-based page is 1. Lastly, XGBoost infers whether a dataset - is used for valdiation by checking whether ref is not None. - max_quantile_batches : See :py:class:`QuantileDMatrix`. + cache_host_ratio : + + .. versionadded:: 3.1.0 + + Used by the GPU implementation. For GPU-based inputs, XGBoost can split the + cache into host and device caches to reduce the data transfer overhead. This + parameter specifies the size of host cache compared to the size of the + entire cache: :math:`host / (host + device)`. + + See :ref:`extmem-adaptive-cache` for more info. + """ self.max_bin = max_bin self.missing = missing if missing is not None else np.nan @@ -1846,8 +1871,10 @@ def __init__( # pylint: disable=super-init-not-called data, ref, enable_categorical=enable_categorical, - max_num_device_pages=max_num_device_pages, max_quantile_blocks=max_quantile_batches, + cache_host_ratio=( + None if cache_host_ratio is None else float(cache_host_ratio) + ), ) assert self.handle is not None @@ -1857,8 +1884,8 @@ def _init( ref: Optional[DMatrix], *, enable_categorical: bool, - max_num_device_pages: Optional[int] = None, max_quantile_blocks: Optional[int] = None, + cache_host_ratio: Optional[float] = None, ) -> None: args = make_jcargs( missing=self.missing, @@ -1867,9 +1894,9 @@ def _init( on_host=it.on_host, max_bin=self.max_bin, min_cache_page_bytes=it.min_cache_page_bytes, - max_num_device_pages=max_num_device_pages, # It's called blocks internally due to block-based quantile sketching. 
max_quantile_blocks=max_quantile_blocks, + cache_host_ratio=cache_host_ratio, ) handle = ctypes.c_void_p() reset_callback, next_callback = it.get_callbacks(enable_categorical) @@ -1927,7 +1954,6 @@ def __init__( cache: Optional[Sequence[DMatrix]] = None, model_file: Optional[Union["Booster", bytearray, os.PathLike, str]] = None, ) -> None: - # pylint: disable=invalid-name """ Parameters ---------- @@ -1941,7 +1967,7 @@ def __init__( cache = cache if cache is not None else [] for d in cache: if not isinstance(d, DMatrix): - raise TypeError(f"invalid cache item: {type(d).__name__}", cache) + raise TypeError(f"Invalid cache item: {type(d).__name__}", cache) dmats = c_array(ctypes.c_void_p, [d.handle for d in cache]) self.handle: Optional[ctypes.c_void_p] = ctypes.c_void_p() @@ -2043,7 +2069,7 @@ def __del__(self) -> None: self.handle = None def __getstate__(self) -> Dict: - # can't pickle ctypes pointers, put model content in bytearray + # can't pickle ctypes pointers, put model content in a bytearray this = self.__dict__.copy() handle = this["handle"] if handle is not None: @@ -2059,7 +2085,7 @@ def __getstate__(self) -> Dict: return this def __setstate__(self, state: Dict) -> None: - # reconstruct handle from raw data + # reconstruct the handle from raw data handle = state["handle"] if handle is not None: buf = handle @@ -2304,6 +2330,33 @@ def feature_names(self) -> Optional[FeatureNames]: def feature_names(self, features: Optional[FeatureNames]) -> None: self._set_feature_info(features, "feature_name") + def get_categories(self, export_to_arrow: bool = False) -> Categories: + """Same method as :py:meth:`DMatrix.get_categories`.""" + + fnames = self.feature_names + n_features = self.num_features() + if fnames is None: + fnames = [str(i) for i in range(n_features)] + + hdl = ctypes.c_void_p() + if export_to_arrow: + arrow_arrays = _get_categories( + lambda ret: _LIB.XGBoosterGetCategoriesExportToArrow( + self.handle, None, ctypes.byref(hdl), ctypes.byref(ret) + ), + fnames, + n_features, + ) + else: + arrow_arrays = None + _check_call( + _LIB.XGBoosterGetCategories(self.handle, None, ctypes.byref(hdl)) + ) + + return Categories( + (hdl, lambda: _check_call(_LIB.XGBCategoriesFree(hdl))), arrow_arrays + ) + def set_param( self, params: Union[Dict, Iterable[Tuple[str, Any]], str], @@ -2325,13 +2378,18 @@ def set_param( for key, val in cast(Iterable[Tuple[str, str]], params): if isinstance(val, np.ndarray): val = val.tolist() + elif hasattr(val, "__cuda_array_interface__") and hasattr(val, "tolist"): + val = val.tolist() if val is not None: _check_call( _LIB.XGBoosterSetParam(self.handle, c_str(key), c_str(str(val))) ) def update( - self, dtrain: DMatrix, iteration: int, fobj: Optional[Objective] = None + self, + dtrain: DMatrix, + iteration: int, + fobj: Optional[Objective] = None, ) -> None: """Update for one iteration, with objective function calculated internally. This function should not be called directly by users. 
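Putting the new `get_categories` methods and the `Categories`-as-`feature_types` path together, the intended re-coding workflow looks roughly like the sketch below; `df_train`, `df_new`, and `y_train` are hypothetical pandas objects with categorical columns:

```python
import xgboost as xgb

Xy = xgb.DMatrix(df_train, label=y_train, enable_categorical=True)
booster = xgb.train({"tree_method": "hist"}, Xy)

# Export the learned encoding; export_to_arrow=True keeps per-feature
# pyarrow arrays reachable through to_arrow().
cats = booster.get_categories(export_to_arrow=True)
for fname, arr in cats.to_arrow():  # (name, array) pairs, None if numeric
    print(fname, arr)

# Reuse the container as a re-coding reference for new data.
m_new = xgb.DMatrix(df_new, feature_types=cats, enable_categorical=True)
```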
@@ -2347,7 +2405,7 @@ def update( """ if not isinstance(dtrain, DMatrix): - raise TypeError(f"invalid training matrix: {type(dtrain).__name__}") + raise TypeError(f"Invalid training matrix: {type(dtrain).__name__}") self._assign_dmatrix_features(dtrain) if fobj is None: @@ -2358,11 +2416,31 @@ def update( ) else: pred = self.predict(dtrain, output_margin=True, training=True) - grad, hess = fobj(pred, dtrain) - self.boost(dtrain, iteration=iteration, grad=grad, hess=hess) + vgrad: Optional[ArrayLike] + vhess: Optional[ArrayLike] + vgrad, vhess = fobj(pred, dtrain) + if isinstance(fobj, TreeObjective): + sgrad, shess = fobj.split_grad(vgrad, vhess) + else: + sgrad, shess = vgrad, vhess + vgrad, vhess = None, None + self.boost( + dtrain, + iteration=iteration, + grad=sgrad, + hess=shess, + _vgrad=vgrad, + _vhess=vhess, + ) def boost( - self, dtrain: DMatrix, iteration: int, grad: NumpyOrCupy, hess: NumpyOrCupy + self, + dtrain: DMatrix, + iteration: int, + grad: NumpyOrCupy, + hess: NumpyOrCupy, + _vgrad: Optional[NumpyOrCupy] = None, # WIP vector-leaf support + _vhess: Optional[NumpyOrCupy] = None, # WIP vector-leaf support ) -> None: """Boost the booster for one iteration with customized gradient statistics. Like :py:func:`xgboost.Booster.update`, this function should not be called @@ -2413,15 +2491,29 @@ def grad_arrinf(array: NumpyOrCupy) -> bytes: return interface - _check_call( - _LIB.XGBoosterTrainOneIter( - self.handle, - dtrain.handle, - iteration, - grad_arrinf(grad), - grad_arrinf(hess), + if _vgrad is not None or _vhess is not None: + assert _vhess is not None and _vgrad is not None + _check_call( + _LIB.XGBoosterTrainOneIterWithSplitGrad( + self.handle, + dtrain.handle, + iteration, + grad_arrinf(grad), + grad_arrinf(hess), + grad_arrinf(_vgrad), + grad_arrinf(_vhess), + ) + ) + else: + _check_call( + _LIB.XGBoosterTrainOneIter( + self.handle, + dtrain.handle, + iteration, + grad_arrinf(grad), + grad_arrinf(hess), + ) ) - ) def eval_set( self, @@ -2430,7 +2522,6 @@ def eval_set( feval: Optional[Metric] = None, output_margin: bool = True, ) -> str: - # pylint: disable=invalid-name """Evaluate a set of data. Parameters @@ -2555,9 +2646,9 @@ def predict( prediction. Note the final column is the bias term. approx_contribs : - Approximate the contributions of each feature. Used when ``pred_contribs`` or - ``pred_interactions`` is set to True. Changing the default of this parameter - (False) is not recommended. + Approximate the contributions of each feature. Used when ``pred_contribs`` + or ``pred_interactions`` is set to True. Changing the default of this + parameter (False) is not recommended. pred_interactions : When this is True the output will be a matrix of size (nsample, @@ -2575,10 +2666,10 @@ def predict( training : Whether the prediction value is used for training. This can effect `dart` - booster, which performs dropouts during training iterations but use all trees - for inference. If you want to obtain result with dropouts, set this parameter - to `True`. Also, the parameter is set to true when obtaining prediction for - custom objective function. + booster, which performs dropouts during training iterations but uses all + trees for inference. If you want to obtain results with dropouts, set this + parameter to `True`. Also, the parameter is set to true when obtaining + predictions for a custom objective function. .. versionadded:: 1.0.0 @@ -2591,8 +2682,8 @@ def predict( .. 
versionadded:: 1.4.0 strict_shape : - When set to True, output shape is invariant to whether classification is used. - For both value and margin prediction, the output shape is (n_samples, + When set to True, output shape is invariant to whether classification is + used. For both value and margin prediction, the output shape is (n_samples, n_groups), n_groups == 1 when multi-class is not used. Default to False, in which case the output shape can be (n_samples, ) if multi-class is not used. @@ -3112,8 +3203,8 @@ def get_fscore(self, fmap: PathLike = "") -> Dict[str, Union[float, List[float]] .. note:: Zero-importance features will not be included - Keep in mind that this function does not include zero-importance feature, i.e. - those features that have not been used in any split conditions. + Keep in mind that this function does not include zero-importance features, + i.e. those features that have not been used in any split conditions. Parameters ---------- @@ -3129,7 +3220,8 @@ def get_score( """Get feature importance of each feature. For tree model Importance type can be defined as: - * 'weight': the number of times a feature is used to split the data across all trees. + * 'weight': the number of times a feature is used to split the data across all + trees. * 'gain': the average gain across all splits the feature is used in. * 'cover': the average coverage across all splits the feature is used in. * 'total_gain': the total gain across all splits the feature is used in. @@ -3137,13 +3229,13 @@ def get_score( .. note:: - For linear model, only "weight" is defined and it's the normalized coefficients - without bias. + For linear models, only "weight" is defined and it's the normalized + coefficients without bias. .. note:: Zero-importance features will not be included - Keep in mind that this function does not include zero-importance feature, i.e. - those features that have not been used in any split conditions. + Keep in mind that this function does not include zero-importance features, + i.e. those features that have not been used in any split conditions. Parameters ---------- @@ -3189,7 +3281,7 @@ def get_score( return results # pylint: disable=too-many-statements - def trees_to_dataframe(self, fmap: PathLike = "") -> DataFrame: + def trees_to_dataframe(self, fmap: PathLike = "") -> "PdDataFrame": """Parse a boosted tree model text dump into a pandas DataFrame structure. This feature is only defined when the decision tree model is chosen as base @@ -3202,8 +3294,10 @@ def trees_to_dataframe(self, fmap: PathLike = "") -> DataFrame: The name of feature map file. """ # pylint: disable=too-many-locals + from pandas import DataFrame + fmap = os.fspath(os.path.expanduser(fmap)) - if not PANDAS_INSTALLED: + if not is_pandas_available(): raise ImportError( ( "pandas must be available to use this method." @@ -3354,7 +3448,7 @@ def get_split_value_histogram( fmap: PathLike = "", bins: Optional[int] = None, as_pandas: bool = True, - ) -> Union[np.ndarray, DataFrame]: + ) -> Union[np.ndarray, "PdDataFrame"]: """Get split value histogram of a feature Parameters @@ -3410,9 +3504,11 @@ def get_split_value_histogram( "Split value historgam doesn't support categorical split."
) - if as_pandas and PANDAS_INSTALLED: + if as_pandas and is_pandas_available(): + from pandas import DataFrame + return DataFrame(nph_stacked, columns=["SplitValue", "Count"]) - if as_pandas and not PANDAS_INSTALLED: + if as_pandas and not is_pandas_available(): warnings.warn( "Returning histogram as ndarray" " (as_pandas == True, but pandas is not installed).", diff --git a/python-package/xgboost/dask/__init__.py b/python-package/xgboost/dask/__init__.py index 6db30fbf5c3d..252a34d6c1ed 100644 --- a/python-package/xgboost/dask/__init__.py +++ b/python-package/xgboost/dask/__init__.py @@ -1,5 +1,5 @@ # pylint: disable=too-many-arguments, too-many-locals -# pylint: disable=missing-class-docstring, invalid-name +# pylint: disable=missing-class-docstring # pylint: disable=too-many-lines """ Dask extensions for distributed training @@ -55,7 +55,7 @@ import logging from collections import defaultdict from contextlib import contextmanager -from functools import partial, update_wrapper, wraps +from functools import partial, update_wrapper from threading import Thread from typing import ( Any, @@ -87,12 +87,13 @@ from distributed import Future from .. import collective, config +from .._data_utils import Categories from .._typing import FeatureNames, FeatureTypes, IterationRange from ..callback import TrainingCallback from ..collective import Config as CollConfig from ..collective import _Args as CollArgs from ..collective import _ArgVals as CollArgsVals -from ..compat import DataFrame, lazy_isinstance +from ..compat import _is_cudf_df from ..core import ( Booster, DMatrix, @@ -120,8 +121,8 @@ ) from ..tracker import RabitTracker from ..training import train as worker_train -from .data import _create_dmatrix, _create_quantile_dmatrix, no_group_split -from .utils import get_address_from_user, get_n_threads +from .data import _get_dmatrices, no_group_split +from .utils import _DASK_2024_12_1, _DASK_2025_3_0, get_address_from_user, get_n_threads _DaskCollection: TypeAlias = Union[da.Array, dd.DataFrame, dd.Series] _DataT: TypeAlias = Union[da.Array, dd.DataFrame] # do not use series as predictor @@ -237,15 +238,11 @@ def __init__(self, **args: CollArgsVals) -> None: super().__init__(**args) worker = distributed.get_worker() - with distributed.worker_client() as client: - info = client.scheduler_info() - w = info["workers"][worker.address] - wid = w["id"] # We use task ID for rank assignment which makes the RABIT rank consistent (but # not the same as task ID is string and "10" is sorted before "2") with dask - # worker ID. This outsources the rank assignment to dask and prevents + # worker name. This outsources the rank assignment to dask and prevents # non-deterministic issue. - self.args["DMLC_TASK_ID"] = f"[xgboost.dask-{wid}]:" + str(worker.address) + self.args["DMLC_TASK_ID"] = f"[xgboost.dask-{worker.name}]:{worker.address}" def _get_client(client: Optional["distributed.Client"]) -> "distributed.Client": @@ -318,6 +315,10 @@ def __init__( self.feature_names = feature_names self.feature_types = feature_types + if isinstance(feature_types, Categories): + raise TypeError( + "The Dask interface can handle categories from DataFrame automatically." 
+ ) self.missing = missing if missing is not None else numpy.nan self.enable_categorical = enable_categorical @@ -354,7 +355,7 @@ def __init__( label_upper_bound=label_upper_bound, ) - def __await__(self) -> Generator: + def __await__(self) -> Generator[None, None, "DaskDMatrix"]: return self._init.__await__() async def _map_local_data( @@ -639,12 +640,6 @@ def _create_fn_args(self, worker_addr: str) -> Dict[str, Any]: return args -def _dmatrix_from_list_of_parts(is_quantile: bool, **kwargs: Any) -> DMatrix: - if is_quantile: - return _create_quantile_dmatrix(**kwargs) - return _create_dmatrix(**kwargs) - - async def _get_rabit_args( client: "distributed.Client", n_workers: int, @@ -682,6 +677,7 @@ async def _get_rabit_args( _start_tracker, n_workers, sched_addr, user_addr, coll_cfg.tracker_timeout ) env = coll_cfg.get_comm_config(env) + assert env is not None return env @@ -721,37 +717,6 @@ async def _check_workers_are_alive( raise RuntimeError(f"Missing required workers: {missing_workers}") -def _get_dmatrices( - train_ref: dict, - train_id: int, - *refs: dict, - evals_id: Sequence[int], - evals_name: Sequence[str], - n_threads: int, -) -> Tuple[DMatrix, List[Tuple[DMatrix, str]]]: - # Create training DMatrix - Xy = _dmatrix_from_list_of_parts(**train_ref, nthread=n_threads) - # Create evaluation DMatrices - evals: List[Tuple[DMatrix, str]] = [] - for i, ref in enumerate(refs): - # Same DMatrix as the training - if evals_id[i] == train_id: - evals.append((Xy, evals_name[i])) - continue - if ref.get("ref", None) is not None: - if ref["ref"] != train_id: - raise ValueError( - "The training DMatrix should be used as a reference to evaluation" - " `QuantileDMatrix`." - ) - del ref["ref"] - eval_Xy = _dmatrix_from_list_of_parts(**ref, nthread=n_threads, ref=Xy) - else: - eval_Xy = _dmatrix_from_list_of_parts(**ref, nthread=n_threads) - evals.append((eval_Xy, evals_name[i])) - return Xy, evals - - async def _train_async( *, client: "distributed.Client", @@ -803,6 +768,8 @@ def do_train( # pylint: disable=too-many-positional-arguments evals_id=evals_id, evals_name=evals_name, n_threads=n_threads, + # We need the model for reference categories. + model=xgb_model, ) booster = worker_train( @@ -906,12 +873,11 @@ def train( # pylint: disable=unused-argument """ client = _get_client(client) - args = locals() return client.sync( _train_async, global_config=config.get_config(), dconfig=_get_dask_config(), - **args, + **locals(), ) @@ -929,7 +895,7 @@ def _maybe_dataframe( # In older versions of dask, the partition is actually a numpy array when input # is dataframe. 
    index = getattr(data, "index", None)
-    if lazy_isinstance(data, "cudf.core.dataframe", "DataFrame"):
+    if _is_cudf_df(data):
         import cudf

         if prediction.size == 0:
@@ -939,10 +905,14 @@ def _maybe_dataframe(
                 prediction, columns=columns, dtype=numpy.float32, index=index
             )
     else:
+        import pandas as pd
+
         if prediction.size == 0:
-            return DataFrame({}, columns=columns, dtype=numpy.float32, index=index)
+            return pd.DataFrame(
+                {}, columns=columns, dtype=numpy.float32, index=index
+            )

-        prediction = DataFrame(
+        prediction = pd.DataFrame(
             prediction, columns=columns, dtype=numpy.float32, index=index
         )
     return prediction
@@ -1476,35 +1446,33 @@ async def _predict_async(
         iteration_range: Optional[IterationRange],
     ) -> Any:
         iteration_range = self._get_iteration_range(iteration_range)
-        if self._can_use_inplace_predict():
-            predts = await inplace_predict(
-                client=self.client,
-                model=self.get_booster(),
-                data=data,
-                iteration_range=iteration_range,
-                predict_type="margin" if output_margin else "value",
-                missing=self.missing,
-                base_margin=base_margin,
-                validate_features=validate_features,
-            )
-            if isinstance(predts, dd.DataFrame):
-                predts = predts.to_dask_array()
-        else:
-            test_dmatrix: DaskDMatrix = await DaskDMatrix(  # type: ignore
-                self.client,
-                data=data,
-                base_margin=base_margin,
-                missing=self.missing,
-                feature_types=self.feature_types,
-            )
-            predts = await predict(
-                self.client,
-                model=self.get_booster(),
-                data=test_dmatrix,
-                output_margin=output_margin,
-                validate_features=validate_features,
-                iteration_range=iteration_range,
-            )
+        # Dask doesn't support gblinear and accepts only Dask collection types (array
+        # and dataframe), so we can always perform in-place prediction.
+        assert self._can_use_inplace_predict()
+        predts = await inplace_predict(
+            client=self.client,
+            model=self.get_booster(),
+            data=data,
+            iteration_range=iteration_range,
+            predict_type="margin" if output_margin else "value",
+            missing=self.missing,
+            base_margin=base_margin,
+            validate_features=validate_features,
+        )
+        if isinstance(predts, dd.DataFrame):
+            predts = predts.to_dask_array()
+        # Make sure the booster is implicitly part of the task graph; this is
+        # only needed for certain versions of dask.
+        if _DASK_2024_12_1() and not _DASK_2025_3_0():
+            # Fixes this issue for dask>=2024.12.1,<2025.3.0
+            # Dask==2025.3.0 fails with:
+            # RuntimeError: Attempting to use an asynchronous
+            # Client in a synchronous context of `dask.compute`
+            #
+            # Dask==2025.4.0 fails with:
+            # TypeError: Value type is not supported for data
+            # iterator:
+            predts = predts.persist()
         return predts

     @_deprecate_positional_args
@@ -1532,7 +1500,7 @@ async def _apply_async(
         iteration_range: Optional[IterationRange] = None,
     ) -> Any:
         iteration_range = self._get_iteration_range(iteration_range)
-        test_dmatrix: DaskDMatrix = await DaskDMatrix(  # type: ignore
+        test_dmatrix: DaskDMatrix = await DaskDMatrix(
             self.client,
             data=X,
             missing=self.missing,
@@ -1919,7 +1887,7 @@ class DaskXGBRanker(XGBRankerMixIn, DaskScikitLearnBase):
     def __init__(
         self,
         *,
-        objective: str = "rank:pairwise",
+        objective: str = "rank:ndcg",
         allow_group_split: bool = False,
         coll_cfg: Optional[CollConfig] = None,
         **kwargs: Any,
@@ -2036,8 +2004,8 @@ def check_ser(
     ) -> TypeGuard[Optional[dd.Series]]:
         if not isinstance(qid, dd.Series) and qid is not None:
             raise TypeError(
-                f"When `allow_group_split` is set to False, {name} is required to be"
-                " a series."
+                f"When `allow_group_split` is set to False, {name} is required to "
+                "be a series."
) return True diff --git a/python-package/xgboost/dask/data.py b/python-package/xgboost/dask/data.py index 2a80874e9821..a216b2ebe14f 100644 --- a/python-package/xgboost/dask/data.py +++ b/python-package/xgboost/dask/data.py @@ -23,10 +23,13 @@ from dask import dataframe as dd from .. import collective as coll -from .._typing import FeatureNames +from .._data_utils import Categories +from .._typing import FeatureNames, FeatureTypes from ..compat import concat, import_cupy -from ..core import DataIter, DMatrix, QuantileDMatrix +from ..core import Booster, DataIter, DMatrix, QuantileDMatrix from ..data import is_on_cuda +from ..sklearn import get_model_categories, pick_ref_categories +from ..training import _RefError LOGGER = logging.getLogger("[xgboost.dask]") @@ -50,7 +53,7 @@ def __init__( self, data: List[Any], feature_names: Optional[FeatureNames] = None, - feature_types: Optional[Union[Any, List[Any]]] = None, + feature_types: Optional[Union[FeatureTypes, Categories]] = None, feature_weights: Optional[Any] = None, **kwargs: Optional[List[Any]], ) -> None: @@ -251,6 +254,7 @@ def map_fn(i: int) -> pd.DataFrame: def _get_worker_parts(list_of_parts: _DataParts) -> Dict[str, List[Any]]: + """Convert list of dictionaries into a dictionary of lists.""" assert isinstance(list_of_parts, list) result: Dict[str, List[Any]] = {} @@ -275,6 +279,19 @@ def append(i: int, name: str) -> None: return result +def _extract_data( + parts: _DataParts, + model: Optional[Booster], + feature_types: Optional[FeatureTypes], + xy_cats: Optional[Categories], +) -> Tuple[Dict[str, List[Any]], Optional[Union[FeatureTypes, Categories]]]: + unzipped_dict = _get_worker_parts(parts) + X = unzipped_dict["data"][0] + _, model_cats = get_model_categories(X, model, feature_types) + model_cats = pick_ref_categories(X, model_cats, xy_cats) + return unzipped_dict, model_cats + + def _get_is_cuda(parts: Optional[_DataParts]) -> bool: if parts is not None: is_cuda = is_on_cuda(parts[0].get("data")) @@ -294,10 +311,15 @@ def _make_empty(is_cuda: bool) -> np.ndarray: return empty +def _warn_empty() -> None: + worker = distributed.get_worker() + LOGGER.warning("Worker %s has an empty DMatrix.", worker.address) + + def _create_quantile_dmatrix( *, feature_names: Optional[FeatureNames], - feature_types: Optional[Union[Any, List[Any]]], + feature_types: Optional[FeatureTypes], feature_weights: Optional[Any], missing: float, nthread: int, @@ -306,11 +328,12 @@ def _create_quantile_dmatrix( enable_categorical: bool, max_quantile_batches: Optional[int], ref: Optional[DMatrix] = None, + model: Optional[Booster], + Xy_cats: Optional[Categories], ) -> QuantileDMatrix: - worker = distributed.get_worker() is_cuda = _get_is_cuda(parts) if parts is None: - LOGGER.warning("Worker %s has an empty DMatrix.", worker.address) + _warn_empty() return QuantileDMatrix( _make_empty(is_cuda), feature_names=feature_names, @@ -321,14 +344,15 @@ def _create_quantile_dmatrix( max_quantile_batches=max_quantile_batches, ) - it = DaskPartitionIter( - **_get_worker_parts(parts), - feature_types=feature_types, - feature_names=feature_names, - feature_weights=feature_weights, - ) + unzipped_dict, model_cats = _extract_data(parts, model, feature_types, Xy_cats) + return QuantileDMatrix( - it, + DaskPartitionIter( + **unzipped_dict, + feature_types=model_cats, + feature_names=feature_names, + feature_weights=feature_weights, + ), missing=missing, nthread=nthread, max_bin=max_bin, @@ -341,12 +365,14 @@ def _create_quantile_dmatrix( def _create_dmatrix( # pylint: 
disable=too-many-locals
     *,
     feature_names: Optional[FeatureNames],
-    feature_types: Optional[Union[Any, List[Any]]],
+    feature_types: Optional[FeatureTypes],
     feature_weights: Optional[Any],
     missing: float,
     nthread: int,
     enable_categorical: bool,
     parts: Optional[_DataParts],
+    model: Optional[Booster],
+    Xy_cats: Optional[Categories],
 ) -> DMatrix:
     """Get data that is local to the worker from DaskDMatrix.

     Returns
     -------
     A DMatrix object.

     """
-    worker = distributed.get_worker()
-    list_of_parts = parts
     is_cuda = _get_is_cuda(parts)
-
-    if list_of_parts is None:
-        msg = f"Worker {worker.address} has an empty DMatrix."
-        LOGGER.warning(msg)
-        Xy = DMatrix(
+    if parts is None:
+        _warn_empty()
+        return DMatrix(
             _make_empty(is_cuda),
             feature_names=feature_names,
             feature_types=feature_types,
             enable_categorical=enable_categorical,
         )
-        return Xy

     T = TypeVar("T")

@@ -377,19 +398,64 @@ def concat_or_none(data: Sequence[Optional[T]]) -> Optional[T]:
             return None
         return concat(data)

-    unzipped_dict = _get_worker_parts(list_of_parts)
+    unzipped_dict, model_cats = _extract_data(parts, model, feature_types, Xy_cats)
+
     concated_dict: Dict[str, Any] = {}
     for key, value in unzipped_dict.items():
         v = concat_or_none(value)
         concated_dict[key] = v

-    Xy = DMatrix(
+    return DMatrix(
         **concated_dict,
         missing=missing,
         feature_names=feature_names,
-        feature_types=feature_types,
+        feature_types=model_cats,
         nthread=nthread,
         enable_categorical=enable_categorical,
         feature_weights=feature_weights,
     )
-    return Xy
+
+
+def _dmatrix_from_list_of_parts(is_quantile: bool, **kwargs: Any) -> DMatrix:
+    if is_quantile:
+        return _create_quantile_dmatrix(**kwargs)
+    return _create_dmatrix(**kwargs)
+
+
+def _get_dmatrices(
+    train_ref: dict,
+    train_id: int,
+    *refs: dict,
+    evals_id: Sequence[int],
+    evals_name: Sequence[str],
+    n_threads: int,
+    model: Optional[Booster],
+) -> Tuple[DMatrix, List[Tuple[DMatrix, str]]]:
+    # Create the training DMatrix
+    Xy = _dmatrix_from_list_of_parts(
+        **train_ref, nthread=n_threads, model=model, Xy_cats=None
+    )
+
+    # Create evaluation DMatrices
+    evals: List[Tuple[DMatrix, str]] = []
+    Xy_cats = Xy.get_categories()
+
+    for i, ref in enumerate(refs):
+        # Same DMatrix as the training
+        if evals_id[i] == train_id:
+            evals.append((Xy, evals_name[i]))
+            continue
+        # Check whether the training DMatrix has been used as a reference.
+        if ref.get("ref", None) is not None:
+            if ref["ref"] != train_id:
+                raise ValueError(_RefError)
+            del ref["ref"]  # Avoid duplicated parameter in the next fn call.
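+            # The training DMatrix doubles as the quantile reference (``ref=Xy``)
+            # and supplies ``Xy_cats``, so the evaluation data is re-coded with
+            # the same categorical encoding as the training data.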
+ eval_xy = _dmatrix_from_list_of_parts( + **ref, nthread=n_threads, ref=Xy, Xy_cats=Xy_cats, model=model + ) + else: + eval_xy = _dmatrix_from_list_of_parts( + **ref, nthread=n_threads, Xy_cats=Xy_cats, model=model + ) + evals.append((eval_xy, evals_name[i])) + return Xy, evals diff --git a/python-package/xgboost/dask/utils.py b/python-package/xgboost/dask/utils.py index 7f71ca6e3bc2..e9a004f6e8d1 100644 --- a/python-package/xgboost/dask/utils.py +++ b/python-package/xgboost/dask/utils.py @@ -2,9 +2,13 @@ import logging import warnings +from functools import cache as fcache from typing import Any, Dict, Optional, Tuple +import dask import distributed +from packaging.version import Version +from packaging.version import parse as parse_version from ..collective import Config @@ -97,3 +101,18 @@ def get_address_from_user( port = coll_cfg.tracker_port return host_ip, port + + +@fcache +def _DASK_VERSION() -> Version: + return parse_version(dask.__version__) + + +@fcache +def _DASK_2024_12_1() -> bool: + return _DASK_VERSION() >= parse_version("2024.12.1") + + +@fcache +def _DASK_2025_3_0() -> bool: + return _DASK_VERSION() >= parse_version("2025.3.0") diff --git a/python-package/xgboost/data.py b/python-package/xgboost/data.py index 4ada55472348..f0917c615d39 100644 --- a/python-package/xgboost/data.py +++ b/python-package/xgboost/data.py @@ -1,6 +1,7 @@ # pylint: disable=too-many-arguments, too-many-branches, too-many-lines # pylint: disable=too-many-return-statements """Data dispatching for DMatrix.""" + import ctypes import functools import json @@ -15,7 +16,7 @@ Optional, Sequence, Tuple, - Type, + TypeAlias, TypeGuard, Union, ) @@ -23,21 +24,24 @@ import numpy as np from ._data_utils import ( - ArrayInf, + AifType, + Categories, DfCatAccessor, - StringArray, TransformedDf, + _arrow_array_inf, _ensure_np_dtype, _is_df_cat, array_hasobject, array_interface, array_interface_dict, + arrow_cat_inf, check_cudf_meta, cuda_array_interface, cuda_array_interface_dict, cudf_cat_inf, + get_ref_categories, is_arrow_dict, - make_array_interface, + pd_cat_inf, ) from ._typing import ( CupyT, @@ -52,7 +56,18 @@ c_bst_ulong, ) from .compat import ( - DataFrame, + _is_arrow, + _is_cudf_df, + _is_cudf_pandas, + _is_cudf_ser, + _is_modin_df, + _is_modin_series, + _is_pandas_df, + _is_pandas_series, + _is_polars, + _is_polars_lazyframe, + _is_polars_series, + import_pandas, import_polars, import_pyarrow, is_pyarrow_available, @@ -71,10 +86,11 @@ if TYPE_CHECKING: import pyarrow as pa + from pandas import DataFrame as PdDataFrame from pandas import Series as PdSeries -DispatchedDataBackendReturnType = Tuple[ +DispatchedDataBackendReturnType: TypeAlias = Tuple[ ctypes.c_void_p, Optional[FeatureNames], Optional[FeatureTypes] ] @@ -282,22 +298,6 @@ def _from_numpy_array( return handle, feature_names, feature_types -def _is_pandas_df(data: DataType) -> TypeGuard[DataFrame]: - try: - import pandas as pd - except ImportError: - return False - return isinstance(data, pd.DataFrame) - - -def _is_modin_df(data: DataType) -> bool: - try: - import modin.pandas as pd - except ImportError: - return False - return isinstance(data, pd.DataFrame) - - _pandas_dtype_mapper = { "int8": "int", "int16": "int", @@ -373,14 +373,14 @@ def _invalid_dataframe_dtype(data: DataType) -> None: def pandas_feature_info( - data: DataFrame, + data: "PdDataFrame", meta: Optional[str], feature_names: Optional[FeatureNames], feature_types: Optional[FeatureTypes], enable_categorical: bool, ) -> Tuple[Optional[FeatureNames], 
Optional[FeatureTypes]]: """Handle feature info for pandas dataframe.""" - import pandas as pd + pd = import_pandas() # handle feature names if feature_names is None and meta is None: @@ -445,7 +445,7 @@ def is_pa_ext_categorical_dtype(dtype: Any) -> bool: @functools.cache def _lazy_load_pd_is_cat() -> Callable[[PandasDType], bool]: - import pandas as pd + pd = import_pandas() if hasattr(pd.util, "version") and hasattr(pd.util.version, "Version"): Version = pd.util.version.Version @@ -469,7 +469,7 @@ def is_pd_cat_dtype(dtype: PandasDType) -> bool: @functools.cache def _lazy_load_pd_is_sparse() -> Callable[[PandasDType], bool]: - import pandas as pd + pd = import_pandas() if hasattr(pd.util, "version") and hasattr(pd.util.version, "Version"): Version = pd.util.version.Version @@ -495,7 +495,7 @@ def is_pd_sparse_dtype(dtype: PandasDType) -> bool: def pandas_pa_type(ser: Any) -> np.ndarray: """Handle pandas pyarrow extention.""" - import pandas as pd + pd = import_pandas() if TYPE_CHECKING: import pyarrow as pa @@ -536,7 +536,9 @@ def _lazy_load_pd_floats() -> tuple: return Float32Dtype, Float64Dtype -def pandas_transform_data(data: DataFrame) -> List[Union[np.ndarray, DfCatAccessor]]: +def pandas_transform_data( + data: "PdDataFrame", +) -> List[Union[np.ndarray, DfCatAccessor]]: """Handle categorical dtype and extension types from pandas.""" Float32Dtype, Float64Dtype = _lazy_load_pd_floats() @@ -608,32 +610,28 @@ class PandasTransformed(TransformedDf): """A storage class for transformed pandas DataFrame.""" def __init__( - self, columns: List[Union[np.ndarray, DfCatAccessor, "pa.DictionaryType"]] + self, + columns: List[Union[np.ndarray, DfCatAccessor]], + ref_categories: Optional[Categories], ) -> None: self.columns = columns - aitfs = [] - self.temporary_buffers = [] + aitfs: AifType = [] # Get the array interface representation for each column. for col in self.columns: - inf = array_interface_dict(col) - if isinstance(inf, tuple): + if _is_df_cat(col): # Categorical column - jnames, jcodes, buf = inf - # Store the transformed results to avoid garbage collection. 
+ jnames, jcodes, buf = pd_cat_inf(col.categories, col.codes) self.temporary_buffers.append(buf) - aitfs.append([jnames, jcodes]) + aitfs.append((jnames, jcodes)) else: + assert isinstance(col, np.ndarray) + inf = array_interface_dict(col) # Numeric column aitfs.append(inf) - self.aitfs = aitfs - - def array_interface(self) -> bytes: - """Return a byte string for JSON encoded array interface.""" - sarrays = bytes(json.dumps(self.aitfs), "utf-8") - return sarrays + super().__init__(ref_categories=ref_categories, aitfs=aitfs) @property def shape(self) -> Tuple[int, int]: @@ -651,21 +649,26 @@ def shape(self) -> Tuple[int, int]: def _transform_pandas_df( - data: DataFrame, + data: "PdDataFrame", enable_categorical: bool, feature_names: Optional[FeatureNames] = None, - feature_types: Optional[FeatureTypes] = None, + feature_types: Optional[Union[FeatureTypes, Categories]] = None, meta: Optional[str] = None, ) -> Tuple[PandasTransformed, Optional[FeatureNames], Optional[FeatureTypes]]: if meta and len(data.columns) > 1 and meta not in _matrix_meta: raise ValueError(f"DataFrame for {meta} cannot have multiple columns") + feature_types, ref_categories = get_ref_categories(feature_types) feature_names, feature_types = pandas_feature_info( data, meta, feature_names, feature_types, enable_categorical ) arrays = pandas_transform_data(data) - return PandasTransformed(arrays), feature_names, feature_types + return ( + PandasTransformed(arrays, ref_categories=ref_categories), + feature_names, + feature_types, + ) def _meta_from_pandas_df( @@ -686,12 +689,12 @@ def _meta_from_pandas_df( def _from_pandas_df( *, - data: DataFrame, + data: "PdDataFrame", enable_categorical: bool, missing: FloatCompatible, nthread: int, feature_names: Optional[FeatureNames], - feature_types: Optional[FeatureTypes], + feature_types: Optional[Union[FeatureTypes, Categories]], data_split_mode: DataSplitMode = DataSplitMode.ROW, ) -> DispatchedDataBackendReturnType: df, feature_names, feature_types = _transform_pandas_df( @@ -711,14 +714,6 @@ def _from_pandas_df( return handle, feature_names, feature_types -def _is_pandas_series(data: DataType) -> bool: - try: - import pandas as pd - except ImportError: - return False - return isinstance(data, pd.Series) - - def _meta_from_pandas_series( data: DataType, name: str, dtype: Optional[NumpyDType], handle: ctypes.c_void_p ) -> None: @@ -736,14 +731,6 @@ def _meta_from_pandas_series( _meta_from_numpy(data, name, dtype, handle) -def _is_modin_series(data: DataType) -> bool: - try: - import modin.pandas as pd - except ImportError: - return False - return isinstance(data, pd.Series) - - def _from_pandas_series( *, data: DataType, @@ -768,75 +755,44 @@ def _from_pandas_series( ) -@functools.cache -def _arrow_npdtype() -> Dict[Any, Type[np.number]]: - import pyarrow as pa - - mapping: Dict[Any, Type[np.number]] = { - pa.int8(): np.int8, - pa.int16(): np.int16, - pa.int32(): np.int32, - pa.int64(): np.int64, - pa.uint8(): np.uint8, - pa.uint16(): np.uint16, - pa.uint32(): np.uint32, - pa.uint64(): np.uint64, - pa.float16(): np.float16, - pa.float32(): np.float32, - pa.float64(): np.float64, - } - - return mapping - - class ArrowTransformed(TransformedDf): """A storage class for transformed arrow table.""" def __init__( - self, columns: List[Union["pa.NumericArray", "pa.DictionaryArray"]] + self, + columns: List[Union["pa.NumericArray", "pa.DictionaryArray"]], + ref_categories: Optional[Categories] = None, ) -> None: self.columns = columns - def array_interface(self) -> bytes: - """Return a 
byte string for JSON encoded array interface.""" + self.temporary_buffers: List[Tuple] = [] + if TYPE_CHECKING: import pyarrow as pa else: pa = import_pyarrow() - def array_inf(col: Union["pa.NumericArray", "pa.DictionaryArray"]) -> ArrayInf: - buffers = col.buffers() + aitfs: AifType = [] + + def push_series(col: Union["pa.NumericArray", "pa.DictionaryArray"]) -> None: if isinstance(col, pa.DictionaryArray): - mask, _, data = col.buffers() + cats = col.dictionary + codes = col.indices + if not isinstance(cats, (pa.StringArray, pa.LargeStringArray)): + raise TypeError( + "Only string-based categorical index is supported for arrow." + ) + jnames, jcodes, buf = arrow_cat_inf(cats, codes) + self.temporary_buffers.append(buf) + aitfs.append((jnames, jcodes)) else: - mask, data = buffers + jdata = _arrow_array_inf(col) + aitfs.append(jdata) - assert data.is_cpu - assert col.offset == 0 + for col in self.columns: + push_series(col) - jdata = make_array_interface( - data.address, - shape=(len(col),), - dtype=_arrow_npdtype()[col.type], - is_cuda=not data.is_cpu, - ) - if mask is not None: - jmask: ArrayInf = { - "data": (mask.address, True), - "typestr": " Tuple[int, int]: @@ -844,15 +800,11 @@ def shape(self) -> Tuple[int, int]: return len(self.columns[0]), len(self.columns) -def _is_arrow(data: DataType) -> bool: - return lazy_isinstance(data, "pyarrow.lib", "Table") - - def _transform_arrow_table( data: "pa.Table", - _: bool, # not used yet, enable_categorical + enable_categorical: bool, feature_names: Optional[FeatureNames], - feature_types: Optional[FeatureTypes], + feature_types: Optional[Union[FeatureTypes, Categories]], ) -> Tuple[ArrowTransformed, Optional[FeatureNames], Optional[FeatureTypes]]: if TYPE_CHECKING: import pyarrow as pa @@ -860,6 +812,7 @@ def _transform_arrow_table( pa = import_pyarrow() t_names, t_types = _arrow_feature_info(data) + feature_types, ref_categories = get_ref_categories(feature_types) if feature_names is None: feature_names = t_names @@ -872,9 +825,13 @@ def _transform_arrow_table( col: Union["pa.NumericArray", "pa.DictionaryArray"] = col0.combine_chunks() if isinstance(col, pa.BooleanArray): col = col.cast(pa.int8()) # bit-compressed array, not supported. + if is_arrow_dict(col) and not enable_categorical: + # None because the function doesn't know how to get the type info from arrow + # table. + _invalid_dataframe_dtype(None) columns.append(col) - df_t = ArrowTransformed(columns) + df_t = ArrowTransformed(columns, ref_categories=ref_categories) return df_t, feature_names, feature_types @@ -884,7 +841,7 @@ def _from_arrow_table( # pylint: disable=too-many-positional-arguments missing: FloatCompatible, n_threads: int, feature_names: Optional[FeatureNames], - feature_types: Optional[FeatureTypes], + feature_types: Optional[Union[FeatureTypes, Categories]], data_split_mode: DataSplitMode = DataSplitMode.ROW, ) -> DispatchedDataBackendReturnType: df_t, feature_names, feature_types = _transform_arrow_table( @@ -937,10 +894,6 @@ def _arrow_feature_info(data: DataType) -> Tuple[List[str], List]: def map_type(name: str) -> str: col = table.column(name) if isinstance(col.type, pa.DictionaryType): - raise NotImplementedError( - "Categorical feature is not yet supported with the current input data " - "type." 
- ) return CAT_T # pylint: disable=unreachable return _arrow_dtype()[col.type] @@ -959,20 +912,6 @@ def _meta_from_arrow_table( _meta_from_pandas_df(table.to_pandas(), name=name, dtype=dtype, handle=handle) -def _is_polars_lazyframe(data: DataType) -> bool: - return lazy_isinstance(data, "polars.lazyframe.frame", "LazyFrame") - - -def _is_polars_series(data: DataType) -> bool: - return lazy_isinstance(data, "polars.series.series", "Series") - - -def _is_polars(data: DataType) -> bool: - lf = _is_polars_lazyframe(data) - df = lazy_isinstance(data, "polars.dataframe.frame", "DataFrame") - return lf or df - - def _check_pyarrow_for_polars() -> None: if not is_pyarrow_available(): raise ImportError("`pyarrow` is required for polars.") @@ -982,7 +921,7 @@ def _transform_polars_df( data: DataType, enable_categorical: bool, feature_names: Optional[FeatureNames], - feature_types: Optional[FeatureTypes], + feature_types: Optional[Union[FeatureTypes, Categories]], ) -> Tuple[ArrowTransformed, Optional[FeatureNames], Optional[FeatureTypes]]: if _is_polars_lazyframe(data): df = data.collect() @@ -1007,7 +946,7 @@ def _from_polars_df( # pylint: disable=too-many-positional-arguments missing: FloatCompatible, n_threads: int, feature_names: Optional[FeatureNames], - feature_types: Optional[FeatureTypes], + feature_types: Optional[Union[FeatureTypes, Categories]], data_split_mode: DataSplitMode = DataSplitMode.ROW, ) -> DispatchedDataBackendReturnType: df_t, feature_names, feature_types = _transform_polars_df( @@ -1026,20 +965,6 @@ def _from_polars_df( # pylint: disable=too-many-positional-arguments return handle, feature_names, feature_types -def _is_cudf_df(data: DataType) -> bool: - return lazy_isinstance(data, "cudf.core.dataframe", "DataFrame") - - -def _is_cudf_pandas(data: DataType) -> bool: - """Must go before both pandas and cudf checks.""" - return ( - lazy_isinstance(data, "pandas.core.frame", "DataFrame") - or lazy_isinstance(data, "pandas.core.series", "Series") - ) and lazy_isinstance( - type(data), "cudf.pandas.fast_slow_proxy", "_FastSlowProxyMeta" - ) - - @functools.cache def _lazy_load_cudf_is_cat() -> Callable[[Any], bool]: try: @@ -1067,26 +992,20 @@ def _lazy_load_cudf_is_bool() -> Callable[[Any], bool]: class CudfTransformed(TransformedDf): """A storage class for transformed cuDF dataframe.""" - def __init__(self, columns: List[Union["PdSeries", DfCatAccessor]]) -> None: + def __init__( + self, + columns: List[Union["PdSeries", DfCatAccessor]], + ref_categories: Optional[Categories], + ) -> None: self.columns = columns # Buffers for temporary data that cannot be freed until the data is consumed by # the DMatrix or the booster. 
- self.temporary_buffers: List[Tuple] = [] - aitfs: List[ - Union[ - ArrayInf, # numeric column - Tuple[ # categorical column - Union[ArrayInf, StringArray], # string index, numeric index - ArrayInf, # codes - ], - ] - ] = [] + aitfs: AifType = [] def push_series(ser: Any) -> None: if _is_df_cat(ser): cats, codes = ser.categories, ser.codes - cats_ainf: Union[StringArray, ArrayInf] # string or numeric index cats_ainf, codes_ainf, buf = cudf_cat_inf(cats, codes) self.temporary_buffers.append(buf) aitfs.append((cats_ainf, codes_ainf)) @@ -1098,12 +1017,7 @@ def push_series(ser: Any) -> None: for col in self.columns: push_series(col) - self.aitfs = aitfs - - def array_interface(self) -> bytes: - """Return a byte string for JSON encoded array interface.""" - sarrays = bytes(json.dumps(self.aitfs), "utf-8") - return sarrays + super().__init__(ref_categories=ref_categories, aitfs=aitfs) @property def shape(self) -> Tuple[int, int]: @@ -1118,7 +1032,7 @@ def shape(self) -> Tuple[int, int]: def _transform_cudf_df( data: DataType, feature_names: Optional[FeatureNames], - feature_types: Optional[FeatureTypes], + feature_types: Optional[Union[FeatureTypes, Categories]], enable_categorical: bool, ) -> Tuple[ CudfTransformed, @@ -1156,6 +1070,7 @@ def _transform_cudf_df( feature_names = list(data.columns.map(str)) # handle feature types + feature_types, ref_categories = get_ref_categories(feature_types) if feature_types is None: feature_types = [] for dtype in dtypes: @@ -1175,16 +1090,20 @@ def _transform_cudf_df( else: result.append(data) else: - for col in data: - dtype = data[col].dtype + for col, dtype in zip(data.columns, data.dtypes): + series = data[col] if is_categorical_dtype(dtype) and enable_categorical: - result.append(data[col].cat) + result.append(series.cat) elif is_categorical_dtype(dtype): raise ValueError(_ENABLE_CAT_ERR) else: - result.append(data[col]) + result.append(series) - return CudfTransformed(result), feature_names, feature_types + return ( + CudfTransformed(result, ref_categories=ref_categories), + feature_names, + feature_types, + ) def _from_cudf_df( @@ -1193,7 +1112,7 @@ def _from_cudf_df( missing: FloatCompatible, nthread: int, feature_names: Optional[FeatureNames], - feature_types: Optional[FeatureTypes], + feature_types: Optional[Union[FeatureTypes, Categories]], enable_categorical: bool, ) -> DispatchedDataBackendReturnType: df, feature_names, feature_types = _transform_cudf_df( @@ -1210,10 +1129,6 @@ def _from_cudf_df( return handle, feature_names, feature_types -def _is_cudf_ser(data: DataType) -> bool: - return lazy_isinstance(data, "cudf.core.series", "Series") - - def _is_cupy_alike(data: DataType) -> bool: return hasattr(data, "__cuda_array_interface__") @@ -1386,11 +1301,21 @@ def dispatch_data_backend( missing: FloatCompatible, # Or Optional[Float] threads: int, feature_names: Optional[FeatureNames], - feature_types: Optional[FeatureTypes], + feature_types: Optional[Union[FeatureTypes, Categories]], enable_categorical: bool = False, data_split_mode: DataSplitMode = DataSplitMode.ROW, ) -> DispatchedDataBackendReturnType: """Dispatch data for DMatrix.""" + + def check_cats( + feature_types: Optional[Union[FeatureTypes, Categories]], + ) -> TypeGuard[Optional[FeatureTypes]]: + if isinstance(feature_types, Categories): + raise ValueError( + "Reference category is only supported by DataFrame inputs." 
+ ) + return True + if ( not _is_cudf_ser(data) and not _is_pandas_series(data) @@ -1398,6 +1323,7 @@ def dispatch_data_backend( ): _check_data_shape(data) if is_scipy_csr(data): + assert check_cats(feature_types) return _from_scipy_csr( data=data, missing=missing, @@ -1407,6 +1333,7 @@ def dispatch_data_backend( data_split_mode=data_split_mode, ) if is_scipy_csc(data): + assert check_cats(feature_types) return _from_scipy_csc( data=data, missing=missing, @@ -1416,6 +1343,7 @@ def dispatch_data_backend( data_split_mode=data_split_mode, ) if is_scipy_coo(data): + assert check_cats(feature_types) return _from_scipy_csr( data=data.tocsr(), missing=missing, @@ -1425,6 +1353,7 @@ def dispatch_data_backend( data_split_mode=data_split_mode, ) if _is_np_array_like(data): + assert check_cats(feature_types) return _from_numpy_array( data=data, missing=missing, @@ -1434,8 +1363,10 @@ def dispatch_data_backend( data_split_mode=data_split_mode, ) if _is_uri(data): + assert check_cats(feature_types) return _from_uri(data, missing, feature_names, feature_types, data_split_mode) if _is_list(data): + assert check_cats(feature_types) return _from_list( data=data, missing=missing, @@ -1445,6 +1376,7 @@ def dispatch_data_backend( data_split_mode=data_split_mode, ) if _is_tuple(data): + assert check_cats(feature_types) return _from_tuple( data=data, missing=missing, @@ -1480,7 +1412,7 @@ def dispatch_data_backend( if _is_cudf_pandas(data): data = data._fsproxy_fast # pylint: disable=protected-access if _is_pandas_series(data): - import pandas as pd + pd = import_pandas() data = pd.DataFrame(data) if _is_pandas_df(data): @@ -1503,13 +1435,19 @@ def dispatch_data_backend( enable_categorical=enable_categorical, ) if _is_cupy_alike(data): + assert check_cats(feature_types) return _from_cupy_array(data, missing, threads, feature_names, feature_types) if _is_cupy_csr(data): raise TypeError("cupyx CSR is not supported yet.") if _is_cupy_csc(data): raise TypeError("cupyx CSC is not supported yet.") if _is_dlpack(data): + assert check_cats(feature_types) return _from_dlpack(data, missing, threads, feature_names, feature_types) + if _is_modin_series(data): + pd = import_pandas() + + data = pd.DataFrame(data) if _is_modin_df(data): return _from_pandas_df( data=data, @@ -1519,16 +1457,9 @@ def dispatch_data_backend( feature_names=feature_names, feature_types=feature_types, ) - if _is_modin_series(data): - return _from_pandas_series( - data=data, - missing=missing, - nthread=threads, - enable_categorical=enable_categorical, - feature_names=feature_names, - feature_types=feature_types, - ) + if _has_array_protocol(data): + assert check_cats(feature_types) array = np.asarray(data) return _from_numpy_array( data=array, @@ -1540,6 +1471,7 @@ def dispatch_data_backend( converted = _convert_unknown_data(data) if converted is not None: + assert check_cats(feature_types) return _from_scipy_csr( data=converted, missing=missing, @@ -1687,7 +1619,7 @@ class SingleBatchInternalIter(DataIter): # pylint: disable=R0902 def __init__(self, **kwargs: Any) -> None: self.kwargs = kwargs - self.it = 0 # pylint: disable=invalid-name + self.it = 0 # This does not necessarily increase memory usage as the data transformation # might use memory. 
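A minimal sketch of the `check_cats` guard above, assuming the `Categories` container returned by `Booster.get_categories` (referenced elsewhere in this diff) can be passed back as `feature_types`; only DataFrame inputs accept it:

    import pandas as pd
    import xgboost as xgb

    df = pd.DataFrame({"c": pd.Categorical(["a", "b", "a"]), "x": [1.0, 2.0, 3.0]})
    Xy = xgb.DMatrix(df, label=[0, 1, 1], enable_categorical=True)
    booster = xgb.train({"tree_method": "hist"}, Xy, num_boost_round=2)

    cats = booster.get_categories()  # Categories recorded during training
    if not cats.empty():
        # DataFrame input: re-coded with the model's reference categories.
        m = xgb.DMatrix(df, feature_types=cats, enable_categorical=True)
    # Passing `cats` with a numpy/scipy input instead would raise the ValueError
    # from `check_cats` ("Reference category is only supported by DataFrame inputs.").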
@@ -1741,7 +1673,7 @@ def _proxy_transform( ) return df_pl, feature_names, feature_types if _is_pandas_series(data): - import pandas as pd + pd = import_pandas() data = pd.DataFrame(data) if _is_arrow(data): @@ -1759,7 +1691,16 @@ def _proxy_transform( def is_on_cuda(data: Any) -> bool: """Whether the data is a CUDA-based data structure.""" - return any(p(data) for p in (_is_cudf_df, _is_cudf_ser, _is_cupy_alike, _is_dlpack)) + return any( + p(data) + for p in ( + _is_cudf_df, + _is_cudf_ser, + _is_cudf_pandas, + _is_cupy_alike, + _is_dlpack, + ) + ) def dispatch_proxy_set_data( diff --git a/python-package/xgboost/objective.py b/python-package/xgboost/objective.py new file mode 100644 index 000000000000..2a05e15339e3 --- /dev/null +++ b/python-package/xgboost/objective.py @@ -0,0 +1,49 @@ +"""Experimental support for a new objective interface with target dimension +reduction. + +.. warning:: + + Do not use this module unless you want to participate in development. + +.. versionadded:: 3.2.0 + +""" + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Tuple + +from ._typing import ArrayLike + +if TYPE_CHECKING: + from .core import DMatrix + + +class Objective(ABC): + """Base class for custom objective functions. + + .. warning:: + + Do not use this class unless you want to participate in development. + + """ + + @abstractmethod + def __call__( + self, y_pred: ArrayLike, dtrain: "DMatrix" + ) -> Tuple[ArrayLike, ArrayLike]: ... + + +class TreeObjective(Objective): + """Base class for tree-specific custom objective functions. + + .. warning:: + + Do not use this class unless you want to participate in development. + + """ + + def split_grad( + self, grad: ArrayLike, hess: ArrayLike + ) -> Tuple[ArrayLike, ArrayLike]: + """Provide a different gradient type for finding tree structures.""" + return grad, hess diff --git a/python-package/xgboost/plotting.py b/python-package/xgboost/plotting.py index 552261c2d1a0..57279cd13757 100644 --- a/python-package/xgboost/plotting.py +++ b/python-package/xgboost/plotting.py @@ -1,4 +1,4 @@ -# pylint: disable=too-many-locals, too-many-arguments, invalid-name, +# pylint: disable=too-many-locals, too-many-arguments # pylint: disable=too-many-branches """Plotting Library.""" import json @@ -120,7 +120,7 @@ def plot_importance( if show_values is True: for x, y in zip(values, ylocs): - ax.text(x + 1, y, values_format.format(v=x), va="center") + ax.text(x + 1, float(y), values_format.format(v=x), va="center") ax.set_yticks(ylocs) ax.set_yticklabels(labels) diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 86f550298a35..909133f1ae33 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -1,5 +1,7 @@ -# pylint: disable=too-many-arguments, too-many-locals, invalid-name, fixme, too-many-lines +# pylint: disable=too-many-arguments, too-many-locals, fixme, too-many-lines """Scikit-Learn Wrapper interface for XGBoost.""" + +import collections import copy import json import os @@ -25,7 +27,15 @@ import numpy as np from scipy.special import softmax -from ._typing import ArrayLike, FeatureNames, FeatureTypes, IterationRange, ModelIn +from ._data_utils import Categories +from ._typing import ( + ArrayLike, + EvalsLog, + FeatureNames, + FeatureTypes, + IterationRange, + ModelIn, +) from .callback import TrainingCallback # Do not use class names on scikit-learn directly. 
Re-define the classes on
@@ -38,6 +48,7 @@
     _sklearn_Tags,
     _sklearn_version,
     import_cupy,
+    is_dataframe,
 )
 from .config import config_context
 from .core import (
@@ -78,13 +89,13 @@ def _check_rf_callback(
     if early_stopping_rounds is not None or callbacks is not None:
         raise NotImplementedError(
             "`early_stopping_rounds` and `callbacks` are not implemented for"
-            " random forest."
+            " the sklearn random forest estimator interface."
         )


 def _can_use_qdm(tree_method: Optional[str], device: Optional[str]) -> bool:
     not_sycl = (device is None) or (not device.startswith("sycl"))
-    return tree_method in ("hist", "gpu_hist", None, "auto") and not_sycl
+    return tree_method in ("hist", None, "auto") and not_sycl


 class _SklObjWProto(Protocol):
@@ -92,7 +103,7 @@ def __call__(
         self,
         y_true: ArrayLike,
         y_pred: ArrayLike,
-        sample_weight: Optional[ArrayLike],
+        sample_weight: Optional[ArrayLike] = None,
     ) -> Tuple[ArrayLike, ArrayLike]: ...

@@ -324,7 +335,7 @@ def task(i: int) -> float:
     scale_pos_weight : {Optional[float]}
         Balancing of positive and negative weights.

-    base_score : {Optional[float]}
+    base_score : {Optional[Union[float, List[float]]]}

         The initial prediction score of all instances, global bias.

@@ -387,7 +398,7 @@ def task(i: int) -> float:
         .. versionadded:: 1.7.0

         Used for specifying feature types without constructing a dataframe. See
-        :py:class:`DMatrix` for details.
+        the :py:class:`DMatrix` for details.

     feature_weights : Optional[ArrayLike]

@@ -432,7 +443,7 @@ def task(i: int) -> float:
         - ``one_output_per_tree``: One model for each target.
         - ``multi_output_tree``: Use multi-target trees.

-    eval_metric : {Optional[Union[str, List[str], Callable]]}
+    eval_metric : {Optional[Union[str, List[Union[str, Callable]], Callable]]}

         .. versionadded:: 1.6.0

@@ -604,6 +615,65 @@ def adddoc(cls: TDoc) -> TDoc:
     return adddoc


+def get_model_categories(
+    X: ArrayLike,
+    model: Optional[Union[Booster, str]],
+    feature_types: Optional[FeatureTypes],
+) -> Tuple[Optional[Union[Booster, str]], Optional[Union[FeatureTypes, Categories]]]:
+    """Extract the optional reference categories from the booster. Used for training
+    continuation. The result should be passed to :py:func:`pick_ref_categories`.
+
+    """
+    # Skip if the input is not a dataframe, as there is no new encoding to re-code.
+    #
+    # This function helps override the `feature_types` parameter. The user-supplied
+    # `feature_types` is not useful when the input is a dataframe, as the real feature
+    # types are already encoded in the DF.
+    if model is None or not is_dataframe(X):
+        return model, feature_types
+
+    if isinstance(model, str):
+        model = Booster(model_file=model)
+
+    categories = model.get_categories()
+    if not categories.empty():
+        # Override the `feature_types`.
+        return model, categories
+    # Convert empty into None.
+    return model, feature_types
+
+
+def pick_ref_categories(
+    X: Any,
+    model_cats: Optional[Union[FeatureTypes, Categories]],
+    Xy_cats: Optional[Categories],
+) -> Optional[Union[FeatureTypes, Categories]]:
+    """Use the reference categories from the model. If none, then use the reference
+    categories from the training DMatrix.
+
+    Parameters
+    ----------
+    X :
+        Input feature matrix.
+
+    model_cats :
+        Optional categories stored in the previous model (training continuation). This
+        should come from :py:func:`get_model_categories`.
+
+    Xy_cats :
+        Optional categories from the training DMatrix. Used for re-coding the validation
+        dataset.
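+
+    Returns
+    -------
+    The feature types or reference categories to pass to the next DMatrix
+    construction.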
+ + """ + categories: Optional[Categories] = None + if not isinstance(model_cats, Categories) and is_dataframe(X): + categories = Xy_cats + if categories is not None and not categories.empty(): + model_cats = categories + + return model_cats + + def _wrap_evaluation_matrices( *, missing: float, @@ -621,10 +691,13 @@ def _wrap_evaluation_matrices( eval_qid: Optional[Sequence[Any]], create_dmatrix: Callable, enable_categorical: bool, - feature_types: Optional[FeatureTypes], + feature_types: Optional[Union[FeatureTypes, Categories]], ) -> Tuple[Any, List[Tuple[Any, str]]]: - """Convert array_like evaluation matrices into DMatrix. Perform validation on the - way.""" + """Convert array_like evaluation matrices into DMatrix. Perform sanity checks on the + way. + + """ + # Feature_types contains the optional reference categories from the booster object. train_dmatrix = create_dmatrix( data=X, label=y, @@ -640,6 +713,10 @@ def _wrap_evaluation_matrices( ) n_validation = 0 if eval_set is None else len(eval_set) + if hasattr(train_dmatrix, "get_categories"): + Xy_cats = train_dmatrix.get_categories() + else: + Xy_cats = None def validate_or_none(meta: Optional[Sequence], name: str) -> Sequence: if meta is None: @@ -663,7 +740,7 @@ def validate_or_none(meta: Optional[Sequence], name: str) -> Sequence: evals = [] for i, (valid_X, valid_y) in enumerate(eval_set): - # Skip the duplicated entry. + # Skip the entry if it's the training DMatrix. if all( ( valid_X is X, @@ -675,20 +752,23 @@ def validate_or_none(meta: Optional[Sequence], name: str) -> Sequence: ) ): evals.append(train_dmatrix) - else: - m = create_dmatrix( - data=valid_X, - label=valid_y, - weight=sample_weight_eval_set[i], - group=eval_group[i], - qid=eval_qid[i], - base_margin=base_margin_eval_set[i], - missing=missing, - enable_categorical=enable_categorical, - feature_types=feature_types, - ref=train_dmatrix, - ) - evals.append(m) + continue + + feature_types = pick_ref_categories(valid_X, feature_types, Xy_cats) + m = create_dmatrix( + data=valid_X, + label=valid_y, + weight=sample_weight_eval_set[i], + group=eval_group[i], + qid=eval_qid[i], + base_margin=base_margin_eval_set[i], + missing=missing, + enable_categorical=enable_categorical, + feature_types=feature_types, + ref=train_dmatrix, + ) + evals.append(m) + nevals = len(evals) eval_names = [f"validation_{i}" for i in range(nevals)] evals = list(zip(evals, eval_names)) @@ -746,7 +826,7 @@ def __init__( reg_alpha: Optional[float] = None, reg_lambda: Optional[float] = None, scale_pos_weight: Optional[float] = None, - base_score: Optional[float] = None, + base_score: Optional[Union[float, List[float]]] = None, random_state: Optional[ Union[np.random.RandomState, np.random.Generator, int] ] = None, @@ -763,7 +843,7 @@ def __init__( max_cat_to_onehot: Optional[int] = None, max_cat_threshold: Optional[int] = None, multi_strategy: Optional[str] = None, - eval_metric: Optional[Union[str, List[str], Callable]] = None, + eval_metric: Optional[Union[str, List[Union[str, Callable]], Callable]] = None, early_stopping_rounds: Optional[int] = None, callbacks: Optional[List[TrainingCallback]] = None, **kwargs: Any, @@ -806,6 +886,12 @@ def __init__( self.validate_parameters = validate_parameters self.enable_categorical = enable_categorical self.feature_types = feature_types + if isinstance(self.feature_types, Categories): + raise TypeError( + "If you are training with a prior model (training continuation), " + "The scikit-learn interface can automatically reuse the categories from" + " that 
model." + ) self.feature_weights = feature_weights self.max_cat_to_onehot = max_cat_to_onehot self.max_cat_threshold = max_cat_threshold @@ -823,6 +909,7 @@ def _more_tags(self) -> Dict[str, bool]: tags["non_deterministic"] = True tags["categorical"] = self.enable_categorical + tags["string"] = self.enable_categorical return tags @staticmethod @@ -970,7 +1057,7 @@ def get_params(self, deep: bool = True) -> Dict[str, Any]: # XGBRegressor -> XGBModel -> BaseEstimator # XGBModel -> BaseEstimator # - params = super().get_params(deep) + params = super().get_params(deep) # pylint: disable=no-member cp = copy.copy(self) # If the immediate parent defines get_params(), use that. if callable(getattr(cp.__class__.__bases__[0], "get_params", None)): @@ -1061,7 +1148,9 @@ def _load_model_attributes(self, config: dict) -> None: self.objective = config["learner"]["objective"]["name"] self.booster = config["learner"]["gradient_booster"]["name"] - self.base_score = config["learner"]["learner_model_param"]["base_score"] + self.base_score = json.loads( + config["learner"]["learner_model_param"]["base_score"] + ) self.feature_types = booster.feature_types if is_classifier(self): @@ -1103,14 +1192,42 @@ def _duplicated(parameter: str) -> None: # - configure callable evaluation metric metric: Optional[Metric] = None + + def custom_metric(m: Callable) -> Metric: + if self._get_type() == "ranker": + wrapped = ltr_metric_decorator(m, self.n_jobs) + else: + wrapped = _metric_decorator(m) + return wrapped + + def invalid_type(m: Any) -> None: + msg = f"Invalid type for the `eval_metric`: {type(m)}" + raise TypeError(msg) + if self.eval_metric is not None: if callable(self.eval_metric): - if self._get_type() == "ranker": - metric = ltr_metric_decorator(self.eval_metric, self.n_jobs) - else: - metric = _metric_decorator(self.eval_metric) - else: + metric = custom_metric(self.eval_metric) + elif isinstance(self.eval_metric, str): params.update({"eval_metric": self.eval_metric}) + else: + # A sequence of metrics + if not isinstance(self.eval_metric, collections.abc.Sequence): + invalid_type(self.eval_metric) + # Could be a list of strings or callables + builtin_metrics: List[str] = [] + for m in self.eval_metric: + if callable(m): + if metric is not None: + raise NotImplementedError( + "Using multiple custom metrics is not yet supported." + ) + metric = custom_metric(m) + elif isinstance(m, str): + builtin_metrics.append(m) + else: + invalid_type(m) + if builtin_metrics: + params.update({"eval_metric": builtin_metrics}) if feature_weights is not None: _deprecated("feature_weights") @@ -1141,7 +1258,7 @@ def _create_dmatrix(self, ref: Optional[DMatrix], **kwargs: Any) -> DMatrix: pass return DMatrix(**kwargs, nthread=self.n_jobs) - def _set_evaluation_result(self, evals_result: TrainingCallback.EvalsLog) -> None: + def _set_evaluation_result(self, evals_result: EvalsLog) -> None: if evals_result: self.evals_result_ = cast(Dict[str, Dict[str, List[float]]], evals_result) @@ -1160,7 +1277,7 @@ def fit( base_margin_eval_set: Optional[Sequence[ArrayLike]] = None, feature_weights: Optional[ArrayLike] = None, ) -> "XGBModel": - # pylint: disable=invalid-name,attribute-defined-outside-init + # pylint: disable=attribute-defined-outside-init """Fit gradient boosting model. 
Note that calling ``fit()`` multiple times will cause the model object to be @@ -1217,8 +1334,9 @@ def fit( model, metric, params, feature_weights = self._configure_fit( xgb_model, params, feature_weights ) + model, feature_types = get_model_categories(X, model, self.feature_types) - evals_result: TrainingCallback.EvalsLog = {} + evals_result: EvalsLog = {} train_dmatrix, evals = _wrap_evaluation_matrices( missing=self.missing, X=X, @@ -1235,7 +1353,7 @@ def fit( eval_qid=None, create_dmatrix=self._create_dmatrix, enable_categorical=self.enable_categorical, - feature_types=self.feature_types, + feature_types=feature_types, ) if callable(self.objective): @@ -1262,9 +1380,7 @@ def fit( return self def _can_use_inplace_predict(self) -> bool: - if self.booster != "gblinear": - return True - return False + return self.booster != "gblinear" def _get_iteration_range( self, iteration_range: Optional[IterationRange] @@ -1387,6 +1503,7 @@ def apply( missing=self.missing, feature_types=self.feature_types, nthread=self.n_jobs, + enable_categorical=self.enable_categorical, ) return self.get_booster().predict( test_dmatrix, pred_leaf=True, iteration_range=iteration_range @@ -1535,8 +1652,10 @@ def intercept_(self) -> np.ndarray: b = self.get_booster() if booster_config != "gblinear": # gbtree, dart config = json.loads(b.save_config()) - intercept = config["learner"]["learner_model_param"]["base_score"] - return np.array([float(intercept)], dtype=np.float32) + intercept = json.loads( + config["learner"]["learner_model_param"]["base_score"] + ) + return np.array(intercept, dtype=np.float32) return np.array( json.loads(b.get_dump(dump_format="json")[0])["bias"], dtype=np.float32 @@ -1573,7 +1692,7 @@ def _cls_predict_proba(n_classes: int, prediction: PredtT, vstack: Callable) -> """, ) class XGBClassifier(XGBClassifierBase, XGBModel): - # pylint: disable=missing-docstring,invalid-name,too-many-instance-attributes + # pylint: disable=missing-docstring,too-many-instance-attributes @_deprecate_positional_args def __init__( self, @@ -1611,7 +1730,6 @@ def fit( ) -> "XGBClassifier": # pylint: disable = attribute-defined-outside-init,too-many-statements with config_context(verbosity=self.verbosity): - evals_result: TrainingCallback.EvalsLog = {} # We keep the n_classes_ as a simple member instead of loading it from # booster in a Python property. This way we can have efficient and # thread-safe prediction. 
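A minimal sketch of the mixed ``eval_metric`` list handled above (assumed estimator usage; built-in metric names may be combined with at most one callable, and a second callable raises ``NotImplementedError``):

    from sklearn.metrics import mean_absolute_error

    import xgboost as xgb

    reg = xgb.XGBRegressor(
        tree_method="hist",
        # One built-in metric name plus a single callable metric.
        eval_metric=["rmse", mean_absolute_error],
    )
    # reg.fit(X, y, eval_set=[(X_valid, y_valid)]) evaluates both per round.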
@@ -1660,6 +1778,9 @@ def fit( model, metric, params, feature_weights = self._configure_fit( xgb_model, params, feature_weights ) + model, feature_types = get_model_categories(X, model, self.feature_types) + + evals_result: EvalsLog = {} train_dmatrix, evals = _wrap_evaluation_matrices( missing=self.missing, X=X, @@ -1676,7 +1797,7 @@ def fit( eval_qid=None, create_dmatrix=self._create_dmatrix, enable_categorical=self.enable_categorical, - feature_types=self.feature_types, + feature_types=feature_types, ) self._Booster = train( @@ -2019,7 +2140,7 @@ def _get_qid( :py:meth:`fit` for more info.""", ) class XGBRanker(XGBRankerMixIn, XGBModel): - # pylint: disable=missing-docstring,too-many-arguments,invalid-name + # pylint: disable=missing-docstring,too-many-arguments @_deprecate_positional_args def __init__(self, *, objective: str = "rank:ndcg", **kwargs: Any): super().__init__(objective=objective, **kwargs) @@ -2154,6 +2275,14 @@ def fit( """ with config_context(verbosity=self.verbosity): + params = self.get_xgb_params() + + model, metric, params, feature_weights = self._configure_fit( + xgb_model, params, feature_weights + ) + model, feature_types = get_model_categories(X, model, self.feature_types) + + evals_result: EvalsLog = {} train_dmatrix, evals = _wrap_evaluation_matrices( missing=self.missing, X=X, @@ -2170,15 +2299,9 @@ def fit( eval_qid=eval_qid, create_dmatrix=self._create_ltr_dmatrix, enable_categorical=self.enable_categorical, - feature_types=self.feature_types, + feature_types=feature_types, ) - evals_result: TrainingCallback.EvalsLog = {} - params = self.get_xgb_params() - - model, metric, params, feature_weights = self._configure_fit( - xgb_model, params, feature_weights - ) self._Booster = train( params, train_dmatrix, diff --git a/python-package/xgboost/spark/core.py b/python-package/xgboost/spark/core.py index 3db9b1409351..f7640cc7699e 100644 --- a/python-package/xgboost/spark/core.py +++ b/python-package/xgboost/spark/core.py @@ -2,7 +2,7 @@ import base64 -# pylint: disable=fixme, protected-access, no-member, invalid-name +# pylint: disable=fixme, protected-access, no-member # pylint: disable=too-many-lines, too-many-branches import json import logging @@ -51,6 +51,7 @@ from pyspark.sql.functions import col, countDistinct, pandas_udf, rand, struct from pyspark.sql.types import ( ArrayType, + BooleanType, DoubleType, FloatType, IntegerType, @@ -122,7 +123,6 @@ "qid_col", "repartition_random_shuffle", "pred_contrib_col", - "use_gpu", "launch_tracker_on_driver", "coll_cfg", ] @@ -142,7 +142,6 @@ _inverse_pyspark_param_alias_map = {v: k for k, v in _pyspark_param_alias_map.items()} _unsupported_xgb_params = [ - "gpu_id", # we have "device" pyspark param instead. "enable_categorical", # Use feature_types param to specify categorical feature instead "n_jobs", # Do not allow user to set it, will use `spark.task.cpus` value instead. "nthread", # Ditto @@ -218,16 +217,6 @@ class _SparkXGBParams( ), TypeConverters.toString, ) - use_gpu = Param( - Params._dummy(), - "use_gpu", - ( - "Deprecated, use `device` instead. A boolean variable. Set use_gpu=true " - "if the executors are running on GPU instances. Currently, only one GPU per" - " task is supported." 
- ), - TypeConverters.toBoolean, - ) force_repartition = Param( Params._dummy(), "force_repartition", @@ -503,14 +492,10 @@ def _validate_params(self) -> None: def _run_on_gpu(self) -> bool: """If train or transform on the gpu according to the parameters""" - return ( - use_cuda(self.getOrDefault(self.device)) - or self.getOrDefault(self.use_gpu) - or self.getOrDefault(self.getParam("tree_method")) == "gpu_hist" - ) + return use_cuda(self.getOrDefault(self.device)) def _col_is_defined_not_empty(self, param: "Param[str]") -> bool: - return self.isDefined(param) and self.getOrDefault(param) != "" + return self.isDefined(param) and self.getOrDefault(param) not in (None, "") def _validate_and_convert_feature_col_as_float_col_list( @@ -630,7 +615,6 @@ def __init__(self) -> None: self._setDefault( num_workers=1, device="cpu", - use_gpu=False, force_repartition=False, repartition_random_shuffle=False, feature_names=None, @@ -642,7 +626,7 @@ def __init__(self) -> None: self.logger = get_logger(self.__class__.__name__) - def setParams(self, **kwargs: Any) -> None: # pylint: disable=invalid-name + def setParams(self, **kwargs: Any) -> None: """ Set params for the estimator. """ @@ -844,6 +828,11 @@ def _prepare_input(self, dataset: DataFrame) -> Tuple[DataFrame, FeatureProp]: sc = _get_spark_session().sparkContext max_concurrent_tasks = _get_max_num_concurrent_tasks(sc) + if feature_prop.has_validation_col: + dtype = dataset.schema[alias.valid].dataType + if not isinstance(dtype, BooleanType): + raise TypeError("The validation indicator must be boolean type.") + if num_workers > max_concurrent_tasks: get_logger(self.__class__.__name__).warning( "The num_workers %s set for xgboost distributed " diff --git a/python-package/xgboost/spark/estimator.py b/python-package/xgboost/spark/estimator.py index 011f7ea0b715..aae4412a2fd6 100644 --- a/python-package/xgboost/spark/estimator.py +++ b/python-package/xgboost/spark/estimator.py @@ -1,9 +1,8 @@ """Xgboost pyspark integration submodule for estimator API.""" -# pylint: disable=fixme, protected-access, no-member, invalid-name +# pylint: disable=protected-access, no-member # pylint: disable=unused-argument, too-many-locals -import warnings from typing import Any, List, Optional, Type, Union import numpy as np @@ -77,12 +76,6 @@ def set_param_attrs(attr_name: str, param: Param) -> None: set_param_attrs(name, param_obj) -def _deprecated_use_gpu() -> None: - warnings.warn( - "`use_gpu` is deprecated since 2.0.0, use `device` instead", FutureWarning - ) - - class SparkXGBRegressor(_SparkXGBEstimator): """SparkXGBRegressor is a PySpark ML estimator. It implements the XGBoost regression algorithm based on XGBoost python library, and it can be used in PySpark Pipeline @@ -140,11 +133,6 @@ class SparkXGBRegressor(_SparkXGBEstimator): num_workers: How many XGBoost workers to be used to train. Each XGBoost worker corresponds to one spark task. - use_gpu: - .. deprecated:: 2.0.0 - - Use `device` instead. - device: .. 
versionadded:: 2.0.0
@@ -214,7 +202,6 @@ def __init__(  # pylint:disable=too-many-arguments
         weight_col: Optional[str] = None,
         base_margin_col: Optional[str] = None,
         num_workers: int = 1,
-        use_gpu: Optional[bool] = None,
         device: Optional[str] = None,
         force_repartition: bool = False,
         repartition_random_shuffle: bool = False,
@@ -225,8 +212,6 @@ def __init__(  # pylint:disable=too-many-arguments
     ) -> None:
         super().__init__()
         input_kwargs = self._input_kwargs
-        if use_gpu:
-            _deprecated_use_gpu()
         self.setParams(**input_kwargs)
 
     @classmethod
@@ -327,11 +312,6 @@ class SparkXGBClassifier(_SparkXGBEstimator, HasProbabilityCol, HasRawPrediction
     num_workers:
         How many XGBoost workers to be used to train.
         Each XGBoost worker corresponds to one spark task.
-    use_gpu:
-        .. deprecated:: 2.0.0
-
-        Use `device` instead.
-
     device:
         .. versionadded:: 2.0.0
@@ -401,7 +381,6 @@ def __init__(  # pylint:disable=too-many-arguments
         weight_col: Optional[str] = None,
         base_margin_col: Optional[str] = None,
         num_workers: int = 1,
-        use_gpu: Optional[bool] = None,
         device: Optional[str] = None,
         force_repartition: bool = False,
         repartition_random_shuffle: bool = False,
@@ -416,8 +395,6 @@ def __init__(  # pylint:disable=too-many-arguments
         # binary or multinomial input dataset, and we need to remove the fixed default
         # param value as well to avoid causing ambiguity.
         input_kwargs = self._input_kwargs
-        if use_gpu:
-            _deprecated_use_gpu()
         self.setParams(**input_kwargs)
         self._setDefault(objective=None)
@@ -517,11 +494,6 @@ class SparkXGBRanker(_SparkXGBEstimator):
     num_workers:
         How many XGBoost workers to be used to train.
         Each XGBoost worker corresponds to one spark task.
-    use_gpu:
-        .. deprecated:: 2.0.0
-
-        Use `device` instead.
-
     device:
         .. versionadded:: 2.0.0
@@ -597,7 +569,6 @@ def __init__(  # pylint:disable=too-many-arguments
         base_margin_col: Optional[str] = None,
         qid_col: Optional[str] = None,
         num_workers: int = 1,
-        use_gpu: Optional[bool] = None,
         device: Optional[str] = None,
         force_repartition: bool = False,
         repartition_random_shuffle: bool = False,
@@ -608,8 +579,6 @@ def __init__(  # pylint:disable=too-many-arguments
     ) -> None:
         super().__init__()
         input_kwargs = self._input_kwargs
-        if use_gpu:
-            _deprecated_use_gpu()
         self.setParams(**input_kwargs)
 
     @classmethod
diff --git a/python-package/xgboost/spark/params.py b/python-package/xgboost/spark/params.py
index f173d3301286..af2f4f9d6588 100644
--- a/python-package/xgboost/spark/params.py
+++ b/python-package/xgboost/spark/params.py
@@ -39,7 +39,7 @@ class HasBaseMarginCol(Params):
 
 class HasFeaturesCols(Params):
     """
     Mixin for param features_cols: a list of feature column names.
-    This parameter is taken effect only when use_gpu is enabled.
+    This parameter takes effect only when GPU is enabled.
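With the `use_gpu` parameter and the `gpu_id`/`gpu_hist` spellings removed earlier in this file, `_run_on_gpu` reduces to a single `use_cuda(device)` check, leaving `device` as the only GPU switch for the PySpark estimators. A migration sketch (assuming a Spark session with GPU-enabled executors is already configured):

```python
from xgboost.spark import SparkXGBClassifier

# Previously: SparkXGBClassifier(use_gpu=True, ...) or tree_method="gpu_hist";
# both spellings are rejected after this change.
clf = SparkXGBClassifier(
    device="cuda",   # the sole way to request GPU training now
    num_workers=2,   # one XGBoost worker per Spark task
)
```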
""" features_cols = Param( diff --git a/python-package/xgboost/testing/__init__.py b/python-package/xgboost/testing/__init__.py index 8c582b647f10..85804d9434e6 100644 --- a/python-package/xgboost/testing/__init__.py +++ b/python-package/xgboost/testing/__init__.py @@ -3,7 +3,7 @@ """ -# pylint: disable=invalid-name,missing-function-docstring +# pylint: disable=missing-function-docstring import importlib.util import os import platform @@ -190,10 +190,6 @@ def no_dask_cudf() -> PytestSkip: return no_mod("dask_cudf") -def no_json_schema() -> PytestSkip: - return no_mod("jsonschema") - - def no_graphviz() -> PytestSkip: return no_mod("graphviz") @@ -511,19 +507,6 @@ def non_decreasing(L: Sequence[float], tolerance: float = 1e-4) -> bool: return all((y - x) >= -tolerance for x, y in zip(L, L[1:])) -def predictor_equal(lhs: xgb.DMatrix, rhs: xgb.DMatrix) -> bool: - """Assert whether two DMatrices contain the same predictors.""" - lcsr = lhs.get_data() - rcsr = rhs.get_data() - return all( - ( - np.array_equal(lcsr.data, rcsr.data), - np.array_equal(lcsr.indices, rcsr.indices), - np.array_equal(lcsr.indptr, rcsr.indptr), - ) - ) - - M = TypeVar("M", xgb.Booster, xgb.XGBModel) @@ -638,7 +621,7 @@ def ls_obj( ) -> Tuple[np.ndarray, np.ndarray]: """Least squared error.""" grad = y_pred - y_true - hess = np.ones(len(y_true)) + hess = np.ones(grad.shape) if sample_weight is not None: grad *= sample_weight hess *= sample_weight diff --git a/python-package/xgboost/testing/basic_models.py b/python-package/xgboost/testing/basic_models.py new file mode 100644 index 000000000000..dae99d7380a9 --- /dev/null +++ b/python-package/xgboost/testing/basic_models.py @@ -0,0 +1,81 @@ +"""Tests for basic features of the Booster.""" + +from typing import Tuple + +import numpy as np + +from xgboost import testing as tm + +from ..core import Booster, DMatrix +from ..training import cv, train +from .utils import Device + + +def run_custom_objective( # pylint: disable=too-many-locals + tree_method: str, + device: Device, + dtrain: DMatrix, + dtest: DMatrix, +) -> None: + """Tests custom objective and metric functions.""" + param = { + "max_depth": 2, + "eta": 1, + "objective": "reg:logistic", + "tree_method": tree_method, + "device": device, + } + watchlist = [(dtest, "eval"), (dtrain, "train")] + num_round = 10 + + def evalerror(preds: np.ndarray, dtrain: DMatrix) -> Tuple[str, np.float64]: + return tm.eval_error_metric(preds, dtrain, rev_link=True) + + # test custom_objective in training + bst = train( + param, + dtrain, + num_round, + evals=watchlist, + obj=tm.logregobj, + custom_metric=evalerror, + ) + assert isinstance(bst, Booster) + preds = bst.predict(dtest) + labels = dtest.get_label() + err = sum(1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]) / float( + len(preds) + ) + assert err < 0.1 + + # test custom_objective in cross-validation + cv( + param, + dtrain, + num_round, + nfold=5, + seed=0, + obj=tm.logregobj, + custom_metric=evalerror, + ) + + # test maximize parameter + def neg_evalerror(preds: np.ndarray, dtrain: DMatrix) -> Tuple[str, float]: + labels = dtrain.get_label() + preds = 1.0 / (1.0 + np.exp(-preds)) + return "error", float(sum(labels == (preds > 0.0))) / len(labels) + + bst2 = train( + param, + dtrain, + num_round, + evals=watchlist, + obj=tm.logregobj, + custom_metric=neg_evalerror, + maximize=True, + ) + preds2 = bst2.predict(dtest) + err2 = sum( + 1 for i in range(len(preds2)) if int(preds2[i] > 0.5) != labels[i] + ) / float(len(preds2)) + assert err == err2 diff --git 
a/python-package/xgboost/testing/callbacks.py b/python-package/xgboost/testing/callbacks.py new file mode 100644 index 000000000000..740b389cfde4 --- /dev/null +++ b/python-package/xgboost/testing/callbacks.py @@ -0,0 +1,188 @@ +# pylint: disable=too-many-locals +"""Tests for callback functions.""" + +import json +from itertools import product +from typing import Dict, List, Tuple + +from ..callback import LearningRateScheduler +from ..core import Booster, DMatrix +from ..training import cv, train +from .utils import Device + + +def run_eta_decay( + tree_method: str, dtrain: DMatrix, dtest: DMatrix, device: Device +) -> None: + """Test learning rate scheduler, used by both CPU and GPU tests.""" + scheduler = LearningRateScheduler + + watchlist = [(dtest, "eval"), (dtrain, "train")] + num_round = 4 + + # learning_rates as a list + # init eta with 0 to check whether learning_rates work + param = { + "max_depth": 2, + "eta": 0, + "objective": "binary:logistic", + "eval_metric": "error", + "tree_method": tree_method, + "device": device, + } + evals_result: Dict[str, Dict] = {} + bst = train( + param, + dtrain, + num_round, + evals=watchlist, + callbacks=[scheduler([0.8, 0.7, 0.6, 0.5])], + evals_result=evals_result, + ) + eval_errors_0 = list(map(float, evals_result["eval"]["error"])) + assert isinstance(bst, Booster) + # validation error should decrease, if eta > 0 + assert eval_errors_0[0] > eval_errors_0[-1] + + # init learning_rate with 0 to check whether learning_rates work + param = { + "max_depth": 2, + "learning_rate": 0, + "objective": "binary:logistic", + "eval_metric": "error", + "tree_method": tree_method, + "device": device, + } + evals_result = {} + + bst = train( + param, + dtrain, + num_round, + evals=watchlist, + callbacks=[scheduler([0.8, 0.7, 0.6, 0.5])], + evals_result=evals_result, + ) + eval_errors_1 = list(map(float, evals_result["eval"]["error"])) + assert isinstance(bst, Booster) + # validation error should decrease, if learning_rate > 0 + assert eval_errors_1[0] > eval_errors_1[-1] + + # check if learning_rates override default value of eta/learning_rate + param = { + "max_depth": 2, + "objective": "binary:logistic", + "eval_metric": "error", + "tree_method": tree_method, + "device": device, + } + evals_result = {} + bst = train( + param, + dtrain, + num_round, + evals=watchlist, + callbacks=[scheduler([0, 0, 0, 0])], + evals_result=evals_result, + ) + eval_errors_2 = list(map(float, evals_result["eval"]["error"])) + assert isinstance(bst, Booster) + # validation error should not decrease, if eta/learning_rate = 0 + assert eval_errors_2[0] == eval_errors_2[-1] + + # learning_rates as a customized decay function + def eta_decay(ithround: int, num_boost_round: int = num_round) -> float: + return num_boost_round / (ithround + 1) + + evals_result = {} + bst = train( + param, + dtrain, + num_round, + evals=watchlist, + callbacks=[scheduler(eta_decay)], + evals_result=evals_result, + ) + eval_errors_3 = list(map(float, evals_result["eval"]["error"])) + + assert isinstance(bst, Booster) + + assert eval_errors_3[0] == eval_errors_2[0] + + for i in range(1, len(eval_errors_0)): + assert eval_errors_3[i] != eval_errors_2[i] + + cv(param, dtrain, num_round, callbacks=[scheduler(eta_decay)]) + + +def tree_methods_objs() -> List[Tuple[str, str]]: + """Test parameters for the leaf output test.""" + return list( + product( + ["approx", "hist"], + [ + "binary:logistic", + "reg:absoluteerror", + "reg:quantileerror", + ], + ) + ) + + +def run_eta_decay_leaf_output( + tree_method: str, 
objective: str, dtrain: DMatrix, dtest: DMatrix, device: Device
+) -> None:
+    """Check that the decay has an effect on the leaf output."""
+    num_round = 4
+    scheduler = LearningRateScheduler
+
+    watchlist = [(dtest, "eval"), (dtrain, "train")]
+
+    param = {
+        "max_depth": 2,
+        "objective": objective,
+        "eval_metric": "error",
+        "tree_method": tree_method,
+        "device": device,
+    }
+    if objective == "reg:quantileerror":
+        param["quantile_alpha"] = 0.3
+
+    def eta_decay_0(i: int) -> float:
+        return num_round / (i + 1)
+
+    bst0 = train(
+        param,
+        dtrain,
+        num_round,
+        evals=watchlist,
+        callbacks=[scheduler(eta_decay_0)],
+    )
+
+    def eta_decay_1(i: int) -> float:
+        if i > 1:
+            return 5.0
+        return num_round / (i + 1)
+
+    bst1 = train(
+        param,
+        dtrain,
+        num_round,
+        evals=watchlist,
+        callbacks=[scheduler(eta_decay_1)],
+    )
+    bst_json0 = bst0.save_raw(raw_format="json")
+    bst_json1 = bst1.save_raw(raw_format="json")
+
+    j0 = json.loads(bst_json0)
+    j1 = json.loads(bst_json1)
+
+    tree_2th_0 = j0["learner"]["gradient_booster"]["model"]["trees"][2]
+    tree_2th_1 = j1["learner"]["gradient_booster"]["model"]["trees"][2]
+    assert tree_2th_0["base_weights"] == tree_2th_1["base_weights"]
+    assert tree_2th_0["split_conditions"] == tree_2th_1["split_conditions"]
+
+    tree_3th_0 = j0["learner"]["gradient_booster"]["model"]["trees"][3]
+    tree_3th_1 = j1["learner"]["gradient_booster"]["model"]["trees"][3]
+    assert tree_3th_0["base_weights"] != tree_3th_1["base_weights"]
+    assert tree_3th_0["split_conditions"] != tree_3th_1["split_conditions"]
diff --git a/python-package/xgboost/testing/collective.py b/python-package/xgboost/testing/collective.py
new file mode 100644
index 000000000000..f3eaea0b52be
--- /dev/null
+++ b/python-package/xgboost/testing/collective.py
@@ -0,0 +1,15 @@
+"""Collective module related utilities."""
+
+import socket
+
+
+def get_avail_port() -> int:
+    """Returns a port that's available during the function call. It doesn't prevent the
+    port from being used after the function returns as we can't reserve the port. The
+    utility makes a test more likely to pass.
+
+    """
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as server:
+        server.bind(("127.0.0.1", 0))
+        port = server.getsockname()[1]
+    return port
diff --git a/python-package/xgboost/testing/dask.py b/python-package/xgboost/testing/dask.py
index 877d1bdf9723..79ca59e2b9ba 100644
--- a/python-package/xgboost/testing/dask.py
+++ b/python-package/xgboost/testing/dask.py
@@ -1,12 +1,13 @@
 """Tests for dask shared by different test modules."""
 
-from typing import Any, List, Literal, Tuple, cast
+from typing import Any, List, Literal, Tuple, Type, cast
 
 import numpy as np
 import pandas as pd
 from dask import array as da
 from dask import dataframe as dd
 from distributed import Client, get_worker
+from packaging.version import parse as parse_version
 from sklearn.datasets import make_classification
 
 import xgboost as xgb
@@ -15,9 +16,13 @@
 from xgboost.testing.updater import get_basescore
 
 from ..
import dask as dxgb +from .._typing import EvalsLog from ..dask import _get_rabit_args +from ..dask.utils import _DASK_VERSION from .data import make_batches from .data import make_categorical as make_cat_local +from .ordinal import make_recoded +from .utils import Device, assert_allclose def check_init_estimation_clf( @@ -128,7 +133,7 @@ def check_external_memory( # pylint: disable=too-many-locals Xy: xgb.DMatrix = xgb.ExtMemQuantileDMatrix(it, nthread=n_threads) else: Xy = xgb.DMatrix(it, nthread=n_threads) - results: xgb.callback.TrainingCallback.EvalsLog = {} + results: EvalsLog = {} xgb.train( {"tree_method": "hist", "nthread": n_threads, "device": device}, Xy, @@ -159,7 +164,7 @@ def check_external_memory( # pylint: disable=too-many-locals else: Xy = xgb.DMatrix(X, yconcat, weight=wconcat, nthread=n_threads) - results_local: xgb.callback.TrainingCallback.EvalsLog = {} + results_local: EvalsLog = {} xgb.train( {"tree_method": "hist", "nthread": n_threads, "device": device}, Xy, @@ -179,7 +184,8 @@ def get_rabit_args(client: Client, n_workers: int) -> Any: def get_client_workers(client: Client) -> List[str]: "Get workers from a dask client." - workers = client.scheduler_info()["workers"] + kwargs = {"n_workers": -1} if _DASK_VERSION() >= parse_version("2025.4.0") else {} + workers = client.scheduler_info(**kwargs)["workers"] return list(workers.keys()) @@ -241,7 +247,12 @@ def check_no_group_split(client: Client, device: str) -> None: client, 1024, 128, n_query_groups=4, max_rel=5, device=device ) - ltr = dxgb.DaskXGBRanker(allow_group_split=False, n_estimators=36, device=device) + ltr = dxgb.DaskXGBRanker( + allow_group_split=False, + n_estimators=36, + device=device, + objective="rank:pairwise", + ) ltr.fit( X_tr, y_tr, @@ -310,3 +321,77 @@ def pack(**kwargs: Any) -> dd.DataFrame: if onehot: return dd.get_dummies(X), y return X, y + + +# pylint: disable=too-many-locals +def run_recode(client: Client, device: Device) -> None: + """Run re-coding test with the Dask interface.""" + + def create_dmatrix( + DMatrixT: Type[dxgb.DaskDMatrix], *args: Any, **kwargs: Any + ) -> dxgb.DaskDMatrix: + if DMatrixT is dxgb.DaskQuantileDMatrix: + ref = kwargs.pop("ref", None) + return DMatrixT(*args, ref=ref, **kwargs) + + kwargs.pop("ref", None) + return DMatrixT(*args, **kwargs) + + def run(DMatrixT: Type[dxgb.DaskDMatrix]) -> None: + enc, reenc, y, _, _ = make_recoded(device, n_features=96) + to = get_client_workers(client) + + denc, dreenc, dy = ( + dd.from_pandas(enc, npartitions=8).persist(workers=to), + dd.from_pandas(reenc, npartitions=8).persist(workers=to), + da.from_array(y, chunks=(y.shape[0] // 8,)).persist(workers=to), + ) + + Xy = create_dmatrix(DMatrixT, client, denc, dy, enable_categorical=True) + Xy_valid = create_dmatrix( + DMatrixT, client, dreenc, dy, enable_categorical=True, ref=Xy + ) + # Base model + results = dxgb.train( + client, {"device": device}, Xy, evals=[(Xy_valid, "Valid")] + ) + + # Training continuation + Xy = create_dmatrix(DMatrixT, client, denc, dy, enable_categorical=True) + Xy_valid = create_dmatrix( + DMatrixT, client, dreenc, dy, enable_categorical=True, ref=Xy + ) + results_1 = dxgb.train( + client, + {"device": device}, + Xy, + evals=[(Xy_valid, "Valid")], + xgb_model=results["booster"], + ) + + # Reversed training continuation + Xy = create_dmatrix(DMatrixT, client, dreenc, dy, enable_categorical=True) + Xy_valid = create_dmatrix( + DMatrixT, client, denc, dy, enable_categorical=True, ref=Xy + ) + results_2 = dxgb.train( + client, + {"device": device}, + 
Xy, + evals=[(Xy_valid, "Valid")], + xgb_model=results["booster"], + ) + np.testing.assert_allclose( + results_1["history"]["Valid"]["rmse"], results_2["history"]["Valid"]["rmse"] + ) + + predt_0 = dxgb.inplace_predict(client, results, denc).compute() + predt_1 = dxgb.inplace_predict(client, results, dreenc).compute() + assert_allclose(device, predt_0, predt_1) + + predt_0 = dxgb.predict(client, results, Xy).compute() + predt_1 = dxgb.predict(client, results, Xy_valid).compute() + assert_allclose(device, predt_0, predt_1) + + for DMatrixT in [dxgb.DaskDMatrix, dxgb.DaskQuantileDMatrix]: + run(DMatrixT) diff --git a/python-package/xgboost/testing/data.py b/python-package/xgboost/testing/data.py index 7124c48d9d0d..0aa1c915a8ec 100644 --- a/python-package/xgboost/testing/data.py +++ b/python-package/xgboost/testing/data.py @@ -1,4 +1,4 @@ -# pylint: disable=invalid-name, too-many-lines +# pylint: disable=too-many-lines """Utilities for data generation.""" import gc import multiprocessing @@ -36,7 +36,7 @@ from ..training import train as train_fn if TYPE_CHECKING: - from ..compat import DataFrame as DataFrameT + from pandas import DataFrame as DataFrameT else: DataFrameT = Any @@ -46,7 +46,7 @@ def np_dtypes( n_samples: int, n_features: int -) -> Generator[Tuple[np.ndarray, np.ndarray], None, None]: +) -> Generator[Union[Tuple[np.ndarray, np.ndarray], Tuple[list, list]], None, None]: """Enumerate all supported dtypes from numpy.""" pd = pytest.importorskip("pandas") @@ -92,12 +92,12 @@ def np_dtypes( orig = rng.binomial(1, 0.5, size=n_samples * n_features).reshape( n_samples, n_features ) - for dtype in [np.bool_, bool]: - X = np.array(orig, dtype=dtype) + for dtype1 in [np.bool_, bool]: + X = np.array(orig, dtype=dtype1) yield orig, X - for dtype in [np.bool_, bool]: - X = np.array(orig, dtype=dtype) + for dtype2 in [np.bool_, bool]: + X = np.array(orig, dtype=dtype2) df_orig = pd.DataFrame(orig) df = pd.DataFrame(X) yield df_orig, df @@ -150,9 +150,11 @@ def pd_dtypes() -> Generator: # Categorical orig = orig.astype("category") + for c in orig.columns: + orig[c] = orig[c].cat.rename_categories(int) for Null in (np.nan, None, pd.NA): df = pd.DataFrame( - {"f0": [1.0, 2.0, Null, 3.0], "f1": [3.0, 2.0, Null, 1.0]}, + {"f0": [1, 2, Null, 3], "f1": [3, 2, Null, 1]}, dtype=pd.CategoricalDtype(), ) yield orig, df @@ -239,10 +241,58 @@ def check_inf(rng: RNG) -> None: @memory.cache def get_california_housing() -> Tuple[np.ndarray, np.ndarray]: - """Fetch the California housing dataset from sklearn.""" - datasets = pytest.importorskip("sklearn.datasets") - data = datasets.fetch_california_housing() - return data.data, data.target + """Synthesize a dataset similar to the sklearn California housing dataset. + + The real one can be obtained via: + + .. 
code-block:: + + import sklearn.datasets + + X, y = sklearn.datasets.fetch_california_housing(return_X_y=True) + + """ + n_samples = 20640 + rng = np.random.default_rng(2025) + + pd = pytest.importorskip("pandas") + + def mixture_2comp( + means: List[float], sigmas: List[float], weights: List[float] + ) -> np.ndarray: + l0 = rng.normal( + size=(int(n_samples * weights[0])), loc=means[0], scale=sigmas[0] + ) + l1 = rng.normal(size=(n_samples - l0.shape[0]), loc=means[1], scale=sigmas[1]) + return np.concatenate([l0, l1], axis=0) + + def norm(mean: float, std: float) -> np.ndarray: + return rng.normal(loc=mean, scale=std, size=(n_samples,)) + + df = pd.DataFrame( + { + "Longitude": mixture_2comp( + [-118.0703597, -121.85682825], + [0.7897320650373969, 0.7248398629412008], + [0.60402556, 0.39597444], + ), + "Latitude": mixture_2comp( + [37.84266317, 33.86030848], + [1.0643911549736087, 0.5049274656834589], + [0.44485062, 0.55514938], + ), + "MedInc": norm(mean=3.8706710029069766, std=1.8997756945748738), + "HouseAge": norm(mean=28.639486434108527, std=12.585252725724606), + "AveRooms": norm(mean=5.428999742190376, std=2.474113202333516), + "AveBedrms": norm(mean=1.096675149606208, std=0.47389937625774475), + "Population": norm(mean=1425.4767441860465, std=1132.434687757615), + "AveOccup": norm(mean=3.0706551594363742, std=10.385797959128219), + "MedHouseVal": norm(mean=2.068558169089147, std=1.1539282040412253), + } + ) + X = df[df.columns.difference(["MedHouseVal"])].to_numpy() + y = df["MedHouseVal"].to_numpy() + return X, y @memory.cache @@ -658,7 +708,7 @@ def init_rank_score( # random sample rng = np.random.default_rng(1994) n_samples = int(X.shape[0] * sample_rate) - index = np.arange(0, X.shape[0], dtype=np.uint64) + index: npt.NDArray = np.arange(0, X.shape[0], dtype=np.uint64) rng.shuffle(index) index = index[:n_samples] @@ -992,23 +1042,28 @@ def make_categorical( """ pd = pytest.importorskip("pandas") + # Use different rngs for column and rows. We can change the `n_samples` without + # changing the column type. rng = np.random.RandomState(random_state) + row_rng = np.random.RandomState(random_state + 1) df = pd.DataFrame() for i in range(n_features): choice = rng.binomial(1, cat_ratio, size=1)[0] if choice == 1: if np.issubdtype(cat_dtype, np.str_): + # we rely on using the feature index as the seed to generate the same + # categories for multiple calls to `make_categorical`. 
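The comment above states the key invariant: the column layout draws from `rng` (seeded by `random_state`, with the feature index seeding the category strings), while row values draw from `row_rng`, so changing `n_samples` can no longer flip which columns are categorical. An illustrative check under that assumption (argument spelling follows the call sites in this PR):

```python
from xgboost.testing.data import make_categorical

X_a, _ = make_categorical(128, 8, 4, onehot=False, cat_ratio=0.5, random_state=7)
X_b, _ = make_categorical(4096, 8, 4, onehot=False, cat_ratio=0.5, random_state=7)
# The same columns should be categorical regardless of the row count.
assert [str(dt) for dt in X_a.dtypes] == [str(dt) for dt in X_b.dtypes]
```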
categories = np.array(unique_random_strings(n_categories, i)) - c = rng.choice(categories, size=n_samples, replace=True) + c = row_rng.choice(categories, size=n_samples, replace=True) else: categories = np.arange(0, n_categories) - c = rng.randint(low=0, high=n_categories, size=n_samples) + c = row_rng.randint(low=0, high=n_categories, size=n_samples) df[str(i)] = pd.Series(c, dtype="category") df[str(i)] = df[str(i)].cat.set_categories(categories) else: - num = rng.randint(low=0, high=n_categories, size=n_samples) + num = row_rng.randint(low=0, high=n_categories, size=n_samples) df[str(i)] = pd.Series(num, dtype=num.dtype) label = np.zeros(shape=(n_samples,)) @@ -1021,7 +1076,7 @@ def make_categorical( if sparsity > 0.0: for i in range(n_features): - index = rng.randint( + index = row_rng.randint( low=0, high=n_samples - 1, size=int(n_samples * sparsity) ) df.iloc[index, i] = np.nan @@ -1034,7 +1089,7 @@ def make_categorical( if shuffle: columns = list(df.columns) - rng.shuffle(columns) + row_rng.shuffle(columns) df = df[columns] if device != "cpu": diff --git a/python-package/xgboost/testing/data_iter.py b/python-package/xgboost/testing/data_iter.py index 6e38c6ce5bba..371578ed9ebf 100644 --- a/python-package/xgboost/testing/data_iter.py +++ b/python-package/xgboost/testing/data_iter.py @@ -8,6 +8,7 @@ from ..compat import import_cupy from ..core import DataIter, DMatrix, ExtMemQuantileDMatrix, QuantileDMatrix +from .utils import predictor_equal def run_mixed_sparsity(device: str) -> None: @@ -36,7 +37,7 @@ def run_mixed_sparsity(device: str) -> None: y_arr = np.concatenate(y, axis=0) Xy_1 = QuantileDMatrix(X_arr, y_arr) - assert tm.predictor_equal(Xy_0, Xy_1) + assert predictor_equal(Xy_0, Xy_1) def check_invalid_cat_batches(device: str) -> None: diff --git a/python-package/xgboost/testing/federated.py b/python-package/xgboost/testing/federated.py index ddcce88c75f3..981a124b8a83 100644 --- a/python-package/xgboost/testing/federated.py +++ b/python-package/xgboost/testing/federated.py @@ -14,8 +14,8 @@ import xgboost as xgb import xgboost.federated from xgboost import testing as tm -from xgboost.training import TrainingCallback +from .._typing import EvalsLog from ..collective import _Args as CollArgs SERVER_KEY = "server-key.pem" @@ -80,7 +80,7 @@ def run_worker( num_round = 20 # Run training, all the features in training API is available. 
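A pattern repeated across this diff (federated, dask, sklearn), including the replacement just below: the nested alias `TrainingCallback.EvalsLog` gives way to the module-level `EvalsLog` from `xgboost._typing`. A minimal usage sketch:

```python
import xgboost as xgb
from xgboost._typing import EvalsLog  # module-level alias used throughout this PR

# EvalsLog is only a typing alias; at runtime it is a plain nested dict:
# {"train": {"rmse": [...]}, "eval": {"rmse": [...]}}
results: EvalsLog = {}
# xgb.train(params, dtrain, evals=[(dtrain, "train")], evals_result=results)
```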
- results: TrainingCallback.EvalsLog = {} + results: EvalsLog = {} bst = xgb.train( param, dtrain, diff --git a/python-package/xgboost/testing/interaction_constraints.py b/python-package/xgboost/testing/interaction_constraints.py new file mode 100644 index 000000000000..4e12a894e695 --- /dev/null +++ b/python-package/xgboost/testing/interaction_constraints.py @@ -0,0 +1,88 @@ +"""Tests for interaction constraints.""" + +from typing import Optional, Sequence, Union + +import numpy as np + +from .._typing import FeatureNames +from ..core import DMatrix +from ..training import train +from .utils import Device + + +def run_interaction_constraints( # pylint: disable=too-many-locals + tree_method: str, + device: Device, + feature_names: Optional[FeatureNames] = None, + interaction_constraints: Union[str, Sequence] = "[[0, 1]]", +) -> None: + """Tests interaction constraints on a synthetic dataset.""" + x1 = np.random.normal(loc=1.0, scale=1.0, size=1000) + x2 = np.random.normal(loc=1.0, scale=1.0, size=1000) + x3 = np.random.choice([1, 2, 3], size=1000, replace=True) + y = ( + x1 + + x2 + + x3 + + x1 * x2 * x3 + + np.random.normal(loc=0.001, scale=1.0, size=1000) + + 3 * np.sin(x1) + ) + X = np.column_stack((x1, x2, x3)) + dtrain = DMatrix(X, label=y, feature_names=feature_names) + + params = { + "max_depth": 3, + "eta": 0.1, + "nthread": 2, + "interaction_constraints": interaction_constraints, + "tree_method": tree_method, + "device": device, + } + num_boost_round = 12 + # Fit a model that only allows interaction between x1 and x2 + bst = train(params, dtrain, num_boost_round, evals=[(dtrain, "train")]) + + # Set all observations to have the same x3 values then increment by the same amount + def f(x: int) -> np.ndarray: + tmat = DMatrix( + np.column_stack((x1, x2, np.repeat(x, 1000))), feature_names=feature_names + ) + return bst.predict(tmat) + + preds = [f(x) for x in [1, 2, 3]] + + # Check incrementing x3 has the same effect on all observations + # since x3 is constrained to be independent of x1 and x2 + # and all observations start off from the same x3 value + diff1 = preds[1] - preds[0] + assert np.all(np.abs(diff1 - diff1[0]) < 1e-4) + diff2 = preds[2] - preds[1] + assert np.all(np.abs(diff2 - diff2[0]) < 1e-4) + + +def training_accuracy(tree_method: str, dpath: str, device: Device) -> None: + """Test accuracy, reused by GPU tests.""" + from sklearn.metrics import accuracy_score + + dtrain = DMatrix(dpath + "agaricus.txt.train?indexing_mode=1&format=libsvm") + dtest = DMatrix(dpath + "agaricus.txt.test?indexing_mode=1&format=libsvm") + params = { + "eta": 1, + "max_depth": 6, + "objective": "binary:logistic", + "tree_method": tree_method, + "device": device, + "interaction_constraints": "[[1,2], [2,3,4]]", + } + num_boost_round = 5 + + params["grow_policy"] = "lossguide" + bst = train(params, dtrain, num_boost_round) + pred_dtest = bst.predict(dtest) < 0.5 + assert accuracy_score(dtest.get_label(), pred_dtest) < 0.1 + + params["grow_policy"] = "depthwise" + bst = train(params, dtrain, num_boost_round) + pred_dtest = bst.predict(dtest) < 0.5 + assert accuracy_score(dtest.get_label(), pred_dtest) < 0.1 diff --git a/python-package/xgboost/testing/metrics.py b/python-package/xgboost/testing/metrics.py index 802add7b2bf8..f6e5033b4ba3 100644 --- a/python-package/xgboost/testing/metrics.py +++ b/python-package/xgboost/testing/metrics.py @@ -5,12 +5,16 @@ import numpy as np import pytest -import xgboost as xgb -from xgboost.compat import concat -from xgboost.core import _parse_eval_str +from 
..compat import concat
+from ..core import DMatrix, QuantileDMatrix, _parse_eval_str
+from ..sklearn import XGBClassifier, XGBRanker
+from ..training import train
+from .utils import Device
 
 
-def check_precision_score(tree_method: str) -> None:
+def check_precision_score(  # pylint: disable=too-many-locals
+    tree_method: str, device: Device
+) -> None:
     """Test for precision with ranking and classification."""
     datasets = pytest.importorskip("sklearn.datasets")
 
@@ -19,7 +23,7 @@
     )
     qid = np.zeros(shape=y.shape)  # same group
 
-    ltr = xgb.XGBRanker(n_estimators=2, tree_method=tree_method)
+    ltr = XGBRanker(n_estimators=2, tree_method=tree_method, device=device)
     ltr.fit(X, y, qid=qid)
 
     # re-generate so that XGBoost doesn't evaluate the result to 1.0
@@ -28,9 +32,7 @@
     )
 
     ltr.set_params(eval_metric="pre@32")
-    result = _parse_eval_str(
-        ltr.get_booster().eval_set(evals=[(xgb.DMatrix(X, y), "Xy")])
-    )
+    result = _parse_eval_str(ltr.get_booster().eval_set(evals=[(DMatrix(X, y), "Xy")]))
     score_0 = result[1][1]
 
     X_list = []
@@ -52,14 +54,14 @@
     y = concat(y_list)
 
     result = _parse_eval_str(
-        ltr.get_booster().eval_set(evals=[(xgb.DMatrix(X, y, qid=qid), "Xy")])
+        ltr.get_booster().eval_set(evals=[(DMatrix(X, y, qid=qid), "Xy")])
     )
     assert result[1][0].endswith("pre@32")
     score_1 = result[1][1]
     assert score_1 == score_0
 
 
-def check_quantile_error(tree_method: str) -> None:
+def check_quantile_error(tree_method: str, device: Device) -> None:
     """Test for the `quantile` loss."""
     from sklearn.datasets import make_regression
     from sklearn.metrics import mean_pinball_loss
 
@@ -67,10 +69,15 @@
     rng = np.random.RandomState(19)
     # pylint: disable=unbalanced-tuple-unpacking
     X, y = make_regression(128, 3, random_state=rng)
-    Xy = xgb.QuantileDMatrix(X, y)
+    Xy = QuantileDMatrix(X, y)
     evals_result: Dict[str, Dict] = {}
-    booster = xgb.train(
-        {"tree_method": tree_method, "eval_metric": "quantile", "quantile_alpha": 0.3},
+    booster = train(
+        {
+            "tree_method": tree_method,
+            "eval_metric": "quantile",
+            "quantile_alpha": 0.3,
+            "device": device,
+        },
         Xy,
         evals=[(Xy, "Train")],
         evals_result=evals_result,
@@ -80,12 +87,13 @@
     np.testing.assert_allclose(evals_result["Train"]["quantile"][-1], loss)
 
     alpha = [0.25, 0.5, 0.75]
-    booster = xgb.train(
+    booster = train(
         {
             "tree_method": tree_method,
             "eval_metric": "quantile",
             "quantile_alpha": alpha,
             "objective": "reg:quantileerror",
+            "device": device,
         },
         Xy,
         evals=[(Xy, "Train")],
@@ -96,3 +104,167 @@
         [mean_pinball_loss(y, predt[:, i], alpha=alpha[i]) for i in range(3)]
     )
     np.testing.assert_allclose(evals_result["Train"]["quantile"][-1], loss)
+
+
+def run_roc_auc_binary(tree_method: str, n_samples: int, device: Device) -> None:
+    """Test ROC AUC metric on a binary classification problem."""
+    from sklearn.datasets import make_classification
+    from sklearn.metrics import roc_auc_score
+
+    rng = np.random.RandomState(1994)
+    n_features = 10
+
+    X, y = make_classification(
+        n_samples,
+        n_features,
+        n_informative=n_features,
+        n_redundant=0,
+        random_state=rng,
+    )
+    Xy = DMatrix(X, y)
+    booster = train(
+        {
+            "tree_method": tree_method,
+            "device": device,
+            "eval_metric": "auc",
+            "objective": "binary:logistic",
+        },
+        Xy,
+        num_boost_round=1,
+    )
+    score =
booster.predict(Xy) + skl_auc = roc_auc_score(y, score) + auc = float(booster.eval(Xy).split(":")[1]) + np.testing.assert_allclose(skl_auc, auc, rtol=1e-6) + + X = rng.randn(*X.shape) + score = booster.predict(DMatrix(X)) + skl_auc = roc_auc_score(y, score) + auc = float(booster.eval(DMatrix(X, y)).split(":")[1]) + np.testing.assert_allclose(skl_auc, auc, rtol=1e-6) + + +def run_pr_auc_multi(tree_method: str, device: Device) -> None: + """Test for PR AUC metric on a multi-class classification problem.""" + from sklearn.datasets import make_classification + + X, y = make_classification(64, 16, n_informative=8, n_classes=3, random_state=1994) + clf = XGBClassifier( + tree_method=tree_method, n_estimators=1, eval_metric="aucpr", device=device + ) + clf.fit(X, y, eval_set=[(X, y)]) + evals_result = clf.evals_result()["validation_0"]["aucpr"][-1] + # No available implementation for comparison, just check that XGBoost converges + # to 1.0 + clf = XGBClassifier( + tree_method=tree_method, n_estimators=10, eval_metric="aucpr", device=device + ) + clf.fit(X, y, eval_set=[(X, y)]) + evals_result = clf.evals_result()["validation_0"]["aucpr"][-1] + np.testing.assert_allclose(1.0, evals_result, rtol=1e-2) + + +def run_roc_auc_multi( # pylint: disable=too-many-locals + tree_method: str, n_samples: int, weighted: bool, device: Device +) -> None: + """Test for ROC AUC metric on a multi-class classification problem.""" + from sklearn.datasets import make_classification + from sklearn.metrics import roc_auc_score + + rng = np.random.RandomState(1994) + n_features = 10 + n_classes = 4 + + X, y = make_classification( + n_samples, + n_features, + n_informative=n_features, + n_redundant=0, + n_classes=n_classes, + random_state=rng, + ) + if weighted: + weights = rng.randn(n_samples) + weights -= weights.min() + weights /= weights.max() + else: + weights = None + + Xy = DMatrix(X, y, weight=weights) + booster = train( + { + "tree_method": tree_method, + "eval_metric": "auc", + "objective": "multi:softprob", + "num_class": n_classes, + "device": device, + }, + Xy, + num_boost_round=1, + ) + score = booster.predict(Xy) + skl_auc = roc_auc_score( + y, score, average="weighted", sample_weight=weights, multi_class="ovr" + ) + auc = float(booster.eval(Xy).split(":")[1]) + np.testing.assert_allclose(skl_auc, auc, rtol=1e-6) + + X = rng.randn(*X.shape) + + score = booster.predict(DMatrix(X, weight=weights)) + skl_auc = roc_auc_score( + y, score, average="weighted", sample_weight=weights, multi_class="ovr" + ) + auc = float(booster.eval(DMatrix(X, y, weight=weights)).split(":")[1]) + np.testing.assert_allclose(skl_auc, auc, rtol=1e-5) + + +def run_pr_auc_ltr(tree_method: str, device: Device) -> None: + """Test for PR AUC metric on a ranking problem.""" + from sklearn.datasets import make_classification + + X, y = make_classification(128, 4, n_classes=2, random_state=1994) + ltr = XGBRanker( + tree_method=tree_method, + n_estimators=16, + objective="rank:pairwise", + eval_metric="aucpr", + device=device, + ) + groups = np.array([32, 32, 64]) + ltr.fit( + X, + y, + group=groups, + eval_set=[(X, y)], + eval_group=[groups], + ) + results = ltr.evals_result()["validation_0"]["aucpr"] + assert results[-1] >= 0.99 + + +def run_pr_auc_binary(tree_method: str, device: Device) -> None: + """Test for PR AUC metric on a binary classification problem.""" + from sklearn.datasets import make_classification + from sklearn.metrics import auc, precision_recall_curve + + X, y = make_classification(128, 4, n_classes=2, random_state=1994) + 
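The AUC helpers in this file recover metric values with `float(booster.eval(...).split(":")[1])`; the format being parsed is the standard evaluation log line, sketched below with a made-up value:

```python
# Booster.eval(dmat) returns a string of the form "[<iter>]\t<name>-<metric>:<value>".
line = "[0]\teval-auc:0.987654"  # illustrative shape, not real model output
auc = float(line.split(":")[1])
assert abs(auc - 0.987654) < 1e-12
```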
clf = XGBClassifier(
+        tree_method=tree_method, n_estimators=1, eval_metric="aucpr", device=device
+    )
+    clf.fit(X, y, eval_set=[(X, y)])
+    evals_result = clf.evals_result()["validation_0"]["aucpr"][-1]
+
+    y_score = clf.predict_proba(X)[:, 1]  # get the positive column
+    precision, recall, _ = precision_recall_curve(y, y_score)
+    prauc = auc(recall, precision)
+    # Interpolation results are slightly different from sklearn, but overall should
+    # be similar.
+    np.testing.assert_allclose(prauc, evals_result, rtol=1e-2)
+
+    clf = XGBClassifier(
+        tree_method=tree_method, n_estimators=10, eval_metric="aucpr", device=device
+    )
+    clf.fit(X, y, eval_set=[(X, y)])
+    evals_result = clf.evals_result()["validation_0"]["aucpr"][-1]
+    np.testing.assert_allclose(0.99, evals_result, rtol=1e-2)
diff --git a/python-package/xgboost/testing/monotone_constraints.py b/python-package/xgboost/testing/monotone_constraints.py
new file mode 100644
index 000000000000..36db20b48e02
--- /dev/null
+++ b/python-package/xgboost/testing/monotone_constraints.py
@@ -0,0 +1,65 @@
+"""Helpers for testing monotone constraints."""
+
+from typing import Optional
+
+import numpy as np
+
+from .._typing import FeatureNames
+from ..core import Booster, DMatrix
+
+
+def is_increasing(v: np.ndarray) -> bool:
+    """Whether v is increasing."""
+    return np.count_nonzero(np.diff(v) < 0.0) == 0
+
+
+def is_decreasing(v: np.ndarray) -> bool:
+    """Whether v is decreasing."""
+    return np.count_nonzero(np.diff(v) > 0.0) == 0
+
+
+def is_correctly_constrained(
+    learner: Booster, feature_names: Optional[FeatureNames] = None
+) -> bool:
+    """Whether the monotone constraint is correctly applied."""
+    n = 100
+    variable_x = np.linspace(0, 1, n).reshape((n, 1))
+    fixed_xs_values = np.linspace(0, 1, n)
+
+    for i in range(n):
+        fixed_x = fixed_xs_values[i] * np.ones((n, 1))
+        monotonically_increasing_x = np.column_stack((variable_x, fixed_x))
+        monotonically_increasing_dset = DMatrix(
+            monotonically_increasing_x, feature_names=feature_names
+        )
+        monotonically_increasing_y = learner.predict(monotonically_increasing_dset)
+
+        monotonically_decreasing_x = np.column_stack((fixed_x, variable_x))
+        monotonically_decreasing_dset = DMatrix(
+            monotonically_decreasing_x, feature_names=feature_names
+        )
+        monotonically_decreasing_y = learner.predict(monotonically_decreasing_dset)
+
+        if not (
+            is_increasing(monotonically_increasing_y)
+            and is_decreasing(monotonically_decreasing_y)
+        ):
+            return False
+
+    return True
+
+
+NUMBER_OF_DPOINTS = 1000
+x1_positively_correlated_with_y = np.random.random(size=NUMBER_OF_DPOINTS)
+x2_negatively_correlated_with_y = np.random.random(size=NUMBER_OF_DPOINTS)
+
+x = np.column_stack((x1_positively_correlated_with_y, x2_negatively_correlated_with_y))
+zs = np.random.normal(loc=0.0, scale=0.01, size=NUMBER_OF_DPOINTS)
+y = (
+    5 * x1_positively_correlated_with_y
+    + np.sin(10 * np.pi * x1_positively_correlated_with_y)
+    - 5 * x2_negatively_correlated_with_y
+    - np.cos(10 * np.pi * x2_negatively_correlated_with_y)
+    + zs
+)
+training_dset = DMatrix(x, label=y)
diff --git a/python-package/xgboost/testing/multi_target.py b/python-package/xgboost/testing/multi_target.py
new file mode 100644
index 000000000000..ccfa5a83cb1c
--- /dev/null
+++ b/python-package/xgboost/testing/multi_target.py
@@ -0,0 +1,151 @@
+"""Tests for multi-target training."""
+
+from typing import Dict, Optional, Tuple
+
+import numpy as np
+import pytest
+from sklearn.datasets import (
+    make_classification,
+    make_multilabel_classification,
+
make_regression, +) + +import xgboost.testing as tm + +from .._typing import ArrayLike +from ..core import Booster, DMatrix, QuantileDMatrix +from ..objective import Objective, TreeObjective +from ..sklearn import XGBClassifier +from ..training import train +from .updater import ResetStrategy +from .utils import Device + + +def run_multiclass(device: Device, learning_rate: Optional[float]) -> None: + """Use vector leaf for multi-class models.""" + X, y = make_classification( + 128, n_features=12, n_informative=10, n_classes=4, random_state=2025 + ) + clf = XGBClassifier( + multi_strategy="multi_output_tree", + callbacks=[ResetStrategy()], + n_estimators=10, + device=device, + learning_rate=learning_rate, + ) + clf.fit(X, y, eval_set=[(X, y)]) + assert clf.objective == "multi:softprob" + assert tm.non_increasing(clf.evals_result()["validation_0"]["mlogloss"]) + if learning_rate is not None and abs(learning_rate - 1.0) < 1e-5: + assert clf.evals_result()["validation_0"]["mlogloss"][-1] < 0.045 + + proba = clf.predict_proba(X) + assert proba.shape == (y.shape[0], 4) + + +def run_multilabel(device: Device, learning_rate: Optional[float]) -> None: + """Use vector leaf for multi-label classification models.""" + # pylint: disable=unbalanced-tuple-unpacking + X, y = make_multilabel_classification(128, random_state=2025) + clf = XGBClassifier( + multi_strategy="multi_output_tree", + callbacks=[ResetStrategy()], + n_estimators=10, + device=device, + learning_rate=learning_rate, + ) + clf.fit(X, y, eval_set=[(X, y)]) + assert clf.objective == "binary:logistic" + assert tm.non_increasing(clf.evals_result()["validation_0"]["logloss"]) + if learning_rate is not None and abs(learning_rate - 1.0) < 1e-5: + assert clf.evals_result()["validation_0"]["logloss"][-1] < 0.065 + + proba = clf.predict_proba(X) + assert proba.shape == y.shape + + +def run_reduced_grad(device: Device) -> None: + """Basic test for using reduced gradient for tree splits.""" + import cupy as cp + + class LsObj0(TreeObjective): + """Split grad is the same as value grad.""" + + def __call__( + self, y_pred: ArrayLike, dtrain: DMatrix + ) -> Tuple[cp.ndarray, cp.ndarray]: + y_true = dtrain.get_label().reshape(y_pred.shape) + grad, hess = tm.ls_obj(y_true, y_pred, None) + return cp.array(grad), cp.array(hess) + + def split_grad( + self, grad: ArrayLike, hess: ArrayLike + ) -> Tuple[ArrayLike, ArrayLike]: + return cp.array(grad), cp.array(hess) + + class LsObj1(Objective): + """No split grad.""" + + def __call__( + self, y_pred: ArrayLike, dtrain: DMatrix + ) -> Tuple[cp.ndarray, cp.ndarray]: + y_true = dtrain.get_label().reshape(y_pred.shape) + grad, hess = tm.ls_obj(y_true, y_pred, None) + return cp.array(grad), cp.array(hess) + + X, y = make_regression( # pylint: disable=unbalanced-tuple-unpacking + n_samples=1024, n_features=16, random_state=1994, n_targets=5 + ) + Xy = QuantileDMatrix(X, y) + + def run_test( + obj: Optional[Objective], base_score: Optional[list[float]] = None + ) -> Booster: + evals_result: Dict[str, Dict] = {} + booster = train( + { + "device": device, + "multi_strategy": "multi_output_tree", + "learning_rate": 1, + "base_score": base_score, + }, + Xy, + evals=[(Xy, "Train")], + obj=obj, + num_boost_round=8, + evals_result=evals_result, + ) + assert tm.non_increasing(evals_result["Train"]["rmse"]) + return booster + + booster_0 = run_test(LsObj0()) + booster_1 = run_test(LsObj1()) + np.testing.assert_allclose( + booster_0.inplace_predict(X), booster_1.inplace_predict(X) + ) + + booster_2 = run_test(LsObj0(), [0.5] * 
y.shape[1])
+    booster_3 = run_test(None, [0.5] * y.shape[1])
+    np.testing.assert_allclose(
+        booster_2.inplace_predict(X), booster_3.inplace_predict(X)
+    )
+
+    # Use mean gradient, should still converge.
+    class LsObj2(LsObj0):
+        """Use mean as split grad."""
+
+        def __init__(self, check_used: bool):
+            self._chk = check_used
+
+        def split_grad(
+            self, grad: ArrayLike, hess: ArrayLike
+        ) -> Tuple[cp.ndarray, cp.ndarray]:
+            if self._chk:
+                assert False
+            sgrad = cp.mean(grad, axis=1)
+            shess = cp.mean(hess, axis=1)
+            return sgrad, shess
+
+    run_test(LsObj2(False))
+    with pytest.raises(AssertionError):
+        run_test(LsObj2(True))
diff --git a/python-package/xgboost/testing/ordinal.py b/python-package/xgboost/testing/ordinal.py
index 404d795951df..6629868778f4 100644
--- a/python-package/xgboost/testing/ordinal.py
+++ b/python-package/xgboost/testing/ordinal.py
@@ -1,19 +1,32 @@
-# pylint: disable=invalid-name
 """Tests for the ordinal re-coder."""
 
+import itertools
 import os
 import tempfile
-from typing import Any, Literal, Tuple, Type
+from concurrent.futures import ThreadPoolExecutor
+from functools import cache as fcache
+from typing import Any, Tuple, Type, TypeVar
 
 import numpy as np
+import pytest
 
-from ..compat import import_cupy
+from .._typing import EvalsLog
 from ..core import DMatrix, ExtMemQuantileDMatrix, QuantileDMatrix
 from ..data import _lazy_load_cudf_is_cat
-from .data import IteratorForTest, is_pd_cat_dtype, make_categorical
-
-
-def get_df_impl(device: str) -> Tuple[Type, Type]:
+from ..training import train
+from .data import (
+    IteratorForTest,
+    is_pd_cat_dtype,
+    make_batches,
+    make_categorical,
+    memory,
+)
+from .updater import get_basescore
+from .utils import Device, assert_allclose, predictor_equal
+
+
+@fcache
+def get_df_impl(device: Device) -> Tuple[Type, Type]:
     """Get data frame implementation based on the device."""
     if device == "cpu":
         import pandas as pd
@@ -28,31 +41,47 @@
     return Df, Ser
 
 
-def assert_allclose(device: str, a: Any, b: Any) -> None:
-    """Dispatch the assert_allclose for devices."""
+def asarray(device: Device, data: Any) -> np.ndarray:
+    """Wrapper to get an array."""
     if device == "cpu":
-        np.testing.assert_allclose(a, b)
-    else:
-        cp = import_cupy()
-        cp.testing.assert_allclose(a, b)
+        return np.asarray(data)
+    import cupy as cp
+
+    return cp.asarray(data)
+
+
+def comp_booster(device: Device, Xy: DMatrix, booster: str) -> None:
+    """Compare the results from DMatrix and Booster."""
+    cats_dm = Xy.get_categories(export_to_arrow=True).to_arrow()
+    assert cats_dm is not None
-def run_cat_container(device: Literal["cpu", "cuda"]) -> None:
+    rng = np.random.default_rng(2025)
+    Xy.set_label(rng.normal(size=Xy.num_row()))
+    bst = train({"booster": booster, "device": device}, Xy, 1)
+    cats_bst = bst.get_categories(export_to_arrow=True).to_arrow()
+    assert cats_bst is not None
+    assert cats_dm == cats_bst
+
+
+def run_cat_container(device: Device) -> None:
     """Basic tests for the container class used by the DMatrix."""
 
-    def run_dispatch(device: str, DMatrixT: Type) -> None:
+    def run_dispatch(device: Device, DMatrixT: Type) -> None:
         Df, _ = get_df_impl(device)
 
         # Basic test with a single feature
         df = Df({"c": ["cdef", "abc"]}, dtype="category")
         categories = df.c.cat.categories
 
         Xy = DMatrixT(df, enable_categorical=True)
-        results = Xy.get_categories()
+        assert Xy.feature_names == ["c"]
+        assert Xy.feature_types == ["c"]
+        results = Xy.get_categories(export_to_arrow=True).to_arrow()
        assert results is not
None - assert len(results["c"]) == len(categories) - for i in range(len(results["c"])): - assert str(results["c"][i]) == str(categories[i]), ( - results["c"][i], + results_di = dict(results) + assert len(results_di["c"]) == len(categories) + for i in range(len(results_di["c"])): + assert str(results_di["c"][i]) == str(categories[i]), ( + results_di["c"][i], categories[i], ) @@ -60,9 +89,10 @@ def run_dispatch(device: str, DMatrixT: Type) -> None: df = Df({"c": ["cdef", None, "abc", "abc"]}, dtype="category") Xy = DMatrixT(df, enable_categorical=True) - cats = Xy.get_categories() + cats = Xy.get_categories(export_to_arrow=True).to_arrow() assert cats is not None - ser = cats["c"].to_pandas() + cats_id = dict(cats) + ser = cats_id["c"].to_pandas() assert ser.iloc[0] == "abc" assert ser.iloc[1] == "cdef" assert ser.size == 2 @@ -73,16 +103,25 @@ def run_dispatch(device: str, DMatrixT: Type) -> None: assert_allclose(device, csr.indptr, np.array([0, 1, 1, 2, 3])) assert_allclose(device, csr.indices, np.array([0, 0, 0])) + comp_booster(device, Xy, "gbtree") + comp_booster(device, Xy, "dart") + # Test with explicit null-terminated strings. df = Df({"c": ["cdef", None, "abc", "abc\0"]}, dtype="category") Xy = DMatrixT(df, enable_categorical=True) + comp_booster(device, Xy, "gbtree") + comp_booster(device, Xy, "dart") + + with pytest.raises(ValueError, match="export_to_arrow"): + Xy.get_categories(export_to_arrow=False).to_arrow() + for dm in (DMatrix, QuantileDMatrix): run_dispatch(device, dm) # pylint: disable=too-many-statements -def run_cat_container_mixed(device: Literal["cpu", "cuda"]) -> None: +def run_cat_container_mixed(device: Device) -> None: """Run checks with mixed types.""" import pandas as pd @@ -96,12 +135,15 @@ def is_cudf_cat(_: Any) -> bool: n_samples = int(2**10) def check(Xy: DMatrix, X: pd.DataFrame) -> None: - cats = Xy.get_categories() + cats = Xy.get_categories(export_to_arrow=True).to_arrow() assert cats is not None + cats_di = dict(cats) for fname in X.columns: if is_pd_cat_dtype(X[fname].dtype) or is_cudf_cat(X[fname].dtype): - aw_list = sorted(cats[fname].to_pylist()) + vf = cats_di[fname] + assert vf is not None + aw_list = sorted(vf.to_pylist()) if is_cudf_cat(X[fname].dtype): pd_list: list = X[fname].unique().to_arrow().to_pylist() else: @@ -113,24 +155,30 @@ def check(Xy: DMatrix, X: pd.DataFrame) -> None: pd_list = sorted(pd_list) assert aw_list == pd_list else: - assert cats[fname] is None + assert cats_di[fname] is None if not hasattr(Xy, "ref"): # not quantile DMatrix. 
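The assertions above pin down the shape of the new accessor: `get_categories(export_to_arrow=True)` returns a container whose `.to_arrow()` yields `(feature_name, categories)` pairs, with `None` for numeric features. A small self-contained sketch under that reading of the API:

```python
import pandas as pd
import xgboost as xgb

df = pd.DataFrame(
    {
        "c": pd.Series(["b", "a", "b"], dtype="category"),
        "n": [1.0, 2.0, 3.0],  # numeric column: no categories recorded
    }
)
Xy = xgb.DMatrix(df, enable_categorical=True)
cats = dict(Xy.get_categories(export_to_arrow=True).to_arrow())
assert cats["n"] is None
assert cats["c"].to_pylist() == ["a", "b"]  # pyarrow array of the categories
```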
+ assert not isinstance(Xy, QuantileDMatrix) with tempfile.TemporaryDirectory() as tmpdir: fname = os.path.join(tmpdir, "DMatrix.binary") Xy.save_binary(fname) Xy_1 = DMatrix(fname) - cats_1 = Xy_1.get_categories() + cats_1 = Xy_1.get_categories(export_to_arrow=True).to_arrow() assert cats_1 is not None + cats_1_di = dict(cats_1) - for k, v_0 in cats.items(): - v_1 = cats_1[k] + for k, v_0 in cats_di.items(): + v_1 = cats_1_di[k] if v_0 is None: assert v_1 is None else: + assert v_1 is not None assert v_0.to_pylist() == v_1.to_pylist() + comp_booster(device, Xy, "gbtree") + comp_booster(device, Xy, "dart") + def run_dispatch(DMatrixT: Type) -> None: # full str type X, y = make_categorical( @@ -203,8 +251,24 @@ def run_dispatch(DMatrixT: Type) -> None: for dm in (DMatrix, QuantileDMatrix): run_dispatch(dm) + # No category + batches = make_batches( + n_samples_per_batch=128, n_features=4, n_batches=1, use_cupy=device == "cuda" + ) + X, y, w = map(lambda x: x[0], batches) + + for DMatrixT in (DMatrix, QuantileDMatrix): + Xy = DMatrixT(X, y, weight=w) + all_num = Xy.get_categories(export_to_arrow=True).to_arrow() + assert all_num is not None + for _, v in all_num: + assert v is None + + with pytest.raises(ValueError, match="export_to_arrow"): + Xy.get_categories(export_to_arrow=False).to_arrow() -def run_cat_container_iter(device: Literal["cpu", "cuda"]) -> None: + +def run_cat_container_iter(device: Device) -> None: """Test the categories container for iterator-based inputs.""" n_batches = 4 n_features = 8 @@ -228,8 +292,540 @@ def run_cat_container_iter(device: Literal["cpu", "cuda"]) -> None: it = IteratorForTest(X, y, None, cache="cache", on_host=device == "cuda") Xy = ExtMemQuantileDMatrix(it, enable_categorical=True) - cats = Xy.get_categories() + cats = Xy.get_categories(export_to_arrow=True).to_arrow() assert cats is not None and len(cats) == n_features - for _, v in cats.items(): + cats_di = dict(cats) + for _, v in cats_di.items(): + assert v is not None assert v.null_count == 0 assert len(v) == n_cats + + +def _basic_example(device: Device) -> Tuple[Any, Any, np.ndarray, np.ndarray]: + Df, _ = get_df_impl(device) + + enc = Df({"c": ["cdef", "abc", "def"]}, dtype="category") + codes = enc.c.cat.codes # 1, 0, 2 + assert_allclose(device, asarray(device, codes), np.array([1, 0, 2])) + encoded = np.array([codes.iloc[2], codes.iloc[1]]) # def, abc + np.testing.assert_allclose(encoded, [2, 0]) + + reenc = Df({"c": ["def", "abc"]}, dtype="category") # same as `encoded` + codes = reenc.c.cat.codes + assert_allclose(device, codes, np.array([1, 0])) + + y = np.array([0, 1, 2]) + + return enc, reenc, encoded, y + + +def run_basic_predict(DMatrixT: Type, device: Device, tdevice: Device) -> None: + """Enable tests with mixed devices.""" + enc, reenc, encoded, y = _basic_example(device) + + Xy = DMatrixT(enc, y, enable_categorical=True) + booster = train({"device": tdevice}, Xy, num_boost_round=4) + + predt0 = booster.inplace_predict(reenc) + predt1 = booster.inplace_predict(encoded) + assert_allclose(device, predt0, predt1) + + fmat = DMatrixT(reenc, enable_categorical=True) + predt2 = booster.predict(fmat) + assert_allclose(device, predt0, predt2) + + +def run_cat_predict(device: Device) -> None: + """Basic tests for re-coding during prediction.""" + Df, _ = get_df_impl(device) + + for dm in (DMatrix, QuantileDMatrix): + run_basic_predict(dm, device, device) + + def run_mixed(DMatrixT: Type) -> None: + df = Df({"b": [2, 1, 3], "c": ["cdef", "abc", "def"]}, dtype="category") + y = np.array([0, 1, 
2]) + + # used with the next df + b_codes = df.b.cat.codes + assert_allclose(device, asarray(device, b_codes), np.array([1, 0, 2])) + # pick codes of 3, 1 + b_encoded = np.array([b_codes.iloc[2], b_codes.iloc[1]]) + + c_codes = df.c.cat.codes + assert_allclose(device, asarray(device, c_codes), np.array([1, 0, 2])) + # pick codes of "def", "abc" + c_encoded = np.array([c_codes.iloc[2], c_codes.iloc[1]]) + encoded = np.stack([b_encoded, c_encoded], axis=1) + + Xy = DMatrixT(df, y, enable_categorical=True) + booster = train({"device": device}, Xy, num_boost_round=4) + + df = Df({"b": [3, 1], "c": ["def", "abc"]}, dtype="category") + predt0 = booster.inplace_predict(df) + predt1 = booster.inplace_predict(encoded) + assert_allclose(device, predt0, predt1) + + fmat = DMatrixT(df, enable_categorical=True) + predt2 = booster.predict(fmat) + assert_allclose(device, predt0, predt2) + + for dm in (DMatrix, QuantileDMatrix): + run_mixed(dm) + + +def run_cat_invalid(device: Device) -> None: + """Basic tests for invalid inputs.""" + Df, Ser = get_df_impl(device) + y = np.array([0, 1, 2]) + + def run_invalid(DMatrixT: Type) -> None: + df = Df({"b": [2, 1, 3], "c": ["cdef", "abc", "def"]}, dtype="category") + + Xy = DMatrixT(df, y, enable_categorical=True) + booster = train({"device": device}, Xy, num_boost_round=4) + df["b"] = df["b"].astype(np.int64) + with pytest.raises(ValueError, match="The data type doesn't match"): + booster.inplace_predict(df) + + Xy = DMatrixT(df, y, enable_categorical=True) + with pytest.raises(ValueError, match="The data type doesn't match"): + booster.predict(Xy) + + df = Df( + {"b": [2, 1, 3, 4], "c": ["cdef", "abc", "def", "bbc"]}, dtype="category" + ) + with pytest.raises(ValueError, match="Found a category not in the training"): + booster.inplace_predict(df) + + for dm in (DMatrix, QuantileDMatrix): + run_invalid(dm) + + df = Df({"b": [2, 1, 3], "c": ["cdef", "abc", "def"]}, dtype="category") + Xy = DMatrix(df, y, enable_categorical=True) + booster = train({"device": device}, Xy, num_boost_round=4) + df["c"] = Ser(asarray(device, [0, 1, 1]), dtype="category") + + msg = "index type must match between the training and test set" + + with pytest.raises(ValueError, match=msg): + booster.inplace_predict(df) + + with pytest.raises(ValueError, match=msg): + DMatrix(df, enable_categorical=True, feature_types=booster.get_categories()) + + with pytest.raises(ValueError, match=msg): + QuantileDMatrix( + df, enable_categorical=True, feature_types=booster.get_categories() + ) + + +def run_cat_thread_safety(device: Device) -> None: + """Basic tests for thread safety.""" + X, y = make_categorical(2048, 16, 112, onehot=False, cat_ratio=0.5, device=device) + Xy = QuantileDMatrix(X, y, enable_categorical=True) + booster = train({"device": device}, Xy, num_boost_round=10) + + def run_thread_safety(DMatrixT: Type) -> bool: + Xy = DMatrixT(X, enable_categorical=True) + predt0 = booster.predict(Xy) + predt1 = booster.inplace_predict(X) + assert_allclose(device, predt0, predt1) + return True + + futures = [] + n_cpus = os.cpu_count() + assert n_cpus is not None + for dm in (DMatrix, QuantileDMatrix): + with ThreadPoolExecutor(max_workers=max(n_cpus, 10)) as e: + for _ in range(32): + fut = e.submit(run_thread_safety, dm) + futures.append(fut) + + for f in futures: + assert f.result() + + +U = TypeVar("U", DMatrix, QuantileDMatrix) + + +def _make_dm(DMatrixT: Type[U], ref: DMatrix, *args: Any, **kwargs: Any) -> U: + if DMatrixT is QuantileDMatrix: + return DMatrixT(*args, ref=ref, 
enable_categorical=True, **kwargs) + return DMatrixT(*args, enable_categorical=True, **kwargs) + + +def _run_predt( + device: Device, + DMatrixT: Type, + pred_contribs: bool, + pred_interactions: bool, + pred_leaf: bool, +) -> None: + enc, reenc, encoded, y = _basic_example(device) + + Xy = DMatrixT(enc, y, enable_categorical=True) + booster = train({"device": device}, Xy, num_boost_round=4) + + predt_0 = booster.predict( + _make_dm(DMatrixT, ref=Xy, data=reenc), + pred_contribs=pred_contribs, + pred_interactions=pred_interactions, + pred_leaf=pred_leaf, + ) + predt_1 = booster.predict( + _make_dm(DMatrixT, ref=Xy, data=encoded.reshape(2, 1), feature_names=["c"]), + pred_contribs=pred_contribs, + pred_interactions=pred_interactions, + pred_leaf=pred_leaf, + ) + assert_allclose(device, predt_0, predt_1) + + +def run_cat_shap(device: Device) -> None: + """Basic tests for SHAP values.""" + + for dm in (DMatrix, QuantileDMatrix): + _run_predt( + device, dm, pred_contribs=True, pred_interactions=False, pred_leaf=False + ) + + for dm in (DMatrix, QuantileDMatrix): + _run_predt( + device, dm, pred_contribs=False, pred_interactions=True, pred_leaf=False + ) + + +def run_cat_leaf(device: Device) -> None: + """Basic tests for leaf prediction.""" + # QuantileDMatrix is not supported by leaf. + _run_predt( + device, DMatrix, pred_contribs=False, pred_interactions=False, pred_leaf=True + ) + + +# pylint: disable=too-many-locals +@memory.cache +def make_recoded(device: Device, *, n_features: int = 4096) -> Tuple: + """Synthesize a test dataset with changed encoding.""" + Df, _ = get_df_impl(device) + + import pandas as pd + + # Test large column numbers. XGBoost makes some specializations for slim datasets, + # make sure we cover all the cases. + n_samples = 1024 + + # Same between old and new, with 0 ("a") and 1 ("b") exchanged their position. + old_cats = ["a", "b", "c", "d"] + new_cats = ["b", "a", "c", "d"] + mapping = {0: 1, 1: 0} + + rng = np.random.default_rng(2025) + + col_numeric = rng.uniform(0, 1, size=(n_samples, n_features // 2)) + col_categorical = rng.integers( + low=0, high=4, size=(n_samples, n_features // 2), dtype=np.int32 + ) + + df = {} # avoid fragmentation warning from pandas + for c in range(n_features): + if c % 2 == 0: + col = col_numeric[:, c // 2] + else: + codes = col_categorical[:, c // 2] + col = pd.Categorical.from_codes( + categories=old_cats, + codes=codes, + ) + df[f"f{c}"] = col + + enc = Df(df) + y = rng.normal(size=n_samples) + + reenc = enc.copy() + for c in range(n_features): + if c % 2 == 0: + continue + + name = f"f{c}" + codes_ser = reenc[name].cat.codes + if hasattr(codes_ser, "to_pandas"): # cudf + codes_ser = codes_ser.to_pandas() + new_codes = codes_ser.replace(mapping) + reenc[name] = pd.Categorical.from_codes(categories=new_cats, codes=new_codes) + reenc = Df(reenc) + assert (reenc.iloc[:, 1].cat.codes != enc.iloc[:, 1].cat.codes).any() + return enc, reenc, y, col_numeric, col_categorical + + +def run_specified_cat( # pylint: disable=too-many-locals + device: Device, +) -> None: + """Run with manually specified category encoding.""" + import pandas as pd + + # Same between old and new, with 0 ("a") and 1 ("b") exchanged their position. 
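`make_recoded` above is the workhorse for the remaining tests: `enc` and `reenc` carry identical values, but every odd column's category codes are permuted. A usage sketch consistent with its body (the 1024-row shape comes from its `n_samples` constant):

```python
from xgboost.testing.ordinal import make_recoded

enc, reenc, y, col_num, col_cat = make_recoded("cpu", n_features=8)
assert enc.shape == (1024, 8) and reenc.shape == (1024, 8)
# Encodings differ while the decoded values agree.
assert (enc["f1"].cat.codes != reenc["f1"].cat.codes).any()
assert (enc["f1"].astype(str) == reenc["f1"].astype(str)).all()
```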
+
+
+def run_specified_cat(  # pylint: disable=too-many-locals
+    device: Device,
+) -> None:
+    """Run with manually specified category encoding."""
+    import pandas as pd
+
+    # Same categories in old and new, but 0 ("a") and 1 ("b") exchanged positions.
+    old_cats = ["a", "b", "c", "d"]
+    new_cats = ["b", "a", "c", "d"]
+
+    col0 = np.arange(0, 9)
+    col1 = pd.Categorical.from_codes(
+        # b, b, c, d, a, c, c, d, a
+        categories=old_cats,
+        codes=[1, 1, 2, 3, 0, 2, 2, 3, 0],
+    )
+    df = pd.DataFrame({"f0": col0, "f1": col1})
+    Df, _ = get_df_impl(device)
+    df = Df(df)
+    rng = np.random.default_rng(2025)
+    y = rng.uniform(size=df.shape[0])
+
+    for dm in (DMatrix, QuantileDMatrix):
+        Xy = dm(df, y, enable_categorical=True)
+        booster = train({"device": device}, Xy)
+        predt0 = booster.predict(Xy)
+        predt1 = booster.inplace_predict(df)
+        assert_allclose(device, predt0, predt1)
+
+        col1 = pd.Categorical.from_codes(
+            # b, b, c, d, a, c, c, d, a
+            categories=new_cats,
+            codes=[0, 0, 2, 3, 1, 2, 2, 3, 1],
+        )
+        df1 = Df({"f0": col0, "f1": col1})
+        predt2 = booster.inplace_predict(df1)
+        assert_allclose(device, predt0, predt2)
+
+    enc, reenc, y, col_numeric, col_categorical = make_recoded(device)
+
+    Xy = DMatrix(enc, y, enable_categorical=True)
+    booster = train({"device": device}, Xy)
+
+    predt0 = booster.predict(Xy)
+    predt1 = booster.inplace_predict(enc)
+    assert_allclose(device, predt0, predt1)
+
+    Xy = DMatrix(reenc, y, enable_categorical=True)
+    predt2 = booster.predict(Xy)
+    assert_allclose(device, predt0, predt2)
+
+    array = np.empty(shape=(reenc.shape[0], reenc.shape[1]))
+
+    array[:, enc.dtypes == "category"] = col_categorical
+    array[:, enc.dtypes != "category"] = col_numeric
+
+    if device == "cuda":
+        import cupy as cp
+
+        array = cp.array(array)
+
+    predt3 = booster.inplace_predict(array)
+    assert_allclose(device, predt0, predt3)
+
+
+def run_validation(device: Device) -> None:
+    """Check that the validation dataset uses the correct encoding."""
+    enc, reenc, y, _, _ = make_recoded(device)
+
+    Xy = DMatrix(enc, y, enable_categorical=True)
+    Xy_valid = DMatrix(reenc, y, enable_categorical=True)
+
+    evals_result: EvalsLog = {}
+    train(
+        {"device": device},
+        Xy,
+        evals=[(Xy, "Train"), (Xy_valid, "Valid")],
+        evals_result=evals_result,
+    )
+
+    # The evaluation dataset should have the exact same performance as the training dataset.
+    assert_allclose(
+        device, evals_result["Train"]["rmse"], evals_result["Valid"]["rmse"]
+    )
+
+
+def run_recode_dmatrix(device: Device) -> None:
+    """Test re-coding input for DMatrix."""
+    import pandas as pd
+
+    Df, _ = get_df_impl(device)
+
+    # String index
+    old_cats = ["a", "b", "c", "d"]
+    new_cats = ["b", "a", "c", "d"]
+
+    col0 = np.arange(0, 9)
+    col1 = pd.Categorical.from_codes(
+        # b, b, c, d, a, c, c, d, a
+        categories=old_cats,
+        codes=[1, 1, 2, 3, 0, 2, 2, 3, 0],
+    )
+    df = Df({"f0": col0, "f1": col1})
+
+    Xy = DMatrix(df, enable_categorical=True)
+    cats_0 = Xy.get_categories(export_to_arrow=True)
+    assert Xy.feature_types == ["int", "c"]
+
+    col1 = pd.Categorical.from_codes(
+        # b, b, c, d, a, c, c, d, a
+        categories=new_cats,
+        codes=[0, 0, 2, 3, 1, 2, 2, 3, 1],
+    )
+    df = Df({"f0": col0, "f1": col1})
+    Xy = DMatrix(df, enable_categorical=True, feature_types=cats_0)
+    # feature_types is still correct
+    assert Xy.feature_names == ["f0", "f1"]
+    assert Xy.feature_types == ["int", "c"]
+    cats_1 = Xy.get_categories(export_to_arrow=True)
+    assert cats_0.to_arrow() == cats_1.to_arrow()
+
+    # Numeric index
+    col0 = pd.Categorical.from_codes(
+        categories=[5, 6, 7, 8],
+        codes=[0, 0, 2, 3, 1, 2, 2, 3, 1],
+    )
+    Df, _ = get_df_impl(device)
+    df = Df({"cat": col0})
+    for DMatrixT in (DMatrix, QuantileDMatrix):
+        Xy = DMatrixT(df, enable_categorical=True)
+        cats_0 = Xy.get_categories(export_to_arrow=True)
+        assert cats_0 is not None
+
+        Xy = DMatrixT(df, enable_categorical=True, feature_types=cats_0)
+        cats_1 = Xy.get_categories(export_to_arrow=True)
+        assert cats_1 is not None
+
+        assert cats_0.to_arrow() == cats_1.to_arrow()
+
+    # Recode
+    for DMatrixT in (DMatrix, QuantileDMatrix):
+        enc, reenc, y, _, _ = make_recoded(device)
+        Xy_0 = DMatrixT(enc, y, enable_categorical=True)
+        cats_0 = Xy_0.get_categories(export_to_arrow=True)
+
+        assert cats_0 is not None
+
+        Xy_1 = DMatrixT(reenc, y, feature_types=cats_0, enable_categorical=True)
+        cats_1 = Xy_1.get_categories(export_to_arrow=True)
+        assert cats_1 is not None
+
+        assert cats_0.to_arrow() == cats_1.to_arrow()
+        assert predictor_equal(Xy_0, Xy_1)
+
+
+def run_training_continuation(device: Device) -> None:
+    """Test re-coding for training continuation."""
+    enc, reenc, y, _, _ = make_recoded(device)
+
+    def check(Xy_0: DMatrix, Xy_1: DMatrix) -> None:
+        params = {"device": device}
+
+        r = 2
+        evals_result_0: EvalsLog = {}
+        booster_0 = train(
+            params,
+            Xy_0,
+            evals=[(Xy_1, "Valid")],
+            num_boost_round=r,
+            evals_result=evals_result_0,
+        )
+        evals_result_1: EvalsLog = {}
+        booster_1 = train(
+            params,
+            Xy_1,
+            evals=[(Xy_1, "Valid")],
+            xgb_model=booster_0,
+            num_boost_round=r,
+            evals_result=evals_result_1,
+        )
+        assert get_basescore(booster_0) == get_basescore(booster_1)
+
+        evals_result_2: EvalsLog = {}
+        booster_2 = train(
+            params,
+            Xy_0,
+            evals=[(Xy_1, "Valid")],
+            num_boost_round=r * 2,
+            evals_result=evals_result_2,
+        )
+        # Check evaluation results
+        eval_concat = evals_result_0["Valid"]["rmse"] + evals_result_1["Valid"]["rmse"]
+        eval_full = evals_result_2["Valid"]["rmse"]
+        np.testing.assert_allclose(eval_full, eval_concat)
+
+        # Test inference
+        for a, b in itertools.product([enc, reenc], [enc, reenc]):
+            predt_0 = booster_1.inplace_predict(a)
+            predt_1 = booster_2.inplace_predict(b)
+            assert_allclose(device, predt_0, predt_1, rtol=1e-5)
+
+        # With DMatrix
+        for a, b in itertools.product([Xy_0, Xy_1], [Xy_0, Xy_1]):
+            predt_0 = booster_1.predict(a)
+            predt_1 = booster_2.predict(b)
+            assert_allclose(device,
predt_0, predt_1, rtol=1e-5) + + for Train, Valid in itertools.product( + [DMatrix, QuantileDMatrix], [DMatrix, QuantileDMatrix] + ): + Xy_0 = Train(enc, y, enable_categorical=True) + if Valid is QuantileDMatrix: + Xy_1 = Valid( + reenc, + y, + enable_categorical=True, + feature_types=Xy_0.get_categories(), + ref=Xy_0, + ) + else: + Xy_1 = Valid( + reenc, y, enable_categorical=True, feature_types=Xy_0.get_categories() + ) + check(Xy_0, Xy_1) + + +def run_update(device: Device) -> None: + """Test with individual updaters.""" + enc, reenc, y, _, _ = make_recoded(device) + Xy = DMatrix(enc, y, enable_categorical=True) + booster_0 = train({"device": device}, Xy, num_boost_round=4) + model_0 = booster_0.save_raw() + cats_0 = booster_0.get_categories() + + Xy_1 = DMatrix(reenc, y, feature_types=cats_0, enable_categorical=True) + + booster_1 = train( + { + "device": device, + "updater": "prune", + "process_type": "update", + }, + Xy_1, + num_boost_round=4, + xgb_model=booster_0, + ) + model_1 = booster_1.save_raw() + + assert model_0 == model_1 # also compares the cat container inside + + +def run_recode_dmatrix_predict(device: Device) -> None: + """Run prediction with re-coded DMatrix.""" + enc, reenc, y, _, _ = make_recoded(device) + + for DMatrixT in (DMatrix, QuantileDMatrix): + Xy = DMatrix(enc, y, enable_categorical=True) + booster = train({"device": device}, Xy, num_boost_round=4) + cats_0 = booster.get_categories() + + Xy_1 = _make_dm(DMatrixT, Xy, reenc, y, feature_types=cats_0) + Xy_2 = _make_dm(DMatrixT, Xy, reenc, y) + + predt_0 = booster.predict(Xy) + predt_1 = booster.predict(Xy_1) + predt_2 = booster.predict(Xy_2) + predt_3 = booster.inplace_predict(enc) + + for predt in (predt_1, predt_2, predt_3): + assert_allclose(device, predt_0, predt) diff --git a/python-package/xgboost/testing/parse_tree.py b/python-package/xgboost/testing/parse_tree.py new file mode 100644 index 000000000000..15935fd512c8 --- /dev/null +++ b/python-package/xgboost/testing/parse_tree.py @@ -0,0 +1,32 @@ +"""Tests for parsing trees.""" + +import pytest + +from ..core import DMatrix +from ..sklearn import XGBRegressor +from ..training import train +from .data import make_categorical +from .utils import Device + + +def run_tree_to_df_categorical(tree_method: str, device: Device) -> None: + """Tests tree_to_df with categorical features.""" + X, y = make_categorical(100, 10, 31, onehot=False) + Xy = DMatrix(X, y, enable_categorical=True) + booster = train( + {"tree_method": tree_method, "device": device}, Xy, num_boost_round=10 + ) + df = booster.trees_to_dataframe() + for _, x in df.iterrows(): + if x["Feature"] != "Leaf": + assert len(x["Category"]) >= 1 + + +def run_split_value_histograms(tree_method: str, device: Device) -> None: + """Tests split_value_histograms with categorical features.""" + X, y = make_categorical(1000, 10, 13, onehot=False) + reg = XGBRegressor(tree_method=tree_method, enable_categorical=True, device=device) + reg.fit(X, y) + + with pytest.raises(ValueError, match="doesn't"): + reg.get_booster().get_split_value_histogram("3", bins=5) diff --git a/python-package/xgboost/testing/plotting.py b/python-package/xgboost/testing/plotting.py new file mode 100644 index 000000000000..9966cf2c6c92 --- /dev/null +++ b/python-package/xgboost/testing/plotting.py @@ -0,0 +1,29 @@ +"""Test plotting functions for XGBoost.""" + +import json + +from graphviz import Source +from matplotlib.axes import Axes + +from ..plotting import plot_tree, to_graphviz +from ..sklearn import XGBRegressor +from .data import 
make_categorical +from .utils import Device + + +def run_categorical(tree_method: str, device: Device) -> None: + """Tests plotting functions for categorical features.""" + X, y = make_categorical(1000, 31, 19, onehot=False) + reg = XGBRegressor( + enable_categorical=True, n_estimators=10, tree_method=tree_method, device=device + ) + reg.fit(X, y) + trees = reg.get_booster().get_dump(dump_format="json") + for tree in trees: + j_tree = json.loads(tree) + assert "leaf" in j_tree.keys() or isinstance(j_tree["split_condition"], list) + + graph = to_graphviz(reg, tree_idx=len(j_tree) - 1) + assert isinstance(graph, Source) + ax = plot_tree(reg, tree_idx=len(j_tree) - 1) + assert isinstance(ax, Axes) diff --git a/python-package/xgboost/testing/predict.py b/python-package/xgboost/testing/predict.py new file mode 100644 index 000000000000..9f2d42aeed03 --- /dev/null +++ b/python-package/xgboost/testing/predict.py @@ -0,0 +1,96 @@ +"""Tests for inference.""" + +from typing import Type + +import numpy as np +from scipy.special import logit # pylint: disable=no-name-in-module + +from ..core import DMatrix +from ..training import train +from .shared import validate_leaf_output +from .updater import get_basescore +from .utils import Device + + +# pylint: disable=too-many-locals +def run_predict_leaf(device: Device, DMatrixT: Type[DMatrix]) -> np.ndarray: + """Run tests for leaf index prediction.""" + rows = 100 + cols = 4 + classes = 5 + num_parallel_tree = 4 + num_boost_round = 10 + rng = np.random.RandomState(1994) + X = rng.randn(rows, cols) + y = rng.randint(low=0, high=classes, size=rows) + + m = DMatrixT(X, y) + booster = train( + { + "num_parallel_tree": num_parallel_tree, + "num_class": classes, + "tree_method": "hist", + }, + m, + num_boost_round=num_boost_round, + ) + + booster.set_param({"device": device}) + empty = DMatrixT(np.ones(shape=(0, cols))) + empty_leaf = booster.predict(empty, pred_leaf=True) + assert empty_leaf.shape[0] == 0 + + leaf = booster.predict(m, pred_leaf=True, strict_shape=True) + assert leaf.shape[0] == rows + assert leaf.shape[1] == num_boost_round + assert leaf.shape[2] == classes + assert leaf.shape[3] == num_parallel_tree + + validate_leaf_output(leaf, num_parallel_tree) + + n_iters = np.int32(2) + sliced = booster.predict( + m, + pred_leaf=True, + iteration_range=(0, n_iters), + strict_shape=True, + ) + first = sliced[0, ...] 
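+    # With strict_shape, leaf indices are laid out as (rows, boosting rounds,
+    # classes, parallel trees); one sliced row therefore holds
+    # n_iters * classes * num_parallel_tree entries, as asserted next.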
+ + assert np.prod(first.shape) == classes * num_parallel_tree * n_iters + + # When there's only 1 tree, the output is a 1 dim vector + booster = train({"tree_method": "hist"}, num_boost_round=1, dtrain=m) + booster.set_param({"device": device}) + assert booster.predict(m, pred_leaf=True).shape == (rows,) + + return leaf + + +def run_base_margin_vs_base_score(device: Device) -> None: + """Test for the relation between score and margin.""" + from sklearn.datasets import make_classification + + intercept = 0.5 + + X, y = make_classification(random_state=2025) + booster = train( + {"base_score": intercept, "objective": "binary:logistic", "device": device}, + dtrain=DMatrix(X, y), + num_boost_round=1, + ) + np.testing.assert_allclose(get_basescore(booster), intercept) + predt_0 = booster.predict(DMatrix(X, y)) + + margin = np.full(y.shape, fill_value=logit(intercept), dtype=np.float32) + Xy = DMatrix(X, y, base_margin=margin) + # 0.2 is a dummy value + booster = train( + {"base_score": 0.2, "objective": "binary:logistic", "device": device}, + dtrain=Xy, + num_boost_round=1, + ) + np.testing.assert_allclose(get_basescore(booster), 0.2) + predt_1 = booster.predict(Xy) + + np.testing.assert_allclose(predt_0, predt_1) diff --git a/python-package/xgboost/testing/ranking.py b/python-package/xgboost/testing/ranking.py index ebf88eceecf2..af7ee067a6bb 100644 --- a/python-package/xgboost/testing/ranking.py +++ b/python-package/xgboost/testing/ranking.py @@ -9,8 +9,10 @@ import xgboost as xgb from xgboost import testing as tm +from .utils import Device -def run_ranking_qid_df(impl: ModuleType, tree_method: str) -> None: + +def run_ranking_qid_df(impl: ModuleType, tree_method: str, device: Device) -> None: """Test ranking with qid packed into X.""" import scipy.sparse from sklearn.metrics import mean_squared_error @@ -21,7 +23,9 @@ def run_ranking_qid_df(impl: ModuleType, tree_method: str) -> None: # pack qid into x using dataframe df = impl.DataFrame(X) df["qid"] = q - ranker = xgb.XGBRanker(n_estimators=3, eval_metric="ndcg", tree_method=tree_method) + ranker = xgb.XGBRanker( + n_estimators=3, eval_metric="ndcg", tree_method=tree_method, device=device + ) ranker.fit(df, y) s = ranker.score(df, y) assert s > 0.7 @@ -32,13 +36,15 @@ def run_ranking_qid_df(impl: ModuleType, tree_method: str) -> None: ranker.fit(df, y, eval_set=[(valid_df, y)]) # same as passing qid directly - ranker = xgb.XGBRanker(n_estimators=3, eval_metric="ndcg", tree_method=tree_method) + ranker = xgb.XGBRanker( + n_estimators=3, eval_metric="ndcg", tree_method=tree_method, device=device + ) ranker.fit(X, y, qid=q) s1 = ranker.score(df, y) assert np.isclose(s, s1) # Works with standard sklearn cv - if tree_method != "gpu_hist": + if device == "cpu": # we need cuML for this. 
kfold = StratifiedGroupKFold(shuffle=False) results = cross_val_score(ranker, df, y, cv=kfold, groups=df.qid) @@ -52,6 +58,7 @@ def neg_mse(*args: Any, **kwargs: Any) -> float: n_estimators=3, eval_metric=neg_mse, tree_method=tree_method, + device=device, disable_default_eval_metric=True, ) ranker.fit(df, y, eval_set=[(valid_df, y)]) @@ -59,7 +66,7 @@ def neg_mse(*args: Any, **kwargs: Any) -> float: assert np.isclose(score, ranker.evals_result()["validation_0"]["neg_mse"][-1]) # Works with sparse data - if tree_method != "gpu_hist": + if device == "cpu": # no sparse with cuDF X_csr = scipy.sparse.csr_matrix(X) df = impl.DataFrame.sparse.from_spmatrix( @@ -67,7 +74,10 @@ def neg_mse(*args: Any, **kwargs: Any) -> float: ) df["qid"] = q ranker = xgb.XGBRanker( - n_estimators=3, eval_metric="ndcg", tree_method=tree_method + n_estimators=3, + eval_metric="ndcg", + tree_method=tree_method, + device=device, ) ranker.fit(df, y) s2 = ranker.score(df, y) @@ -105,6 +115,7 @@ def run_ranking_categorical(device: str) -> None: def run_normalization(device: str) -> None: """Test normalization.""" X, y, qid, _ = tm.make_ltr(2048, 4, 64, 3) + # top-k ltr = xgb.XGBRanker(objective="rank:pairwise", n_estimators=4, device=device) ltr.fit(X, y, qid=qid, eval_set=[(X, y)], eval_qid=[qid]) e0 = ltr.evals_result() @@ -119,6 +130,53 @@ def run_normalization(device: str) -> None: e1 = ltr.evals_result() assert e1["validation_0"]["ndcg@32"][-1] > e0["validation_0"]["ndcg@32"][-1] + # mean + ltr = xgb.XGBRanker( + objective="rank:pairwise", + n_estimators=4, + device=device, + lambdarank_pair_method="mean", + lambdarank_normalization=True, + ) + ltr.fit(X, y, qid=qid, eval_set=[(X, y)], eval_qid=[qid]) + e0 = ltr.evals_result() + + ltr = xgb.XGBRanker( + objective="rank:pairwise", + n_estimators=4, + device=device, + lambdarank_pair_method="mean", + lambdarank_normalization=False, + ) + ltr.fit(X, y, qid=qid, eval_set=[(X, y)], eval_qid=[qid]) + e1 = ltr.evals_result() + # no normalization since the number of pairs is 1. 
+ assert e1["validation_0"]["ndcg"][-1] == e0["validation_0"]["ndcg"][-1] + + # mean + ltr = xgb.XGBRanker( + objective="rank:pairwise", + n_estimators=4, + device=device, + lambdarank_pair_method="mean", + lambdarank_normalization=True, + lambdarank_num_pair_per_sample=4, + ) + ltr.fit(X, y, qid=qid, eval_set=[(X, y)], eval_qid=[qid]) + e0 = ltr.evals_result() + + ltr = xgb.XGBRanker( + objective="rank:pairwise", + n_estimators=4, + device=device, + lambdarank_pair_method="mean", + lambdarank_normalization=False, + lambdarank_num_pair_per_sample=4, + ) + ltr.fit(X, y, qid=qid, eval_set=[(X, y)], eval_qid=[qid]) + e1 = ltr.evals_result() + assert e1["validation_0"]["ndcg"][-1] != e0["validation_0"]["ndcg"][-1] + def run_score_normalization(device: str, objective: str) -> None: """Test normalization by score differences.""" diff --git a/python-package/xgboost/testing/shared.py b/python-package/xgboost/testing/shared.py index 32d5962e7c30..fcdfe5a6e138 100644 --- a/python-package/xgboost/testing/shared.py +++ b/python-package/xgboost/testing/shared.py @@ -1,6 +1,5 @@ """Testing code shared by other tests.""" -# pylint: disable=invalid-name import collections import importlib.util import json diff --git a/python-package/xgboost/testing/updater.py b/python-package/xgboost/testing/updater.py index b610ab94f4ec..d0912262cce4 100644 --- a/python-package/xgboost/testing/updater.py +++ b/python-package/xgboost/testing/updater.py @@ -2,39 +2,52 @@ import json from functools import partial, update_wrapper -from typing import Any, Dict, List, Union, overload +from string import ascii_lowercase +from typing import Any, Dict, List, Optional, Union, overload import numpy as np import pytest import xgboost as xgb import xgboost.testing as tm +from xgboost.core import _parse_version from xgboost.data import is_pd_cat_dtype from ..core import DataIter from .data_iter import CatIter +from .utils import Device @overload -def get_basescore(model: xgb.XGBModel) -> float: ... +def get_basescore(model: xgb.XGBModel) -> List[float]: ... @overload -def get_basescore(model: xgb.Booster) -> float: ... +def get_basescore(model: xgb.Booster) -> List[float]: ... -def get_basescore(model: Union[xgb.XGBModel, xgb.Booster]) -> float: +@overload +def get_basescore(model: Dict[str, Any]) -> List[float]: ... 
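+
+# Note: the saved `base_score` is a JSON-encoded array holding one intercept per
+# target/class (e.g. "[0.5]" or "[0.2, 0.3, 0.5]"), hence these overloads return
+# List[float] rather than a scalar.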
+
+
+def get_basescore(
+    model: Union[xgb.XGBModel, xgb.Booster, Dict],
+) -> List[float]:
-    """Get base score from an XGBoost sklearn estimator."""
+    """Get the base score (intercept) from an estimator, booster, or saved config."""
     if isinstance(model, xgb.XGBModel):
         model = model.get_booster()
-    base_score = float(
-        json.loads(model.save_config())["learner"]["learner_model_param"]["base_score"]
-    )
-    return base_score
+    if isinstance(model, dict):
+        jintercept = model["learner"]["learner_model_param"]["base_score"]
+    else:
+        jintercept = json.loads(model.save_config())["learner"]["learner_model_param"][
+            "base_score"
+        ]
+    return json.loads(jintercept)
 
 
-def check_init_estimation(tree_method: str) -> None:
+# pylint: disable=too-many-statements
+def check_init_estimation(tree_method: str, device: Device) -> None:
     """Test for init estimation."""
     from sklearn.datasets import (
         make_classification,
@@ -43,18 +56,26 @@ def check_init_estimation(tree_method: str) -> None:
     )
 
     def run_reg(X: np.ndarray, y: np.ndarray) -> None:  # pylint: disable=invalid-name
-        reg = xgb.XGBRegressor(tree_method=tree_method, max_depth=1, n_estimators=1)
+        reg = xgb.XGBRegressor(
+            tree_method=tree_method, max_depth=1, n_estimators=1, device=device
+        )
         reg.fit(X, y, eval_set=[(X, y)])
         base_score_0 = get_basescore(reg)
         score_0 = reg.evals_result()["validation_0"]["rmse"][0]
 
+        n_targets = 1 if y.ndim == 1 else y.shape[1]
+        intercept = np.full(shape=(n_targets,), fill_value=0.5, dtype=np.float32)
         reg = xgb.XGBRegressor(
-            tree_method=tree_method, max_depth=1, n_estimators=1, boost_from_average=0
+            tree_method=tree_method,
+            device=device,
+            max_depth=1,
+            n_estimators=1,
+            base_score=intercept,
         )
         reg.fit(X, y, eval_set=[(X, y)])
         base_score_1 = get_basescore(reg)
         score_1 = reg.evals_result()["validation_0"]["rmse"][0]
-        assert not np.isclose(base_score_0, base_score_1)
+        assert not np.isclose(base_score_0, base_score_1).any()
         assert score_0 < score_1  # should be better
 
     # pylint: disable=unbalanced-tuple-unpacking
@@ -64,20 +85,49 @@ def run_reg(X: np.ndarray, y: np.ndarray) -> None:  # pylint: disable=invalid-na
     X, y = make_regression(n_samples=4096, n_targets=3, random_state=17)
     run_reg(X, y)
 
-    def run_clf(X: np.ndarray, y: np.ndarray) -> None:  # pylint: disable=invalid-name
-        clf = xgb.XGBClassifier(tree_method=tree_method, max_depth=1, n_estimators=1)
-        clf.fit(X, y, eval_set=[(X, y)])
+    # pylint: disable=invalid-name
+    def run_clf(
+        X: np.ndarray, y: np.ndarray, w: Optional[np.ndarray] = None
+    ) -> List[float]:
+        clf = xgb.XGBClassifier(
+            tree_method=tree_method, max_depth=1, n_estimators=1, device=device
+        )
+        if w is not None:
+            clf.fit(
+                X, y, sample_weight=w, eval_set=[(X, y)], sample_weight_eval_set=[w]
+            )
+        else:
+            clf.fit(X, y, eval_set=[(X, y)])
         base_score_0 = get_basescore(clf)
-        score_0 = clf.evals_result()["validation_0"]["logloss"][0]
+        if clf.n_classes_ == 2:
+            score_0 = clf.evals_result()["validation_0"]["logloss"][0]
+        else:
+            score_0 = clf.evals_result()["validation_0"]["mlogloss"][0]
 
+        n_targets = 1 if y.ndim == 1 else y.shape[1]
+        intercept = np.full(shape=(n_targets,), fill_value=0.5, dtype=np.float32)
         clf = xgb.XGBClassifier(
-            tree_method=tree_method, max_depth=1, n_estimators=1, boost_from_average=0
+            tree_method=tree_method,
+            max_depth=1,
+            n_estimators=1,
+            device=device,
+            base_score=intercept,
        )
-        clf.fit(X, y, eval_set=[(X, y)])
+        if w is not None:
+            clf.fit(
+                X, y, sample_weight=w, eval_set=[(X, y)], sample_weight_eval_set=[w]
+            )
+        else:
+            clf.fit(X, y, eval_set=[(X, y)])
         base_score_1 = get_basescore(clf)
-        score_1 =
clf.evals_result()["validation_0"]["logloss"][0] - assert not np.isclose(base_score_0, base_score_1) - assert score_0 < score_1 # should be better + if clf.n_classes_ == 2: + score_1 = clf.evals_result()["validation_0"]["logloss"][0] + else: + score_1 = clf.evals_result()["validation_0"]["mlogloss"][0] + assert not np.isclose(base_score_0, base_score_1).any() + assert score_0 < score_1 + 1e-4 # should be better + + return base_score_0 # pylint: disable=unbalanced-tuple-unpacking X, y = make_classification(n_samples=4096, random_state=17) @@ -87,9 +137,29 @@ def run_clf(X: np.ndarray, y: np.ndarray) -> None: # pylint: disable=invalid-na ) run_clf(X, y) + X, y = make_classification( + n_samples=4096, random_state=17, n_classes=5, n_informative=20, n_redundant=0 + ) + intercept = run_clf(X, y) + np.testing.assert_allclose(np.sum(intercept), 1.0) + assert np.all(np.array(intercept) > 0) + np_int = ( + np.histogram( + y, bins=np.concatenate([np.unique(y), np.array([np.finfo(np.float32).max])]) + )[0] + / y.shape[0] + ) + np.testing.assert_allclose(intercept, np_int) + + rng = np.random.default_rng(1994) + w = rng.uniform(low=0, high=1, size=(y.shape[0],)) + intercept = run_clf(X, y, w) + np.testing.assert_allclose(np.sum(intercept), 1.0) + assert np.all(np.array(intercept) > 0) + # pylint: disable=too-many-locals -def check_quantile_loss(tree_method: str, weighted: bool) -> None: +def check_quantile_loss(tree_method: str, weighted: bool, device: Device) -> None: """Test for quantile loss.""" from sklearn.datasets import make_regression from sklearn.metrics import mean_pinball_loss @@ -99,9 +169,7 @@ def check_quantile_loss(tree_method: str, weighted: bool) -> None: n_samples = 4096 n_features = 8 n_estimators = 8 - # non-zero base score can cause floating point difference with GPU predictor. - # multi-class has small difference than single target in the prediction kernel - base_score = 0.0 + rng = np.random.RandomState(1994) # pylint: disable=unbalanced-tuple-unpacking X, y = make_regression( @@ -117,11 +185,15 @@ def check_quantile_loss(tree_method: str, weighted: bool) -> None: Xy = xgb.QuantileDMatrix(X, y, weight=weight) alpha = np.array([0.1, 0.5]) + # non-zero base score can cause floating point difference with GPU predictor. 
+    # multi-class differs slightly from single-target in the prediction kernel
+    base_score = np.zeros(shape=alpha.shape, dtype=np.float32)
     evals_result: Dict[str, Dict] = {}
     booster_multi = xgb.train(
         {
             "objective": "reg:quantileerror",
             "tree_method": tree_method,
+            "device": device,
             "quantile_alpha": alpha,
             "base_score": base_score,
         },
@@ -153,8 +225,9 @@
         {
             "objective": "reg:quantileerror",
             "tree_method": tree_method,
+            "device": device,
             "quantile_alpha": a,
-            "base_score": base_score,
+            "base_score": base_score[i],
         },
         Xy,
         num_boost_round=n_estimators,
@@ -574,6 +647,155 @@ def run(max_cat_to_onehot: int) -> None:
     run(USE_PART)
+
+
+def run_max_cat(tree_method: str, device: Device) -> None:
+    """Test data with fewer samples than categories."""
+    import pandas as pd
+
+    rng = np.random.default_rng(0)
+    n_cat = 100
+    n = 5
+
+    X = pd.Series(
+        ["".join(rng.choice(list(ascii_lowercase), size=3)) for i in range(n_cat)],
+        dtype="category",
+    )[:n].to_frame()
+
+    reg = xgb.XGBRegressor(
+        enable_categorical=True,
+        tree_method=tree_method,
+        device=device,
+        n_estimators=10,
+    )
+    y = pd.Series(range(n))
+    reg.fit(X=X, y=y, eval_set=[(X, y)])
+    assert tm.non_increasing(reg.evals_result()["validation_0"]["rmse"])
+
+
+def run_invalid_category(tree_method: str, device: Device) -> None:
+    """Test with invalid categorical inputs."""
+    rng = np.random.default_rng()
+    # too large
+    X = rng.integers(low=0, high=4, size=1000).reshape(100, 10)
+    y = rng.normal(loc=0, scale=1, size=100)
+    X[13, 7] = np.iinfo(np.int32).max + 1
+
+    # The check is performed during sketching.
+    Xy = xgb.DMatrix(X, y, feature_types=["c"] * 10)
+    with pytest.raises(ValueError):
+        xgb.train({"tree_method": tree_method, "device": device}, Xy)
+
+    X[13, 7] = 16777216
+    Xy = xgb.DMatrix(X, y, feature_types=["c"] * 10)
+    with pytest.raises(ValueError):
+        xgb.train({"tree_method": tree_method, "device": device}, Xy)
+
+    # mixed positive and negative values
+    X = rng.normal(loc=0, scale=1, size=1000).reshape(100, 10)  # type: ignore
+    y = rng.normal(loc=0, scale=1, size=100)
+
+    Xy = xgb.DMatrix(X, y, feature_types=["c"] * 10)
+    with pytest.raises(ValueError):
+        xgb.train({"tree_method": tree_method, "device": device}, Xy)
+
+    if device == "cuda":
+        import cupy as cp
+
+        X, y = cp.array(X), cp.array(y)
+        with pytest.raises(ValueError):
+            Xy = xgb.QuantileDMatrix(X, y, feature_types=["c"] * 10)
+
+
+def run_adaptive(tree_method: str, weighted: bool, device: Device) -> None:
+    """Test for adaptive trees."""
+    rng = np.random.RandomState(1994)
+    from sklearn import __version__ as sklearn_version
+    from sklearn.datasets import make_regression
+    from sklearn.utils import stats
+
+    n_samples = 256
+    X, y = make_regression(  # pylint: disable=unbalanced-tuple-unpacking
+        n_samples, 16, random_state=rng
+    )
+    if weighted:
+        w = rng.normal(size=n_samples)
+        w -= w.min()
+        Xy = xgb.DMatrix(X, y, weight=w)
+
+        (sk_major, sk_minor, _), _ = _parse_version(sklearn_version)
+        if sk_major > 1 or sk_minor >= 7:
+            kwargs = {"percentile_rank": 50}
+        else:
+            kwargs = {"percentile": 50}
+        base_score = stats._weighted_percentile(  # pylint: disable=protected-access
+            y, w, **kwargs
+        )
+    else:
+        Xy = xgb.DMatrix(X, y)
+        base_score = np.median(y)
+
+    booster_0 = xgb.train(
+        {
+            "tree_method": tree_method,
"objective": "reg:absoluteerror", + "device": device, + }, + Xy, + num_boost_round=1, + ) + config_0 = json.loads(booster_0.save_config()) + config_1 = json.loads(booster_1.save_config()) + + assert get_basescore(config_0) == get_basescore(config_1) + + raw_booster = booster_1.save_raw(raw_format="ubj") + booster_2 = xgb.Booster(model_file=raw_booster) + config_2 = json.loads(booster_2.save_config()) + assert get_basescore(config_1) == get_basescore(config_2) + + booster_0 = xgb.train( + { + "tree_method": tree_method, + "base_score": base_score + 1.0, + "objective": "reg:absoluteerror", + "device": device, + }, + Xy, + num_boost_round=1, + ) + config_0 = json.loads(booster_0.save_config()) + np.testing.assert_allclose( + get_basescore(config_0), np.asarray(get_basescore(config_1)) + 1 + ) + + evals_result: Dict[str, Dict[str, list]] = {} + xgb.train( + { + "tree_method": tree_method, + "device": device, + "objective": "reg:absoluteerror", + "subsample": 0.8, + "eta": 1.0, + }, + Xy, + num_boost_round=10, + evals=[(Xy, "Train")], + evals_result=evals_result, + ) + mae = evals_result["Train"]["mae"] + assert mae[-1] < 20.0 + assert tm.non_increasing(mae) + + def train_result( param: Dict[str, Any], dmat: xgb.DMatrix, num_rounds: int ) -> Dict[str, Any]: diff --git a/python-package/xgboost/testing/utils.py b/python-package/xgboost/testing/utils.py new file mode 100644 index 000000000000..be1b990fd036 --- /dev/null +++ b/python-package/xgboost/testing/utils.py @@ -0,0 +1,35 @@ +"""Helpers for test code.""" + +from typing import Any, Literal, TypeAlias + +import numpy as np + +from ..compat import import_cupy +from ..core import DMatrix +from ..data import _is_cupy_alike + +Device: TypeAlias = Literal["cpu", "cuda"] + + +def assert_allclose( + device: Device, a: Any, b: Any, *, rtol: float = 1e-7, atol: float = 0 +) -> None: + """Dispatch the assert_allclose for devices.""" + if device == "cpu" and not _is_cupy_alike(a) and not _is_cupy_alike(b): + np.testing.assert_allclose(a, b, atol=atol, rtol=rtol) + else: + cp = import_cupy() + cp.testing.assert_allclose(a, b, atol=atol, rtol=rtol) + + +def predictor_equal(lhs: DMatrix, rhs: DMatrix) -> bool: + """Assert whether two DMatrices contain the same predictors.""" + lcsr = lhs.get_data() + rcsr = rhs.get_data() + return all( + ( + np.array_equal(lcsr.data, rcsr.data), + np.array_equal(lcsr.indices, rcsr.indices), + np.array_equal(lcsr.indptr, rcsr.indptr), + ) + ) diff --git a/python-package/xgboost/testing/with_skl.py b/python-package/xgboost/testing/with_skl.py new file mode 100644 index 000000000000..6bbffafbdbf4 --- /dev/null +++ b/python-package/xgboost/testing/with_skl.py @@ -0,0 +1,231 @@ +# pylint: disable=too-many-arguments, too-many-positional-arguments +"""Tests for compatiblity with sklearn.""" + +from typing import Callable, Optional, Type + +import numpy as np +import pytest + +from ..core import DMatrix +from ..sklearn import XGBClassifier, XGBRegressor, XGBRFRegressor +from .data import get_california_housing, make_batches +from .ordinal import make_recoded +from .utils import Device, assert_allclose + + +def run_boost_from_prediction_binary( + tree_method: str, + device: Device, + X: np.ndarray, + y: np.ndarray, + as_frame: Optional[Callable], +) -> None: + """ + Parameters + ---------- + + as_frame: A callable function to convert margin into DataFrame, useful for different + df implementations. 
+ """ + + model_0 = XGBClassifier( + learning_rate=0.3, + random_state=0, + n_estimators=4, + tree_method=tree_method, + device=device, + ) + model_0.fit(X=X, y=y) + margin = model_0.predict(X, output_margin=True) + if as_frame is not None: + margin = as_frame(margin) + + model_1 = XGBClassifier( + learning_rate=0.3, + random_state=0, + n_estimators=4, + tree_method=tree_method, + device=device, + ) + model_1.fit(X=X, y=y, base_margin=margin) + predictions_1 = model_1.predict(X, base_margin=margin) + + cls_2 = XGBClassifier( + learning_rate=0.3, + random_state=0, + n_estimators=8, + tree_method=tree_method, + device=device, + ) + cls_2.fit(X=X, y=y) + predictions_2 = cls_2.predict(X) + np.testing.assert_allclose(predictions_1, predictions_2) + + +def run_boost_from_prediction_multi_clasas( + estimator: Type, + tree_method: str, + device: Device, + X: np.ndarray, + y: np.ndarray, + as_frame: Optional[Callable], +) -> None: + """Boosting from prediction with multi-class clf.""" + # Multi-class + model_0 = estimator( + learning_rate=0.3, + random_state=0, + n_estimators=4, + tree_method=tree_method, + device=device, + ) + model_0.fit(X=X, y=y) + margin = model_0.get_booster().inplace_predict(X, predict_type="margin") + if as_frame is not None: + margin = as_frame(margin) + + model_1 = estimator( + learning_rate=0.3, + random_state=0, + n_estimators=4, + tree_method=tree_method, + device=device, + ) + model_1.fit(X=X, y=y, base_margin=margin) + predictions_1 = model_1.get_booster().predict( + DMatrix(X, base_margin=margin), output_margin=True + ) + + model_2 = estimator( + learning_rate=0.3, + random_state=0, + n_estimators=8, + tree_method=tree_method, + device=device, + ) + model_2.fit(X=X, y=y) + predictions_2 = model_2.get_booster().inplace_predict(X, predict_type="margin") + + if hasattr(predictions_1, "get"): + predictions_1 = predictions_1.get() + if hasattr(predictions_2, "get"): + predictions_2 = predictions_2.get() + np.testing.assert_allclose(predictions_1, predictions_2, atol=1e-6) + + +def run_housing_rf_regression(tree_method: str, device: Device) -> None: + """Testwith the cali housing dataset.""" + from sklearn.metrics import mean_squared_error + from sklearn.model_selection import KFold + + X, y = get_california_housing() + rng = np.random.RandomState(1994) + kf = KFold(n_splits=2, shuffle=True, random_state=rng) + for train_index, test_index in kf.split(X, y): + xgb_model = XGBRFRegressor( + random_state=42, tree_method=tree_method, device=device + ).fit(X[train_index], y[train_index]) + preds = xgb_model.predict(X[test_index]) + labels = y[test_index] + assert mean_squared_error(preds, labels) < 35 + + rfreg = XGBRFRegressor(device=device) + with pytest.raises(NotImplementedError): + rfreg.set_params(early_stopping_rounds=10) + rfreg.fit(X, y) + + +def run_recoding(device: Device) -> None: + """Test re-coding for training continuation.""" + enc, reenc, y, _, _ = make_recoded(device, n_features=16) + reg = XGBRegressor(enable_categorical=True, n_estimators=2, device=device) + reg.fit(enc, y, eval_set=[(reenc, y)]) + results_0 = reg.evals_result() + + booster = reg.get_booster() + assert not booster.get_categories().empty() + + reg = XGBRegressor(enable_categorical=True, n_estimators=2, device=device) + reg.fit(reenc, y, xgb_model=booster, eval_set=[(enc, y)]) + results_1 = reg.evals_result() + + booster = reg.get_booster() + assert booster.num_boosted_rounds() == 4 + assert not booster.get_categories().empty() + + reg = XGBRegressor(enable_categorical=True, n_estimators=4, 
+    reg = XGBRegressor(enable_categorical=True, n_estimators=4, device=device)
+    reg.fit(enc, y, eval_set=[(reenc, y)])
+    results_2 = reg.evals_result()
+
+    np.testing.assert_allclose(
+        results_2["validation_0"]["rmse"],
+        results_0["validation_0"]["rmse"] + results_1["validation_0"]["rmse"],
+    )
+
+    np.testing.assert_allclose(reg.predict(reenc), reg.predict(enc))
+    np.testing.assert_allclose(reg.apply(reenc), reg.apply(enc))
+
+
+def run_intercept(device: Device) -> None:
+    """Tests for the intercept."""
+    from sklearn.datasets import make_classification, make_multilabel_classification
+
+    X, y, w = [v[0] for v in make_batches(256, 3, 1, use_cupy=False)]
+    reg = XGBRegressor(device=device)
+    reg.fit(X, y, sample_weight=w)
+    result = reg.intercept_
+    assert result.dtype == np.float32
+    assert result[0] < 0.5
+
+    reg = XGBRegressor(booster="gblinear", device=device)
+    reg.fit(X, y, sample_weight=w)
+    result = reg.intercept_
+    assert isinstance(result, np.ndarray)
+    assert result.dtype == np.float32
+    assert result[0] < 0.5
+
+    n_classes = 4
+    X, y = make_classification(
+        random_state=1994,
+        n_samples=128,
+        n_features=16,
+        n_classes=n_classes,
+        n_informative=16,
+        n_redundant=0,
+    )
+
+    clf = XGBClassifier(booster="gbtree", objective="multi:softprob", device=device)
+    clf.fit(X, y)
+    result = clf.intercept_
+    assert isinstance(result, np.ndarray)
+    assert len(result) == 4
+    assert (result >= 0.0).all()
+    np.testing.assert_allclose(sum(result), 1.0)
+
+    # Tests for user input
+    # Multi-class
+    intercept = np.ones(shape=(n_classes), dtype=np.float32) / n_classes
+    if device == "cuda":
+        import cupy as cp
+
+        intercept = cp.array(intercept)
+
+    clf = XGBClassifier(objective="multi:softprob", base_score=intercept)
+    clf.fit(X, y)
+    assert_allclose(device, intercept, clf.intercept_)
+
+    X, y = make_multilabel_classification(  # pylint: disable=unbalanced-tuple-unpacking
+        random_state=1994, n_samples=128, n_features=16, n_classes=n_classes
+    )
+
+    # Multi-label
+    intercept = np.ones(shape=(n_classes), dtype=np.float32) / 2
+    if device == "cuda":
+        import cupy as cp
+
+        intercept = cp.array(intercept)
+
+    clf = XGBClassifier(base_score=intercept)
+    clf.fit(X, y)
+    assert_allclose(device, intercept, clf.intercept_)
+    assert clf.objective == "binary:logistic"
diff --git a/python-package/xgboost/training.py b/python-package/xgboost/training.py
index ee4c9e1ad2d4..75cebeff60d4 100644
--- a/python-package/xgboost/training.py
+++ b/python-package/xgboost/training.py
@@ -1,10 +1,21 @@
-# pylint: disable=too-many-locals, too-many-arguments, invalid-name
+# pylint: disable=too-many-locals, too-many-arguments
 # pylint: disable=too-many-branches, too-many-statements
 """Training Library containing training routines."""
 import copy
 import os
 import weakref
-from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union, cast
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    Iterable,
+    List,
+    Optional,
+    Sequence,
+    Tuple,
+    Union,
+    cast,
+)
 
 import numpy as np
 
@@ -15,7 +26,7 @@
     EvaluationMonitor,
     TrainingCallback,
 )
-from .compat import SKLEARN_INSTALLED, DataFrame, XGBStratifiedKFold
+from .compat import SKLEARN_INSTALLED, XGBStratifiedKFold
 from .core import (
     Booster,
     DMatrix,
@@ -26,8 +37,16 @@
     _RefMixIn,
 )
 
+if TYPE_CHECKING:
+    from pandas import DataFrame as PdDataFrame
+
 _CVFolds = Sequence["CVPack"]
 
+_RefError = (
+    "Training dataset should be used as a reference when constructing the "
+    "`QuantileDMatrix` for evaluation."
+)
+
 
 @_deprecate_positional_args
 def train(
@@ -158,10 +177,7 @@ def train(
                 and va.ref is not weakref.ref(dtrain)
and va is not dtrain ): - raise ValueError( - "Training dataset should be used as a reference when constructing " - "the `QuantileDMatrix` for evaluation." - ) + raise ValueError(_RefError) bst = Booster(params, [dtrain] + [d[0] for d in evals], model_file=xgb_model) start_iteration = 0 @@ -435,8 +451,7 @@ def cv( callbacks: Optional[Sequence[TrainingCallback]] = None, shuffle: bool = True, custom_metric: Optional[Metric] = None, -) -> Union[Dict[str, float], DataFrame]: - # pylint: disable = invalid-name +) -> Union[Dict[str, float], "PdDataFrame"]: """Cross-validation with given parameters. Parameters diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6ca2a2a12730..4f7b5622b9e1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -18,7 +18,7 @@ set_source_files_properties( PROPERTIES SKIP_UNITY_BUILD_INCLUSION ON) if(USE_CUDA) - file(GLOB_RECURSE CUDA_SOURCES *.cu *.cuh) + file(GLOB_RECURSE CUDA_SOURCES *.cu) target_sources(objxgboost PRIVATE ${CUDA_SOURCES}) endif() diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index f64bd3324406..bb552276f0d8 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -21,7 +21,7 @@ #include "../common/hist_util.h" // for HistogramCuts #include "../common/io.h" // for FileExtension, LoadSequentialFile, MemoryBuf... #include "../common/threading_utils.h" // for OmpGetNumThreads, ParallelFor -#include "../data/adapter.h" // for ArrayAdapter, DenseAdapter, RecordBatchesIte... +#include "../data/adapter.h" // for ArrayAdapter, DenseAdapter #include "../data/batch_utils.h" // for MatchingPageBytes, CachePageRatio #include "../data/cat_container.h" // for CatContainer #include "../data/ellpack_page.h" // for EllpackPage @@ -127,6 +127,11 @@ XGB_DLL int XGBuildInfo(char const **out) { info["USE_FEDERATED"] = Boolean{false}; #endif +#if defined(XGBOOST_GIT_HASH) + char const *git_hash = XGBOOST_GIT_HASH; + info["GIT_HASH"] = String{git_hash}; +#endif + XGBBuildInfoDevice(&info); auto &out_str = GlobalConfigAPIThreadLocalStore::Get()->ret_str; @@ -325,19 +330,20 @@ XGB_DLL int XGDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHandle proxy cuda_impl::MatchingPageBytes()); CHECK_EQ(min_cache_page_bytes, cuda_impl::MatchingPageBytes()) << "Page concatenation is not supported by the DMatrix yet."; + auto cache_host_ratio = + OptionalArg(jconfig, "cache_host_ratio", cuda_impl::AutoHostRatio()); xgboost_CHECK_C_ARG_PTR(next); xgboost_CHECK_C_ARG_PTR(reset); xgboost_CHECK_C_ARG_PTR(out); - auto config = ExtMemConfig{ - cache, on_host, min_cache_page_bytes, missing, /*max_num_device_pages=*/0, n_threads}; + auto config = + ExtMemConfig{cache, on_host, cache_host_ratio, min_cache_page_bytes, missing, n_threads}; *out = new std::shared_ptr{ xgboost::DMatrix::Create(iter, proxy, reset, next, config)}; API_END(); } - namespace { std::shared_ptr GetRefDMatrix(DataIterHandle ref) { std::shared_ptr _ref{nullptr}; @@ -393,17 +399,17 @@ XGB_DLL int XGExtMemQuantileDMatrixCreateFromCallback(DataIterHandle iter, DMatr std::string cache = RequiredArg(jconfig, "cache_prefix", __func__); auto min_cache_page_bytes = OptionalArg(jconfig, "min_cache_page_bytes", cuda_impl::AutoCachePageBytes()); - auto max_num_device_pages = OptionalArg(jconfig, "max_num_device_pages", - cuda_impl::MaxNumDevicePages()); auto max_quantile_blocks = OptionalArg( jconfig, "max_quantile_blocks", std::numeric_limits::max()); + auto cache_host_ratio = + OptionalArg(jconfig, "cache_host_ratio", cuda_impl::AutoHostRatio()); xgboost_CHECK_C_ARG_PTR(next); 
xgboost_CHECK_C_ARG_PTR(reset); xgboost_CHECK_C_ARG_PTR(out); auto config = - ExtMemConfig{cache, on_host, min_cache_page_bytes, missing, max_num_device_pages, n_threads}; + ExtMemConfig{cache, on_host, cache_host_ratio, min_cache_page_bytes, missing, n_threads}; *out = new std::shared_ptr{xgboost::DMatrix::Create( iter, proxy, p_ref, reset, next, max_bin, max_quantile_blocks, config)}; API_END(); @@ -416,52 +422,45 @@ XGB_DLL int XGProxyDMatrixCreate(DMatrixHandle *out) { API_END(); } -XGB_DLL int XGProxyDMatrixSetDataCudaArrayInterface(DMatrixHandle handle, - char const *c_interface_str) { - API_BEGIN(); - CHECK_HANDLE(); - xgboost_CHECK_C_ARG_PTR(c_interface_str); +namespace { +[[nodiscard]] xgboost::data::DMatrixProxy *GetDMatrixProxy(DMatrixHandle handle) { auto p_m = static_cast *>(handle); CHECK(p_m); auto m = static_cast(p_m->get()); CHECK(m) << "Current DMatrix type does not support set data."; - m->SetCUDAArray(c_interface_str); + return m; +} +} // namespace + +XGB_DLL int XGProxyDMatrixSetDataCudaArrayInterface(DMatrixHandle handle, char const *data) { + API_BEGIN(); + CHECK_HANDLE(); + xgboost_CHECK_C_ARG_PTR(data); + GetDMatrixProxy(handle)->SetCudaArray(data); API_END(); } -XGB_DLL int XGProxyDMatrixSetDataCudaColumnar(DMatrixHandle handle, char const *c_interface_str) { +XGB_DLL int XGProxyDMatrixSetDataCudaColumnar(DMatrixHandle handle, char const *data) { API_BEGIN(); CHECK_HANDLE(); - xgboost_CHECK_C_ARG_PTR(c_interface_str); - auto p_m = static_cast *>(handle); - CHECK(p_m); - auto m = static_cast(p_m->get()); - CHECK(m) << "Current DMatrix type does not support set data."; - m->SetCUDAArray(c_interface_str); + xgboost_CHECK_C_ARG_PTR(data); + GetDMatrixProxy(handle)->SetCudaColumnar(data); API_END(); } -XGB_DLL int XGProxyDMatrixSetDataColumnar(DMatrixHandle handle, char const *c_interface_str) { +XGB_DLL int XGProxyDMatrixSetDataColumnar(DMatrixHandle handle, char const *data) { API_BEGIN(); CHECK_HANDLE(); - xgboost_CHECK_C_ARG_PTR(c_interface_str); - auto p_m = static_cast *>(handle); - CHECK(p_m); - auto m = static_cast(p_m->get()); - CHECK(m) << "Current DMatrix type does not support set data."; - m->SetColumnarData(c_interface_str); + xgboost_CHECK_C_ARG_PTR(data); + GetDMatrixProxy(handle)->SetColumnar(data); API_END(); } -XGB_DLL int XGProxyDMatrixSetDataDense(DMatrixHandle handle, char const *c_interface_str) { +XGB_DLL int XGProxyDMatrixSetDataDense(DMatrixHandle handle, char const *data) { API_BEGIN(); CHECK_HANDLE(); - xgboost_CHECK_C_ARG_PTR(c_interface_str); - auto p_m = static_cast *>(handle); - CHECK(p_m); - auto m = static_cast(p_m->get()); - CHECK(m) << "Current DMatrix type does not support set data."; - m->SetArrayData(c_interface_str); + xgboost_CHECK_C_ARG_PTR(data); + GetDMatrixProxy(handle)->SetArray(data); API_END(); } @@ -472,26 +471,12 @@ XGB_DLL int XGProxyDMatrixSetDataCSR(DMatrixHandle handle, char const *indptr, c xgboost_CHECK_C_ARG_PTR(indptr); xgboost_CHECK_C_ARG_PTR(indices); xgboost_CHECK_C_ARG_PTR(data); - auto p_m = static_cast *>(handle); - CHECK(p_m); - auto m = static_cast(p_m->get()); - CHECK(m) << "Current DMatrix type does not support set data."; - m->SetCSRData(indptr, indices, data, ncol, true); + GetDMatrixProxy(handle)->SetCsr(indptr, indices, data, ncol, true); API_END(); } // End Create from data iterator -XGB_DLL int XGDMatrixCreateFromCSREx(const size_t *indptr, const unsigned *indices, - const bst_float *data, size_t nindptr, size_t nelem, - size_t num_col, DMatrixHandle *out) { - API_BEGIN(); - LOG(WARNING) << 
error::DeprecatedFunc(__func__, "2.0.0", "XGDMatrixCreateFromCSR"); - data::CSRAdapter adapter(indptr, indices, data, nindptr - 1, nelem, num_col); - *out = new std::shared_ptr(DMatrix::Create(&adapter, std::nan(""), 1)); - API_END(); -} - XGB_DLL int XGDMatrixCreateFromColumnar(char const *data, char const *c_json_config, DMatrixHandle *out) { API_BEGIN(); @@ -571,17 +556,6 @@ XGB_DLL int XGDMatrixCreateFromCSC(char const *indptr, char const *indices, char API_END(); } -XGB_DLL int XGDMatrixCreateFromCSCEx(const size_t *col_ptr, const unsigned *indices, - const bst_float *data, size_t nindptr, size_t, size_t num_row, - DMatrixHandle *out) { - API_BEGIN(); - LOG(WARNING) << error::DeprecatedFunc(__func__, "2.0.0", "XGDMatrixCreateFromCSC"); - data::CSCAdapter adapter(col_ptr, indices, data, nindptr - 1, num_row); - xgboost_CHECK_C_ARG_PTR(out); - *out = new std::shared_ptr(DMatrix::Create(&adapter, std::nan(""), 1)); - API_END(); -} - XGB_DLL int XGDMatrixCreateFromMat(const bst_float* data, xgboost::bst_ulong nrow, xgboost::bst_ulong ncol, bst_float missing, @@ -720,54 +694,147 @@ XGB_DLL int XGDMatrixGetStrFeatureInfo(DMatrixHandle handle, const char *field, API_END(); } -XGB_DLL int XGBDMatrixGetCategories(DMatrixHandle handle, char const **out) { +namespace { +template +void GetCategoriesImpl(enc::HostColumnsView const &cats, FidxT n_features, + std::string *p_out_storage, char const **out) { + auto &ret_str = *p_out_storage; + ret_str.clear(); + // We can directly use the storage in the cat container instead of allocating temporary storage. + Json jout{Array{}}; + for (decltype(n_features) f_idx = 0; f_idx < n_features; ++f_idx) { + if (cats.Empty()) { + get(jout).emplace_back(); + continue; + } + auto const &col = cats[f_idx]; + if (std::visit([](auto &&arg) { return arg.empty(); }, col)) { + get(jout).emplace_back(); + continue; + } + std::visit(enc::Overloaded{[&](enc::CatStrArrayView const &str) { + auto const &offsets = str.offsets; + auto ovec = linalg::MakeVec(offsets.data(), offsets.size()); + auto jovec = linalg::ArrayInterface(ovec); + + auto const &values = str.values; + auto dvec = linalg::MakeVec(values.data(), values.size()); + auto jdvec = linalg::ArrayInterface(dvec); + + get(jout).emplace_back(Object{}); + get(jout).back()["offsets"] = std::move(jovec); + get(jout).back()["values"] = std::move(jdvec); + }, + [&](auto &&values) { + auto vec = linalg::MakeVec(values.data(), values.size()); + auto jvec = linalg::ArrayInterface(vec); + get(jout).emplace_back(std::move(jvec)); + }}, + col); + } + auto str = Json::Dump(jout); + ret_str = std::move(str); + + *out = ret_str.c_str(); +} + +CatContainer *CopyCatContainer(Context const *ctx, CatContainer const *cats, + bst_feature_t n_features) { + CatContainer *new_cats = new CatContainer{}; + new_cats->Copy(ctx, *cats); + CHECK_EQ(new_cats->Empty(), cats->Empty()); + if (!new_cats->Empty()) { + CHECK_EQ(new_cats->NumFeatures(), n_features); + CHECK_EQ(new_cats->NumFeatures(), cats->NumFeatures()); + } + return new_cats; +} +} // anonymous namespace + +typedef void * CategoriesHandle; // NOLINT + +/** + * Fetching categories is experimental (3.1), C functions are hidden at the moment. + * + * No actual container method is exposed through the C API. It's just an opaque handle at + * the moment. This way we get to reuse the methods and the context from the DMatrix and + * Booster. + */ +/** + * @brief Create an opaque handle to the internal container. + * + * @param handle An instance of the data matrix. 
+ * @param out Created handle to the category container. Set to NULL if there's no category.
+ *
+ * @return 0 when success, -1 when failure happens.
+ */
+XGB_DLL int XGDMatrixGetCategories(DMatrixHandle handle, char const * /*config*/,
+                                   CategoriesHandle *out) {
   API_BEGIN()
   CHECK_HANDLE()
+
+  auto const p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);
+  auto const cats = p_fmat->Cats();
+  xgboost_CHECK_C_ARG_PTR(out);
+  if (cats->Empty()) {
+    *out = nullptr;
+  } else {
+    auto new_cats = CopyCatContainer(p_fmat->Ctx(), cats, p_fmat->Info().num_col_);
+    *out = new_cats;
+  }
+
+  API_END()
+}
+/**
+ * @brief Create an opaque handle to the internal container and export it to arrow.
+ *
+ * @param handle An instance of the data matrix.
+ * @param out Created handle to the category container.
+ * @param export_out JSON encoded array of categories, with length equal to the number of features.
+ *
+ * @return 0 when success, -1 when failure happens.
+ */
+XGB_DLL int XGDMatrixGetCategoriesExportToArrow(DMatrixHandle handle, char const * /*config*/,
+                                                CategoriesHandle *out, char const **export_out) {
+  API_BEGIN();
+  CHECK_HANDLE()
+
   auto const p_fmat = *static_cast<std::shared_ptr<DMatrix> *>(handle);
-  auto const cats = p_fmat->Cats()->HostView();
+  auto const cats = p_fmat->Cats();
+  auto n_features = p_fmat->Info().num_col_;
 
-  auto &ret_str = p_fmat->GetThreadLocal().ret_str;
   xgboost_CHECK_C_ARG_PTR(out);
+  xgboost_CHECK_C_ARG_PTR(export_out);
 
-  if (cats.Empty()) {
+  if (cats->Empty()) {
     *out = nullptr;
+    *export_out = nullptr;
   } else {
-    Json jout{Array{}};
-    auto n_features = p_fmat->Info().num_col_;
-    for (decltype(n_features) f_idx = 0; f_idx < n_features; ++f_idx) {
-      auto const &col = cats[f_idx];
-      if (std::visit([](auto &&arg) { return arg.empty(); }, col)) {
-        get<Array>(jout).emplace_back();
-        continue;
-      }
-      std::visit(enc::Overloaded{[&](enc::CatStrArrayView const &str) {
-                   auto const &offsets = str.offsets;
-                   auto ovec = linalg::MakeVec(offsets.data(), offsets.size());
-                   auto jovec = linalg::ArrayInterface(ovec);
-
-                   auto const &values = str.values;
-                   auto dvec = linalg::MakeVec(values.data(), values.size());
-                   auto jdvec = linalg::ArrayInterface(dvec);
-
-                   get<Array>(jout).emplace_back(Object{});
-                   get<Array>(jout).back()["offsets"] = std::move(jovec);
-                   get<Array>(jout).back()["values"] = std::move(jdvec);
-                 },
-                 [&](auto &&values) {
-                   auto vec = linalg::MakeVec(values.data(), values.size());
-                   auto jvec = linalg::ArrayInterface(vec);
-                   get<Array>(jout).emplace_back(std::move(jvec));
-                 }},
-                 col);
-    }
-    auto str = Json::Dump(jout);
-    ret_str = std::move(str);
-
-    *out = ret_str.c_str();
+    // Create a new container
+    auto new_cats = CopyCatContainer(p_fmat->Ctx(), cats, n_features);
+    *out = new_cats;
+    // Export to arrow
+    auto &ret_str = p_fmat->GetThreadLocal().ret_str;
+    GetCategoriesImpl(new_cats->HostView(), n_features, &ret_str, export_out);
   }
-  API_END()
+  API_END();
+}
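For orientation: these opaque handles back the experimental (3.1) Python accessors used throughout the tests in this patch (`DMatrix.get_categories`, `Booster.get_categories`). A rough sketch of the intended round trip, under the assumption that the experimental Python API shown earlier (`get_categories(export_to_arrow=True)`, `to_arrow()`, and passing the container via `feature_types`) is available:

```python
import numpy as np
import pandas as pd
import xgboost as xgb

df = pd.DataFrame({"c": pd.Categorical(["a", "b", "a"])})
y = np.array([0.0, 1.0, 0.0])

Xy_0 = xgb.DMatrix(df, y, enable_categorical=True)
# Opaque container, backed by XGDMatrixGetCategoriesExportToArrow above.
cats = Xy_0.get_categories(export_to_arrow=True)

# Re-use the training-time encoding: category codes in the new input are
# re-mapped to match `cats` instead of being taken verbatim.
Xy_1 = xgb.DMatrix(df, y, enable_categorical=True, feature_types=cats)
assert cats.to_arrow() == Xy_1.get_categories(export_to_arrow=True).to_arrow()
```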
+/**
+ * @brief Free the opaque handle.
+ *
+ * @param handle An instance of the category container.
+ *
+ * @return 0 when success, -1 when failure happens.
+ */
+XGB_DLL int XGBCategoriesFree(CategoriesHandle handle) {
+  API_BEGIN();
+  xgboost_CHECK_C_ARG_PTR(handle);
+  auto p_cats = static_cast<CatContainer *>(handle);
+  CHECK(p_cats);
+  delete p_cats;
+  API_END();
+}
 
 XGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field, void const *data,
@@ -1138,7 +1205,7 @@ XGB_DLL int XGBoosterBoostOneIter(BoosterHandle handle, DMatrixHandle dtrain, bs
 
 namespace xgboost {
 // copy user-supplied CUDA gradient arrays
-void CopyGradientFromCUDAArrays(Context const *, ArrayInterface<2, false> const &,
+void CopyGradientFromCudaArrays(Context const *, ArrayInterface<2, false> const &,
                                 ArrayInterface<2, false> const &, linalg::Matrix<GradientPair> *)
 #if !defined(XGBOOST_USE_CUDA)
 {
@@ -1161,7 +1228,7 @@ XGB_DLL int XGBoosterTrainOneIter(BoosterHandle handle, DMatrixHandle dtrain, in
   StringView msg{"Mismatched shape between the gradient and hessian."};
   CHECK_EQ(i_grad.Shape<0>(), i_hess.Shape<0>()) << msg;
   CHECK_EQ(i_grad.Shape<1>(), i_hess.Shape<1>()) << msg;
-  linalg::Matrix<GradientPair> gpair;
+  GradientContainer gpair;
   auto grad_is_cuda = ArrayInterfaceHandler::IsCudaPtr(i_grad.data);
   auto hess_is_cuda = ArrayInterfaceHandler::IsCudaPtr(i_hess.data);
   CHECK_EQ(i_grad.Shape<0>(), p_fmat->Info().num_row_)
@@ -1170,8 +1237,8 @@ XGB_DLL int XGBoosterTrainOneIter(BoosterHandle handle, DMatrixHandle dtrain, in
   auto *learner = static_cast<Learner *>(handle);
   auto ctx = learner->Ctx();
   if (!grad_is_cuda) {
-    gpair.Reshape(i_grad.Shape<0>(), i_grad.Shape<1>());
-    auto h_gpair = gpair.HostView();
+    gpair.gpair.Reshape(i_grad.Shape<0>(), i_grad.Shape<1>());
+    auto h_gpair = gpair.gpair.HostView();
     DispatchDType(i_grad, DeviceOrd::CPU(), [&](auto &&t_grad) {
       DispatchDType(i_hess, DeviceOrd::CPU(), [&](auto &&t_hess) {
         common::ParallelFor(h_gpair.Size(), ctx->Threads(),
@@ -1179,9 +1246,50 @@ XGB_DLL int XGBoosterTrainOneIter(BoosterHandle handle, DMatrixHandle dtrain, in
       });
     });
   } else {
-    CopyGradientFromCUDAArrays(ctx, i_grad, i_hess, &gpair);
+    CopyGradientFromCudaArrays(ctx, i_grad, i_hess, &gpair.gpair);
+  }
+  learner->BoostOneIter(iter, p_fmat, &gpair);
+  API_END();
+}
+
+typedef char const *JArrayStr;  // NOLINT
+
+// Hidden, work-in-progress support for reduced gradient. CUDA-only at the moment.
+/**
+ * @brief Use a different type of gradient for finding tree splits.
+ *
+ * @param split_grad Gradient for finding tree splits.
+ * @param split_hess Hessian for finding tree splits.
+ * @param value_grad Gradient for calculating tree leaf weights.
+ * @param value_hess Hessian for calculating tree leaf weights.
+ */ +XGB_DLL int XGBoosterTrainOneIterWithSplitGrad(BoosterHandle handle, DMatrixHandle dtrain, int iter, + JArrayStr split_grad, JArrayStr split_hess, + JArrayStr value_grad, JArrayStr value_hess) { + API_BEGIN(); + CHECK_HANDLE(); + auto *learner = static_cast(handle); + GradientContainer gpair; + auto ctx = learner->Ctx(); + CHECK(ctx->IsCUDA()) << "Reduced gradient with CPU" << MTNotImplemented(); + { + ArrayInterface<2, false> i_grad{StringView{split_grad}}; + ArrayInterface<2, false> i_hess{StringView{split_hess}}; + CHECK(ArrayInterfaceHandler::IsCudaPtr(i_grad.data)) + << "Reduced gradient with CPU" << MTNotImplemented(); + CopyGradientFromCudaArrays(ctx, i_grad, i_hess, &gpair.gpair); + } + { + ArrayInterface<2, false> i_grad{StringView{value_grad}}; + ArrayInterface<2, false> i_hess{StringView{value_hess}}; + CHECK(ArrayInterfaceHandler::IsCudaPtr(i_grad.data)) + << "Reduced gradient with CPU" << MTNotImplemented(); + CopyGradientFromCudaArrays(ctx, i_grad, i_hess, &gpair.value_gpair); } + + auto p_fmat = CastDMatrixHandle(dtrain); learner->BoostOneIter(iter, p_fmat, &gpair); + API_END(); } @@ -1336,7 +1444,7 @@ void InplacePredictImpl(std::shared_ptr p_m, char const *c_json_config, *out_shape = dmlc::BeginPtr(shape); } -XGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle, char const *array_interface, +XGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle, char const *data, char const *c_json_config, DMatrixHandle m, xgboost::bst_ulong const **out_shape, xgboost::bst_ulong *out_dim, const float **out_result) { @@ -1350,8 +1458,8 @@ XGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle, char const *array_in } auto proxy = dynamic_cast(p_m.get()); CHECK(proxy) << "Invalid input type for inplace predict."; - xgboost_CHECK_C_ARG_PTR(array_interface); - proxy->SetArrayData(array_interface); + xgboost_CHECK_C_ARG_PTR(data); + proxy->SetArray(data); auto *learner = static_cast(handle); InplacePredictImpl(p_m, c_json_config, learner, out_shape, out_dim, out_result); API_END(); @@ -1372,7 +1480,7 @@ XGB_DLL int XGBoosterPredictFromColumnar(BoosterHandle handle, char const *array auto proxy = dynamic_cast(p_m.get()); CHECK(proxy) << "Invalid input type for inplace predict."; xgboost_CHECK_C_ARG_PTR(array_interface); - proxy->SetColumnarData(array_interface); + proxy->SetColumnar(array_interface); auto *learner = static_cast(handle); InplacePredictImpl(p_m, c_json_config, learner, out_shape, out_dim, out_result); API_END(); @@ -1394,14 +1502,14 @@ XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle, char const *indptr, ch auto proxy = dynamic_cast(p_m.get()); CHECK(proxy) << "Invalid input type for inplace predict."; xgboost_CHECK_C_ARG_PTR(indptr); - proxy->SetCSRData(indptr, indices, data, cols, true); + proxy->SetCsr(indptr, indices, data, cols, true); auto *learner = static_cast(handle); InplacePredictImpl(p_m, c_json_config, learner, out_shape, out_dim, out_result); API_END(); } #if !defined(XGBOOST_USE_CUDA) -XGB_DLL int XGBoosterPredictFromCUDAArray(BoosterHandle handle, char const *, char const *, +XGB_DLL int XGBoosterPredictFromCudaArray(BoosterHandle handle, char const *, char const *, DMatrixHandle, xgboost::bst_ulong const **, xgboost::bst_ulong *, const float **) { API_BEGIN(); @@ -1410,7 +1518,7 @@ XGB_DLL int XGBoosterPredictFromCUDAArray(BoosterHandle handle, char const *, ch API_END(); } -XGB_DLL int XGBoosterPredictFromCUDAColumnar(BoosterHandle handle, char const *, char const *, +XGB_DLL int XGBoosterPredictFromCudaColumnar(BoosterHandle handle, 
 
@@ -1336,7 +1444,7 @@ void InplacePredictImpl(std::shared_ptr<DMatrix> p_m, char const *c_json_config,
   *out_shape = dmlc::BeginPtr(shape);
 }
 
-XGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle, char const *array_interface,
+XGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle, char const *data,
                                       char const *c_json_config, DMatrixHandle m,
                                       xgboost::bst_ulong const **out_shape,
                                       xgboost::bst_ulong *out_dim, const float **out_result) {
@@ -1350,8 +1458,8 @@ XGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle, char const *array_in
   }
   auto proxy = dynamic_cast<data::DMatrixProxy *>(p_m.get());
   CHECK(proxy) << "Invalid input type for inplace predict.";
-  xgboost_CHECK_C_ARG_PTR(array_interface);
-  proxy->SetArrayData(array_interface);
+  xgboost_CHECK_C_ARG_PTR(data);
+  proxy->SetArray(data);
   auto *learner = static_cast<Learner *>(handle);
   InplacePredictImpl(p_m, c_json_config, learner, out_shape, out_dim, out_result);
   API_END();
@@ -1372,7 +1480,7 @@ XGB_DLL int XGBoosterPredictFromColumnar(BoosterHandle handle, char const *array
   auto proxy = dynamic_cast<data::DMatrixProxy *>(p_m.get());
   CHECK(proxy) << "Invalid input type for inplace predict.";
   xgboost_CHECK_C_ARG_PTR(array_interface);
-  proxy->SetColumnarData(array_interface);
+  proxy->SetColumnar(array_interface);
   auto *learner = static_cast<Learner *>(handle);
   InplacePredictImpl(p_m, c_json_config, learner, out_shape, out_dim, out_result);
   API_END();
@@ -1394,14 +1502,14 @@ XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle, char const *indptr, ch
   auto proxy = dynamic_cast<data::DMatrixProxy *>(p_m.get());
   CHECK(proxy) << "Invalid input type for inplace predict.";
   xgboost_CHECK_C_ARG_PTR(indptr);
-  proxy->SetCSRData(indptr, indices, data, cols, true);
+  proxy->SetCsr(indptr, indices, data, cols, true);
   auto *learner = static_cast<Learner *>(handle);
   InplacePredictImpl(p_m, c_json_config, learner, out_shape, out_dim, out_result);
   API_END();
 }
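A hedged sketch of the dense inplace-predict path: no `DMatrix` is constructed, the booster reads the caller's buffer through its array-interface JSON. The config keys follow the documented inplace-predict parameters (the `"missing": NaN` literal matches the C API docs, but treat the exact config as an assumption); error-code checks are elided:

```cpp
#include <xgboost/c_api.h>

#include <cstdint>
#include <string>

void PredictRows(BoosterHandle booster, float const *rows, bst_ulong n_rows, bst_ulong n_cols) {
  // __array_interface__ JSON describing a read-only (n_rows, n_cols) float32 buffer.
  std::string data = std::string{R"({"data": [)"} +
                     std::to_string(reinterpret_cast<std::uintptr_t>(rows)) +
                     R"(, true], "shape": [)" + std::to_string(n_rows) + ", " +
                     std::to_string(n_cols) + R"(], "typestr": "<f4", "version": 3})";
  char const *config =
      R"({"type": 0, "training": false, "iteration_begin": 0,
          "iteration_end": 0, "strict_shape": false, "missing": NaN})";
  bst_ulong const *shape = nullptr;
  bst_ulong dim = 0;
  float const *result = nullptr;
  // Passing NULL for the proxy DMatrix lets the C API allocate one internally.
  XGBoosterPredictFromDense(booster, data.c_str(), config, nullptr, &shape, &dim, &result);
  // `result` holds the prediction, laid out according to shape[0..dim).
}
```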
 
 #if !defined(XGBOOST_USE_CUDA)
-XGB_DLL int XGBoosterPredictFromCUDAArray(BoosterHandle handle, char const *, char const *,
+XGB_DLL int XGBoosterPredictFromCudaArray(BoosterHandle handle, char const *, char const *,
                                           DMatrixHandle, xgboost::bst_ulong const **,
                                           xgboost::bst_ulong *, const float **) {
   API_BEGIN();
@@ -1410,7 +1518,7 @@ XGB_DLL int XGBoosterPredictFromCUDAArray(BoosterHandle handle, char const *, ch
   API_END();
 }
 
-XGB_DLL int XGBoosterPredictFromCUDAColumnar(BoosterHandle handle, char const *, char const *,
+XGB_DLL int XGBoosterPredictFromCudaColumnar(BoosterHandle handle, char const *, char const *,
                                              DMatrixHandle, xgboost::bst_ulong const **,
                                              xgboost::bst_ulong *, const float **) {
   API_BEGIN();
@@ -1420,38 +1528,70 @@ XGB_DLL int XGBoosterPredictFromCUDAColumnar(BoosterHandle handle, char const *,
 }
 #endif  // !defined(XGBOOST_USE_CUDA)
 
-XGB_DLL int XGBoosterLoadModel(BoosterHandle handle, const char* fname) {
+namespace {
+template <typename Buffer, typename Iter = typename Buffer::const_iterator>
+Json DispatchModelType(Buffer const &buffer, StringView ext, bool warn) {
+  auto first_non_space = [&](Iter beg, Iter end) {
+    for (auto i = beg; i != end; ++i) {
+      if (!std::isspace(*i)) {
+        return i;
+      }
+    }
+    return end;
+  };
+
+  Json model;
+  auto it = first_non_space(buffer.cbegin() + 1, buffer.cend());
+  if (it != buffer.cend() && *it == '"') {
+    if (warn) {
+      LOG(WARNING) << "Unknown file format: `" << ext << "`. Using JSON (`json`) as a guess.";
+    }
+    model = Json::Load(StringView{buffer.data(), buffer.size()});
+  } else if (it != buffer.cend() && std::isalpha(*it)) {
+    if (warn) {
+      LOG(WARNING) << "Unknown file format: `" << ext << "`. Using UBJSON (`ubj`) as a guess.";
+    }
+    model = Json::Load(StringView{buffer.data(), buffer.size()}, std::ios::binary);
+  } else {
+    LOG(FATAL) << "Invalid model format. Expecting UBJSON (`ubj`) or JSON (`json`), got `" << ext
+               << "`";
+  }
+  return model;
+}
+}  // namespace
+
+XGB_DLL int XGBoosterLoadModel(BoosterHandle handle, const char *fname) {
   API_BEGIN();
   CHECK_HANDLE();
   xgboost_CHECK_C_ARG_PTR(fname);
   auto read_file = [&]() {
     auto str = common::LoadSequentialFile(fname);
-    CHECK_GE(str.size(), 3);  // "{}\0"
-    CHECK_EQ(str[0], '{');
+    // "{}"
+    CHECK_GE(str.size(), 2) << error::InvalidModel(fname);
+    // The old binary format has the starting bytes "binf".
+    if (str.size() >= 4 && StringView{str.data(), 4} == "binf") {  // NOLINT
+      LOG(FATAL) << error::OldBinaryModel(fname);
+    }
+    CHECK_EQ(str[0], '{') << error::InvalidModel(fname);
     return str;
   };
-  if (common::FileExtension(fname) == "json") {
+  auto ext = common::FileExtension(fname);
+  if (ext == "json") {
     auto buffer = read_file();
     Json in{Json::Load(StringView{buffer.data(), buffer.size()})};
-    static_cast<Learner *>(handle)->LoadModel(in);
-  } else if (common::FileExtension(fname) == "ubj") {
+    static_cast<Learner *>(handle)->LoadModel(in);
+  } else if (ext == "ubj") {
     auto buffer = read_file();
     Json in = Json::Load(StringView{buffer.data(), buffer.size()}, std::ios::binary);
     static_cast<Learner *>(handle)->LoadModel(in);
   } else {
-    std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname, "r"));
-    static_cast<Learner *>(handle)->LoadModel(fi.get());
+    auto buffer = read_file();
+    auto in = DispatchModelType(buffer, ext, true);
+    static_cast<Learner *>(handle)->LoadModel(in);
   }
   API_END();
 }
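A standalone restatement of the dispatch heuristic, for illustration. The buffer is assumed to have already passed the leading-`{` check: after the opening brace, a quoted key implies JSON text, while an alphabetic UBJSON type marker (e.g. `i`, `U`, `S`) implies a binary UBJSON document:

```cpp
#include <cctype>
#include <cstddef>
#include <string_view>

enum class ModelFormat { kJson, kUbjson, kInvalid };

ModelFormat GuessModelFormat(std::string_view buf) {
  for (std::size_t i = 1; i < buf.size(); ++i) {  // skip the leading '{'
    auto c = static_cast<unsigned char>(buf[i]);
    if (std::isspace(c)) {
      continue;  // mirrors first_non_space above
    }
    if (c == '"') {
      return ModelFormat::kJson;
    }
    if (std::isalpha(c)) {
      return ModelFormat::kUbjson;
    }
    break;
  }
  return ModelFormat::kInvalid;
}
```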
 
-namespace {
-void WarnOldModel() {
-  LOG(WARNING) << "Saving into deprecated binary model format, please consider using `json` or "
-                  "`ubj`. Model format is default to UBJSON in XGBoost 2.1 if not specified.";
-}
-}  // anonymous namespace
-
 XGB_DLL int XGBoosterSaveModel(BoosterHandle handle, const char *fname) {
   API_BEGIN();
   CHECK_HANDLE();
@@ -1467,17 +1607,14 @@ XGB_DLL int XGBoosterSaveModel(BoosterHandle handle, const char *fname) {
     Json::Dump(out, &str, mode);
     fo->Write(str.data(), str.size());
   };
-  if (common::FileExtension(fname) == "json") {
+  auto ext = common::FileExtension(fname);
+  if (ext == "json") {
     save_json(std::ios::out);
-  } else if (common::FileExtension(fname) == "ubj") {
+  } else if (ext == "ubj") {
     save_json(std::ios::binary);
-  } else if (common::FileExtension(fname) == "deprecated") {
-    WarnOldModel();
-    auto *bst = static_cast<Learner *>(handle);
-    bst->SaveModel(fo.get());
   } else {
-    LOG(WARNING) << "Saving model in the UBJSON format as default. You can use file extension:"
-                    " `json`, `ubj` or `deprecated` to choose between formats.";
+    LOG(WARNING) << "Saving model in the UBJSON format as default. You can use a file extension:"
+                    " `json` or `ubj` to choose between formats.";
     save_json(std::ios::binary);
   }
   API_END();
@@ -1488,9 +1625,13 @@ XGB_DLL int XGBoosterLoadModelFromBuffer(BoosterHandle handle, const void *buf,
   API_BEGIN();
   CHECK_HANDLE();
   xgboost_CHECK_C_ARG_PTR(buf);
-
+  using CharT = std::add_const_t<char>;
+  using IdxType = common::Span<CharT>::index_type;
+  auto buffer = common::Span<CharT>{static_cast<CharT *>(buf), static_cast<IdxType>(len)};
+  // Don't warn, we have to guess the format with buffer input.
+  auto in = DispatchModelType(buffer, "", false);
   common::MemoryFixSizeBuffer fs((void *)buf, len);  // NOLINT(*)
-  static_cast<Learner *>(handle)->LoadModel(&fs);
+  static_cast<Learner *>(handle)->LoadModel(in);
   API_END();
 }
@@ -1523,17 +1664,9 @@ XGB_DLL int XGBoosterSaveModelToBuffer(BoosterHandle handle, char const *json_co
     save_json(std::ios::out);
   } else if (format == "ubj") {
     save_json(std::ios::binary);
-  } else if (format == "deprecated") {
-    WarnOldModel();
-    auto &raw_str = learner->GetThreadLocal().ret_str;
-    raw_str.clear();
-    common::MemoryBufferStream fo(&raw_str);
-    learner->SaveModel(&fo);
-
-    *out_dptr = dmlc::BeginPtr(raw_str);
-    *out_len = static_cast<xgboost::bst_ulong>(raw_str.size());
   } else {
-    LOG(FATAL) << "Unknown format: `" << format << "`";
+    LOG(FATAL) << "Unknown model format: `" << format
+               << "`. Expecting UBJSON (`ubj`) or JSON (`json`).";
   }
 
   API_END();
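A round-trip sketch over the two supported formats. `XGBoosterSaveModelToBuffer` takes the format explicitly in its JSON config, while file-based save/load keys off the extension; error-code checks are elided:

```cpp
#include <xgboost/c_api.h>

#include <string>

void RoundTrip(BoosterHandle booster) {
  XGBoosterSaveModel(booster, "model.ubj");   // binary UBJSON
  XGBoosterSaveModel(booster, "model.json");  // JSON text

  bst_ulong len = 0;
  char const *raw = nullptr;
  XGBoosterSaveModelToBuffer(booster, R"({"format": "ubj"})", &len, &raw);
  // `raw` points at booster-owned thread-local storage, so copy it out before
  // issuing further calls on the same handle.
  std::string copy(raw, len);
  // A raw buffer carries no extension, so loading it exercises the
  // DispatchModelType guesser above (with the warning suppressed).
  XGBoosterLoadModelFromBuffer(booster, copy.data(), copy.size());
}
```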
@@ -1671,6 +1804,59 @@ XGB_DLL int XGBoosterDumpModelExWithFeatures(BoosterHandle handle,
   API_END();
 }
 
+/**
+ * Experimental (3.1), hidden.
+ */
+/**
+ * See @ref XGDMatrixGetCategories
+ */
+XGB_DLL int XGBoosterGetCategories(BoosterHandle handle, char const * /*config*/,
+                                   CategoriesHandle *out) {
+  API_BEGIN()
+  CHECK_HANDLE()
+
+  auto *bst = static_cast<Learner *>(handle);
+  auto const cats = bst->Cats();
+  xgboost_CHECK_C_ARG_PTR(out);
+  if (cats->Empty()) {
+    *out = nullptr;
+  } else {
+    auto new_cats = CopyCatContainer(bst->Ctx(), cats, bst->GetNumFeature());
+    *out = new_cats;
+  }
+
+  API_END()
+}
+/**
+ * See @ref XGDMatrixGetCategoriesExportToArrow
+ */
+XGB_DLL int XGBoosterGetCategoriesExportToArrow(BoosterHandle handle, char const * /*config*/,
+                                                CategoriesHandle *out, char const **export_out) {
+  API_BEGIN()
+  CHECK_HANDLE()
+
+  auto *bst = static_cast<Learner *>(handle);
+  auto const cats = bst->Cats();
+  auto n_features = bst->GetNumFeature();
+
+  xgboost_CHECK_C_ARG_PTR(out);
+  xgboost_CHECK_C_ARG_PTR(export_out);
+
+  if (cats->Empty()) {
+    *out = nullptr;
+    *export_out = nullptr;
+  } else {
+    // Create a new container
+    auto new_cats = CopyCatContainer(bst->Ctx(), cats, n_features);
+    *out = new_cats;
+    // Export to arrow
+    auto &ret_str = bst->GetThreadLocal().ret_str;
+    GetCategoriesImpl(new_cats->HostView(), n_features, &ret_str, export_out);
+  }
+
+  API_END()
+}
+
 XGB_DLL int XGBoosterGetAttr(BoosterHandle handle, const char *key, const char **out,
                              int *success) {
   auto *bst = static_cast<Learner *>(handle);
diff --git a/src/c_api/c_api.cu b/src/c_api/c_api.cu
index c9ff16dea120..999d3dfb36d5 100644
--- a/src/c_api/c_api.cu
+++ b/src/c_api/c_api.cu
@@ -1,12 +1,11 @@
 /**
- * Copyright 2019-2024, XGBoost Contributors
+ * Copyright 2019-2025, XGBoost Contributors
 */
 #include <algorithm>  // for transform
 
 #include "../common/api_entry.h"       // for XGBAPIThreadLocalEntry
 #include "../common/cuda_context.cuh"  // for CUDAContext
-#include "../common/threading_utils.h"
-#include "../data/array_interface.h"  // for DispatchDType, ArrayInterface
+#include "../data/array_interface.h"   // for DispatchDType, ArrayInterface
 #include "../data/device_adapter.cuh"
 #include "../data/proxy_dmatrix.h"
 #include "c_api_error.h"
@@ -17,7 +16,13 @@
 #include "xgboost/learner.h"
 #if defined(XGBOOST_USE_NCCL)
 #include <nccl.h>
-#endif
+#endif  // defined(XGBOOST_USE_NCCL)
+#if defined(XGBOOST_USE_NVCOMP)
+#include
+#endif  // defined(XGBOOST_USE_NVCOMP)
+#if defined(XGBOOST_USE_RMM)
+#include
+#endif  // defined(XGBOOST_USE_RMM)
 
 namespace xgboost {
 void XGBBuildInfoDevice(Json *p_info) {
@@ -56,6 +61,15 @@ void XGBBuildInfoDevice(Json *p_info) {
 #else
   info["USE_RMM"] = Boolean{false};
 #endif
+
+#if defined(XGBOOST_USE_NVCOMP)
+  info["USE_NVCOMP"] = Boolean{true};
+  v = {Json{Integer{NVCOMP_VER_MAJOR}}, Json{Integer{NVCOMP_VER_MINOR}},
+       Json{Integer{NVCOMP_VER_PATCH}}};
+  info["NVCOMP_VERSION"] = v;
+#else
+  info["USE_NVCOMP"] = Boolean{false};
+#endif
 }
 
 void XGBoostAPIGuard::SetGPUAttribute() {
@@ -70,7 +84,7 @@ void XGBoostAPIGuard::RestoreGPUAttribute() {
   cudaSetDevice(device_id_);
 }
 
-void CopyGradientFromCUDAArrays(Context const *ctx, ArrayInterface<2, false> const &grad,
+void CopyGradientFromCudaArrays(Context const *ctx, ArrayInterface<2, false> const &grad,
                                 ArrayInterface<2, false> const &hess,
                                 linalg::Matrix<GradientPair> *out_gpair) {
   auto grad_dev = dh::CudaGetPointerDevice(grad.data);
@@ -127,10 +141,10 @@ XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
   API_END();
 }
 
-int InplacePreidctCUDA(BoosterHandle handle, char const *c_array_interface,
-                       char const *c_json_config, std::shared_ptr<DMatrix> p_m,
-                       xgboost::bst_ulong const **out_shape, xgboost::bst_ulong *out_dim,
-                       const float **out_result) {
+template <bool is_columnar>
+int InplacePreidctCUDA(BoosterHandle handle, char const *data, char const *c_json_config,
+                       std::shared_ptr<DMatrix> p_m, xgboost::bst_ulong const **out_shape,
+                       xgboost::bst_ulong *out_dim, const float **out_result) {
   API_BEGIN();
   CHECK_HANDLE();
   if (!p_m) {
@@ -138,8 +152,13 @@ int InplacePreidctCUDA(BoosterHandle handle, char const *c_array_interface,
   }
   auto proxy = dynamic_cast<data::DMatrixProxy *>(p_m.get());
   CHECK(proxy) << "Invalid input type for inplace predict.";
+  xgboost_CHECK_C_ARG_PTR(data);
 
-  proxy->SetCUDAArray(c_array_interface);
+  if constexpr (is_columnar) {
+    proxy->SetCudaColumnar(data);
+  } else {
+    proxy->SetCudaArray(data);
+  }
 
   auto config = Json::Load(StringView{c_json_config});
   auto *learner = static_cast<Learner *>(handle);
@@ -173,7 +192,7 @@ int InplacePreidctCUDA(BoosterHandle handle, char const *c_array_interface,
   API_END();
 }
 
-XGB_DLL int XGBoosterPredictFromCudaColumnar(BoosterHandle handle, char const *c_json_strs,
+XGB_DLL int XGBoosterPredictFromCudaColumnar(BoosterHandle handle, char const *data,
                                              char const *c_json_config, DMatrixHandle m,
                                              xgboost::bst_ulong const **out_shape,
                                              xgboost::bst_ulong *out_dim,
@@ -183,11 +202,10 @@ XGB_DLL int XGBoosterPredictFromCudaColumnar(BoosterHandle handle, char const *c
   if (m) {
     p_m = *static_cast<std::shared_ptr<DMatrix> *>(m);
   }
-  return InplacePreidctCUDA(handle, c_json_strs, c_json_config, p_m, out_shape, out_dim,
-                            out_result);
+  return InplacePreidctCUDA<true>(handle, data, c_json_config, p_m, out_shape, out_dim, out_result);
 }
 
-XGB_DLL int XGBoosterPredictFromCudaArray(BoosterHandle handle, char const *c_json_strs,
+XGB_DLL int XGBoosterPredictFromCudaArray(BoosterHandle handle, char const *data,
                                           char const *c_json_config, DMatrixHandle m,
                                           xgboost::bst_ulong const **out_shape,
                                           xgboost::bst_ulong *out_dim, const float **out_result) {
@@ -196,6 +214,6 @@ XGB_DLL int XGBoosterPredictFromCudaArray(BoosterHandle handle, char const *c_js
     p_m = *static_cast<std::shared_ptr<DMatrix> *>(m);
   }
   xgboost_CHECK_C_ARG_PTR(out_result);
-  return InplacePreidctCUDA(handle, c_json_strs, c_json_config, p_m, out_shape, out_dim,
-                            out_result);
+  return InplacePreidctCUDA<false>(handle, data, c_json_config, p_m, out_shape, out_dim,
+                                   out_result);
 }
diff --git a/src/cli_main.cc b/src/cli_main.cc
deleted file mode 100644
index 1c388cf845c2..000000000000
--- a/src/cli_main.cc
+++ /dev/null
@@ -1,503 +0,0 @@
-/*!
- * Copyright 2014-2020 by Contributors
- * \file cli_main.cc
- * \brief The command line interface program of xgboost.
- *  This file is not included in dynamic library.
- */
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include "c_api/c_api_utils.h"
-#include "common/common.h"
-#include "common/config.h"
-#include "common/io.h"
-#include "common/version.h"
-
-namespace xgboost {
-enum CLITask {
-  kTrain = 0,
-  kDumpModel = 1,
-  kPredict = 2
-};
-
-struct CLIParam : public XGBoostParameter<CLIParam> {
-  /*! \brief the task name */
-  int task;
-  /*! \brief whether evaluate training statistics */
-  bool eval_train;
-  /*! \brief number of boosting iterations */
-  int num_round;
-  /*! \brief the period to save the model, 0 means only save the final round model */
-  int save_period;
-  /*! \brief the path of training set */
-  std::string train_path;
-  /*! \brief path of test dataset */
-  std::string test_path;
-  /*! \brief the path of test model file, or file to restart training */
-  std::string model_in;
-  /*! \brief the path of final model file, to be saved */
-  std::string model_out;
-  /*!
\brief the path of directory containing the saved models */ - std::string model_dir; - /*! \brief name of predict file */ - std::string name_pred; - /*! \brief data split mode */ - int dsplit; - /*!\brief limit number of trees in prediction */ - int ntree_limit; - int iteration_begin; - int iteration_end; - /*!\brief whether to directly output margin value */ - bool pred_margin; - /*! \brief whether dump statistics along with model */ - int dump_stats; - /*! \brief what format to dump the model in */ - std::string dump_format; - /*! \brief name of feature map */ - std::string name_fmap; - /*! \brief name of dump file */ - std::string name_dump; - /*! \brief the paths of validation data sets */ - std::vector eval_data_paths; - /*! \brief the names of the evaluation data used in output log */ - std::vector eval_data_names; - /*! \brief all the configurations */ - std::vector > cfg; - - static constexpr char const* const kNull = "NULL"; - - // declare parameters - DMLC_DECLARE_PARAMETER(CLIParam) { - // NOTE: declare everything except eval_data_paths. - DMLC_DECLARE_FIELD(task).set_default(kTrain) - .add_enum("train", kTrain) - .add_enum("dump", kDumpModel) - .add_enum("pred", kPredict) - .describe("Task to be performed by the CLI program."); - DMLC_DECLARE_FIELD(eval_train).set_default(false) - .describe("Whether evaluate on training data during training."); - DMLC_DECLARE_FIELD(num_round).set_default(10).set_lower_bound(1) - .describe("Number of boosting iterations"); - DMLC_DECLARE_FIELD(save_period).set_default(0).set_lower_bound(0) - .describe("The period to save the model, 0 means only save final model."); - DMLC_DECLARE_FIELD(train_path).set_default("NULL") - .describe("Training data path."); - DMLC_DECLARE_FIELD(test_path).set_default("NULL") - .describe("Test data path."); - DMLC_DECLARE_FIELD(model_in).set_default("NULL") - .describe("Input model path, if any."); - DMLC_DECLARE_FIELD(model_out).set_default("NULL") - .describe("Output model path, if any."); - DMLC_DECLARE_FIELD(model_dir).set_default("./") - .describe("Output directory of period checkpoint."); - DMLC_DECLARE_FIELD(name_pred).set_default("pred.txt") - .describe("Name of the prediction file."); - DMLC_DECLARE_FIELD(dsplit).set_default(0) - .add_enum("row", 0) - .add_enum("col", 1) - .describe("Data split mode."); - DMLC_DECLARE_FIELD(ntree_limit).set_default(0).set_lower_bound(0) - .describe("(Deprecated) Use iteration_begin/iteration_end instead."); - DMLC_DECLARE_FIELD(iteration_begin).set_default(0).set_lower_bound(0) - .describe("Begining of boosted tree iteration used for prediction."); - DMLC_DECLARE_FIELD(iteration_end).set_default(0).set_lower_bound(0) - .describe("End of boosted tree iteration used for prediction. 
0 means all the trees."); - DMLC_DECLARE_FIELD(pred_margin).set_default(false) - .describe("Whether to predict margin value instead of probability."); - DMLC_DECLARE_FIELD(dump_stats).set_default(false) - .describe("Whether dump the model statistics."); - DMLC_DECLARE_FIELD(dump_format).set_default("text") - .describe("What format to dump the model in."); - DMLC_DECLARE_FIELD(name_fmap).set_default("NULL") - .describe("Name of the feature map file."); - DMLC_DECLARE_FIELD(name_dump).set_default("dump.txt") - .describe("Name of the output dump text file."); - // alias - DMLC_DECLARE_ALIAS(train_path, data); - DMLC_DECLARE_ALIAS(test_path, test:data); - DMLC_DECLARE_ALIAS(name_fmap, fmap); - } - // customized configure function of CLIParam - inline void Configure(const std::vector >& _cfg) { - // Don't copy the configuration to enable parameter validation. - auto unknown_cfg = this->UpdateAllowUnknown(_cfg); - this->cfg.emplace_back("validate_parameters", "True"); - for (const auto& kv : unknown_cfg) { - if (!strncmp("eval[", kv.first.c_str(), 5)) { - char evname[256]; - CHECK_EQ(sscanf(kv.first.c_str(), "eval[%[^]]", evname), 1) - << "must specify evaluation name for display"; - eval_data_names.emplace_back(evname); - eval_data_paths.push_back(kv.second); - } else { - this->cfg.emplace_back(kv); - } - } - // constraint. - if (name_pred == "stdout") { - save_period = 0; - } - } -}; - -constexpr char const* const CLIParam::kNull; - -DMLC_REGISTER_PARAMETER(CLIParam); - -std::string CliHelp() { - return "Use xgboost -h for showing help information.\n"; -} - -void CLIError(dmlc::Error const& e) { - std::cerr << "Error running xgboost:\n\n" - << e.what() << "\n" - << CliHelp() - << std::endl; -} - -class CLI { - CLIParam param_; - std::unique_ptr learner_; - enum Print { - kNone, - kVersion, - kHelp - } print_info_ {kNone}; - - void ResetLearner(std::vector> const &matrices) { - learner_.reset(Learner::Create(matrices)); - if (param_.model_in != CLIParam::kNull) { - this->LoadModel(param_.model_in, learner_.get()); - learner_->SetParams(param_.cfg); - } else { - learner_->SetParams(param_.cfg); - } - learner_->Configure(); - } - - void CLITrain() { - const double tstart_data_load = dmlc::GetTime(); - // load in data. - std::shared_ptr dtrain(DMatrix::Load( - param_.train_path, ConsoleLogger::GlobalVerbosity() > ConsoleLogger::DefaultVerbosity(), - static_cast(param_.dsplit))); - std::vector> deval; - std::vector> cache_mats; - std::vector> eval_datasets; - cache_mats.push_back(dtrain); - for (size_t i = 0; i < param_.eval_data_names.size(); ++i) { - deval.emplace_back(std::shared_ptr( - DMatrix::Load(param_.eval_data_paths[i], - ConsoleLogger::GlobalVerbosity() > ConsoleLogger::DefaultVerbosity(), - static_cast(param_.dsplit)))); - eval_datasets.push_back(deval.back()); - cache_mats.push_back(deval.back()); - } - std::vector eval_data_names = param_.eval_data_names; - if (param_.eval_train) { - eval_datasets.push_back(dtrain); - eval_data_names.emplace_back("train"); - } - // initialize the learner. - this->ResetLearner(cache_mats); - LOG(INFO) << "Loading data: " << dmlc::GetTime() - tstart_data_load - << " sec"; - - // start training. 
- const double start = dmlc::GetTime(); - int32_t version = 0; - for (int i = version / 2; i < param_.num_round; ++i) { - double elapsed = dmlc::GetTime() - start; - if (version % 2 == 0) { - LOG(INFO) << "boosting round " << i << ", " << elapsed - << " sec elapsed"; - learner_->UpdateOneIter(i, dtrain); - version += 1; - } - std::string res = learner_->EvalOneIter(i, eval_datasets, eval_data_names); - LOG(CONSOLE) << res; - - if (param_.save_period != 0 && (i + 1) % param_.save_period == 0) { - std::ostringstream os; - os << param_.model_dir << '/' << std::setfill('0') << std::setw(4) - << i + 1 << ".model"; - this->SaveModel(os.str(), learner_.get()); - } - - version += 1; - } - LOG(INFO) << "Complete Training loop time: " << dmlc::GetTime() - start - << " sec"; - // always save final round - if ((param_.save_period == 0 || - param_.num_round % param_.save_period != 0)) { - std::ostringstream os; - if (param_.model_out == CLIParam::kNull) { - os << param_.model_dir << '/' << std::setfill('0') << std::setw(4) - << param_.num_round << ".model"; - } else { - os << param_.model_out; - } - this->SaveModel(os.str(), learner_.get()); - } - - double elapsed = dmlc::GetTime() - start; - LOG(INFO) << "update end, " << elapsed << " sec in all"; - } - - void CLIDumpModel() { - FeatureMap fmap; - if (param_.name_fmap != CLIParam::kNull) { - std::unique_ptr fs( - dmlc::Stream::Create(param_.name_fmap.c_str(), "r")); - dmlc::istream is(fs.get()); - fmap.LoadText(is); - } - // load model - CHECK_NE(param_.model_in, CLIParam::kNull) << "Must specify model_in for dump"; - this->ResetLearner({}); - - // dump data - std::vector dump = - learner_->DumpModel(fmap, param_.dump_stats, param_.dump_format); - std::unique_ptr fo( - dmlc::Stream::Create(param_.name_dump.c_str(), "w")); - dmlc::ostream os(fo.get()); - if (param_.dump_format == "json") { - os << "[" << std::endl; - for (size_t i = 0; i < dump.size(); ++i) { - if (i != 0) { - os << "," << std::endl; - } - os << dump[i]; // Dump the previously generated JSON here - } - os << std::endl << "]" << std::endl; - } else { - for (size_t i = 0; i < dump.size(); ++i) { - os << "booster[" << i << "]:\n"; - os << dump[i]; - } - } - // force flush before fo destruct. - os.set_stream(nullptr); - } - - void CLIPredict() { - CHECK_NE(param_.test_path, CLIParam::kNull) - << "Test dataset parameter test:data must be specified."; - // load data - std::shared_ptr dtest(DMatrix::Load( - param_.test_path, - ConsoleLogger::GlobalVerbosity() > ConsoleLogger::DefaultVerbosity(), - static_cast(param_.dsplit))); - // load model - CHECK_NE(param_.model_in, CLIParam::kNull) << "Must specify model_in for predict"; - this->ResetLearner({}); - - LOG(INFO) << "Start prediction..."; - HostDeviceVector preds; - if (param_.ntree_limit != 0) { - param_.iteration_end = GetIterationFromTreeLimit(param_.ntree_limit, learner_.get()); - LOG(WARNING) << "`ntree_limit` is deprecated, use `iteration_begin` and " - "`iteration_end` instead."; - } - learner_->Predict(dtest, param_.pred_margin, &preds, param_.iteration_begin, - param_.iteration_end); - LOG(CONSOLE) << "Writing prediction to " << param_.name_pred; - - std::unique_ptr fo( - dmlc::Stream::Create(param_.name_pred.c_str(), "w")); - dmlc::ostream os(fo.get()); - for (bst_float p : preds.ConstHostVector()) { - os << std::setprecision(std::numeric_limits::max_digits10) << p - << '\n'; - } - // force flush before fo destruct. 
- os.set_stream(nullptr); - } - - void LoadModel(std::string const& path, Learner* learner) const { - if (common::FileExtension(path) == "json") { - auto buffer = common::LoadSequentialFile(path); - CHECK_GT(buffer.size(), 2); - CHECK_EQ(buffer[0], '{'); - Json in{Json::Load({buffer.data(), buffer.size()})}; - learner->LoadModel(in); - } else { - std::unique_ptr fi(dmlc::Stream::Create(path.c_str(), "r")); - learner->LoadModel(fi.get()); - } - } - - void SaveModel(std::string const& path, Learner* learner) const { - learner->Configure(); - std::unique_ptr fo(dmlc::Stream::Create(path.c_str(), "w")); - if (common::FileExtension(path) == "json") { - Json out{Object()}; - learner->SaveModel(&out); - std::string str; - Json::Dump(out, &str); - fo->Write(str.c_str(), str.size()); - } else { - learner->SaveModel(fo.get()); - } - } - - void PrintHelp() const { - std::cout << "Usage: xgboost [ -h ] [ -V ] [ config file ] [ arguments ]" << std::endl; - std::stringstream ss; - ss << R"( - Options and arguments: - - -h, --help - Print this message. - - -V, --version - Print XGBoost version. - - arguments - Extra parameters that are not specified in config file, see below. - - Config file specifies the configuration for both training and testing. Each line - containing the [attribute] = [value] configuration. - - General XGBoost parameters: - - https://xgboost.readthedocs.io/en/latest/parameter.html - - Command line interface specfic parameters: - -)"; - - std::string help = param_.__DOC__(); - auto splited = common::Split(help, '\n'); - for (auto str : splited) { - ss << " " << str << '\n'; - } - ss << R"( eval[NAME]: string, optional, default='NULL' - Path to evaluation data, with NAME as data name. -)"; - - ss << R"( - Example: train.conf - - # General parameters - booster = gbtree - objective = reg:squarederror - eta = 1.0 - gamma = 1.0 - seed = 0 - min_child_weight = 0 - max_depth = 3 - - # Training arguments for CLI. - num_round = 2 - save_period = 0 - data = "demo/data/agaricus.txt.train?format=libsvm" - eval[test] = "demo/data/agaricus.txt.test?format=libsvm" - - See demo/ directory in XGBoost for more examples. 
-)"; - std::cout << ss.str() << std::endl; - } - - void PrintVersion() const { - auto ver = Version::String(Version::Self()); - std::cout << "XGBoost: " << ver << std::endl; - } - - public: - CLI(int argc, char* argv[]) { - if (argc < 2) { - this->PrintHelp(); - exit(1); - } - for (int i = 0; i < argc; ++i) { - std::string str {argv[i]}; - if (str == "-h" || str == "--help") { - print_info_ = kHelp; - break; - } else if (str == "-V" || str == "--version") { - print_info_ = kVersion; - break; - } - } - if (print_info_ != kNone) { - return; - } - - std::string config_path = argv[1]; - - common::ConfigParser cp(config_path); - auto cfg = cp.Parse(); - - for (int i = 2; i < argc; ++i) { - char name[256], val[256]; - if (sscanf(argv[i], "%[^=]=%s", name, val) == 2) { - cfg.emplace_back(std::string(name), std::string(val)); - } - } - - param_.Configure(cfg); - } - - int Run() { - switch (this->print_info_) { - case kNone: - break; - case kVersion: { - this->PrintVersion(); - return 0; - } - case kHelp: { - this->PrintHelp(); - return 0; - } - } - - try { - switch (param_.task) { - case kTrain: - CLITrain(); - break; - case kDumpModel: - CLIDumpModel(); - break; - case kPredict: - CLIPredict(); - break; - } - } catch (dmlc::Error const& e) { - xgboost::CLIError(e); - return 1; - } - return 0; - } -}; -} // namespace xgboost - -int main(int argc, char* argv[]) { - LOG(WARNING) - << "The command line interface is deprecated and will be removed in future releases."; - try { - xgboost::CLI cli(argc, argv); - return cli.Run(); - } catch (dmlc::Error const& e) { - // This captures only the initialization error. - xgboost::CLIError(e); - return 1; - } - return 0; -} diff --git a/src/collective/aggregator.h b/src/collective/aggregator.h index 744ce8c2a871..0411dbe605db 100644 --- a/src/collective/aggregator.h +++ b/src/collective/aggregator.h @@ -160,6 +160,20 @@ template return GlobalSum(ctx, info.IsColumnSplit(), values); } +template +[[nodiscard]] Result GlobalSum(Context const* ctx, MetaInfo const& info, + linalg::VectorView values, double* sum_weight) { + if (info.IsColumnSplit()) { + return Success(); + } + auto status = Success() << [&] { + return Allreduce(ctx, sum_weight, collective::Op::kSum); + } << [&] { + return Allreduce(ctx, values, collective::Op::kSum); + }; + return status; +} + /** * @brief Find the global ratio of the given two values across all workers. 
* diff --git a/src/collective/coll.cu b/src/collective/coll.cu index 359a87164daa..d327e03ba29b 100644 --- a/src/collective/coll.cu +++ b/src/collective/coll.cu @@ -1,21 +1,23 @@ /** - * Copyright 2023-2024, XGBoost Contributors + * Copyright 2023-2025, XGBoost Contributors */ #if defined(XGBOOST_USE_NCCL) -#include // for chrono, chrono_literals -#include // for size_t -#include // for int8_t, int64_t -#include // for future, future_status -#include // for shared_ptr -#include // for mutex, unique_lock -#include // for string -#include // for this_thread -#include // for invoke_result_t, is_same_v, enable_if_t -#include // for move - -#include "../common/cleanup.h" // for Cleanup -#include "../common/device_helpers.cuh" // for CUDAStreamView, CUDAEvent, device_vector +#include // for chrono, chrono_literals +#include // for size_t +#include // for int8_t, int64_t +#include // for bit_and, bit_or, bit_xor +#include // for future, future_status +#include // for shared_ptr +#include // for mutex, unique_lock +#include // for string +#include // for this_thread +#include // for invoke_result_t, is_same_v, enable_if_t +#include // for move + +#include "../common/cuda_stream.h" // for StreamRef, Event +#include "../common/device_helpers.cuh" // for device_vector #include "../common/threadpool.h" // for ThreadPool +#include "../common/utils.h" // for MakeCleanup #include "../data/array_interface.h" // for ArrayInterfaceHandler #include "allgather.h" // for AllgatherVOffset #include "coll.cuh" // for NCCLColl @@ -87,16 +89,16 @@ struct Chan { }; } // namespace -template > +template > [[nodiscard]] std::enable_if_t, Result> AsyncLaunch( common::ThreadPool* pool, NCCLComm const* nccl, std::shared_ptr stub, - dh::CUDAStreamView stream, Fn&& fn) { - dh::CUDAEvent e0; + curt::StreamRef stream, Fn&& fn) { + curt::Event e0; e0.Record(nccl->Stream()); stream.Wait(e0); auto cleanup = common::MakeCleanup([&] { - dh::CUDAEvent e1; + curt::Event e1; e1.Record(stream); nccl->Stream().Wait(e1); }); @@ -180,7 +182,7 @@ bool IsBitwiseOp(Op const& op) { } template -void RunBitwiseAllreduce(dh::CUDAStreamView stream, common::Span out_buffer, +void RunBitwiseAllreduce(curt::StreamRef stream, common::Span out_buffer, std::int8_t const* device_buffer, Func func, std::int32_t world_size, std::size_t size) { dh::LaunchN(size, stream, [=] __device__(std::size_t idx) { @@ -194,13 +196,13 @@ void RunBitwiseAllreduce(dh::CUDAStreamView stream, common::Span ou [[nodiscard]] Result BitwiseAllReduce(common::ThreadPool* pool, NCCLComm const* pcomm, common::Span data, Op op, - dh::CUDAStreamView stream) { + curt::StreamRef stream) { dh::device_vector buffer(data.size() * pcomm->World()); auto* device_buffer = buffer.data().get(); auto stub = pcomm->Stub(); // First gather data from all the workers. - auto rc = AsyncLaunch(pool, pcomm, stub, stream, [&](dh::CUDAStreamView s) { + auto rc = AsyncLaunch(pool, pcomm, stub, stream, [&](curt::StreamRef s) { return stub->Allgather(data.data(), device_buffer, data.size(), ncclInt8, pcomm->Handle(), s); }); if (!rc.OK()) { @@ -210,16 +212,16 @@ void RunBitwiseAllreduce(dh::CUDAStreamView stream, common::Span ou // Then reduce locally. 
switch (op) { case Op::kBitwiseAND: - RunBitwiseAllreduce(pcomm->Stream(), data, device_buffer, thrust::bit_and(), - pcomm->World(), data.size()); + RunBitwiseAllreduce(pcomm->Stream(), data, device_buffer, std::bit_and{}, pcomm->World(), + data.size()); break; case Op::kBitwiseOR: - RunBitwiseAllreduce(pcomm->Stream(), data, device_buffer, thrust::bit_or(), - pcomm->World(), data.size()); + RunBitwiseAllreduce(pcomm->Stream(), data, device_buffer, std::bit_or{}, pcomm->World(), + data.size()); break; case Op::kBitwiseXOR: - RunBitwiseAllreduce(pcomm->Stream(), data, device_buffer, thrust::bit_xor(), - pcomm->World(), data.size()); + RunBitwiseAllreduce(pcomm->Stream(), data, device_buffer, std::bit_xor{}, pcomm->World(), + data.size()); break; default: LOG(FATAL) << "Not a bitwise reduce operation."; @@ -263,7 +265,7 @@ ncclRedOp_t GetNCCLRedOp(Op const& op) { using T = decltype(t); auto rdata = common::RestoreType(data); return AsyncLaunch( - &this->pool_, nccl, stub, this->stream_.View(), [&](dh::CUDAStreamView s) { + &this->pool_, nccl, stub, this->stream_.View(), [&](curt::StreamRef s) { return stub->Allreduce(data.data(), data.data(), rdata.size(), GetNCCLType(type), GetNCCLRedOp(op), nccl->Handle(), s); }); @@ -285,7 +287,7 @@ ncclRedOp_t GetNCCLRedOp(Op const& op) { return Success() << [&] { return AsyncLaunch(&this->pool_, nccl, stub, this->stream_.View(), - [data, nccl, root, stub](dh::CUDAStreamView s) { + [data, nccl, root, stub](curt::StreamRef s) { return stub->Broadcast(data.data(), data.data(), data.size_bytes(), ncclInt8, root, nccl->Handle(), s); }); @@ -306,7 +308,7 @@ ncclRedOp_t GetNCCLRedOp(Op const& op) { auto send = data.subspan(comm.Rank() * size, size); return Success() << [&] { return AsyncLaunch(&this->pool_, nccl, stub, this->stream_.View(), - [send, data, size, nccl, stub](dh::CUDAStreamView s) { + [send, data, size, nccl, stub](curt::StreamRef s) { return stub->Allgather(send.data(), data.data(), size, ncclInt8, nccl->Handle(), s); }); @@ -321,7 +323,7 @@ namespace cuda_impl { * * https://arxiv.org/abs/1812.05964 */ -Result BroadcastAllgatherV(NCCLComm const* comm, dh::CUDAStreamView s, +Result BroadcastAllgatherV(NCCLComm const* comm, curt::StreamRef s, common::Span data, common::Span sizes, common::Span recv) { auto stub = comm->Stub(); @@ -379,7 +381,7 @@ Result BroadcastAllgatherV(NCCLComm const* comm, dh::CUDAStreamView s, }; } case AllgatherVAlgo::kBcast: { - return AsyncLaunch(&this->pool_, nccl, stub, this->stream_.View(), [&](dh::CUDAStreamView s) { + return AsyncLaunch(&this->pool_, nccl, stub, this->stream_.View(), [&](curt::StreamRef s) { return cuda_impl::BroadcastAllgatherV(nccl, s, data, sizes, recv); }); } diff --git a/src/collective/coll.cuh b/src/collective/coll.cuh index 1ebd33c74626..084f89402866 100644 --- a/src/collective/coll.cuh +++ b/src/collective/coll.cuh @@ -1,21 +1,21 @@ /** - * Copyright 2023-2024, XGBoost Contributors + * Copyright 2023-2025, XGBoost Contributors */ #pragma once #include // for int8_t, int64_t -#include "../common/device_helpers.cuh" // for CUDAStream -#include "../common/threadpool.h" // for ThreadPool -#include "../data/array_interface.h" // for ArrayInterfaceHandler -#include "coll.h" // for Coll -#include "comm.h" // for Comm -#include "xgboost/span.h" // for Span +#include "../common/cuda_stream.h" // for Stream +#include "../common/threadpool.h" // for ThreadPool +#include "../data/array_interface.h" // for ArrayInterfaceHandler +#include "coll.h" // for Coll +#include "comm.h" // for Comm +#include 
"xgboost/span.h" // for Span namespace xgboost::collective { class NCCLColl : public Coll { common::ThreadPool pool_; - dh::CUDAStream stream_; + curt::Stream stream_; public: NCCLColl(); diff --git a/src/collective/comm.cu b/src/collective/comm.cu index a5c4a1d8845e..7894daed1a35 100644 --- a/src/collective/comm.cu +++ b/src/collective/comm.cu @@ -1,5 +1,5 @@ /** - * Copyright 2023-2024, XGBoost Contributors + * Copyright 2023-2025, XGBoost Contributors */ #if defined(XGBOOST_USE_NCCL) #include // for sort @@ -113,7 +113,7 @@ NCCLComm::NCCLComm(Context const* ctx, Comm const& root, std::shared_ptr p for (std::int32_t r = 0; r < root.World(); ++r) { this->channels_.emplace_back( - std::make_shared(root, r, nccl_comm_, stub_, dh::DefaultStream())); + std::make_shared(root, r, nccl_comm_, stub_, curt::DefaultStream())); } } diff --git a/src/collective/comm.cuh b/src/collective/comm.cuh index 4add9ca612e0..95d36e929176 100644 --- a/src/collective/comm.cuh +++ b/src/collective/comm.cuh @@ -1,5 +1,5 @@ /** - * Copyright 2023, XGBoost Contributors + * Copyright 2023-2025, XGBoost Contributors */ #pragma once @@ -9,7 +9,7 @@ #include // for move -#include "../common/device_helpers.cuh" +#include "../common/cuda_stream.h" // for StreamRef #include "coll.h" #include "comm.h" #include "nccl_stub.h" // for NcclStub @@ -30,7 +30,7 @@ class NCCLComm : public Comm { ncclComm_t nccl_comm_{nullptr}; std::shared_ptr stub_; ncclUniqueId nccl_unique_id_{}; - dh::CUDAStreamView stream_; + curt::StreamRef stream_; std::string nccl_path_; public: @@ -45,7 +45,7 @@ class NCCLComm : public Comm { } ~NCCLComm() override; [[nodiscard]] bool IsFederated() const override { return false; } - [[nodiscard]] dh::CUDAStreamView Stream() const { return stream_; } + [[nodiscard]] curt::StreamRef Stream() const { return stream_; } [[nodiscard]] Result Block() const override { auto rc = this->Stream().Sync(false); return GetCUDAResult(rc); @@ -60,16 +60,16 @@ class NCCLChannel : public Channel { std::int32_t rank_{-1}; ncclComm_t nccl_comm_{}; std::shared_ptr stub_; - dh::CUDAStreamView stream_; + curt::StreamRef stream_; public: explicit NCCLChannel(Comm const& comm, std::int32_t rank, ncclComm_t nccl_comm, - std::shared_ptr stub, dh::CUDAStreamView stream) + std::shared_ptr stub, curt::StreamRef stream) : rank_{rank}, nccl_comm_{nccl_comm}, stub_{std::move(stub)}, Channel{comm, nullptr}, - stream_{stream} {} + stream_{std::move(stream)} {} [[nodiscard]] Result SendAll(std::int8_t const* ptr, std::size_t n) override { return stub_->Send(ptr, n, ncclInt8, rank_, nccl_comm_, stream_); diff --git a/src/collective/result.cc b/src/collective/result.cc index 140efa6d8bee..fd0914e4c11f 100644 --- a/src/collective/result.cc +++ b/src/collective/result.cc @@ -1,5 +1,5 @@ /** - * Copyright 2024, XGBoost Contributors + * Copyright 2024-2025, XGBoost Contributors */ #include "xgboost/collective/result.h" @@ -65,7 +65,7 @@ void ResultImpl::Concat(std::unique_ptr rhs) { std::string MakeMsg(std::string&& msg, char const* file, std::int32_t line) { dmlc::DateLogger logger; if (file && line != -1) { - auto name = std::filesystem::path{ file }.filename(); + auto name = std::filesystem::path{file}.filename(); return "[" + name.string() + ":" + std::to_string(line) + "|" + logger.HumanDate() + "]: " + std::forward(msg); } @@ -73,9 +73,19 @@ std::string MakeMsg(std::string&& msg, char const* file, std::int32_t line) { } } // namespace detail -void SafeColl(Result const& rc) { - if (!rc.OK()) { - LOG(FATAL) << rc.Report(); +void SafeColl(Result 
const& rc, char const* file, std::int32_t line) { + if (rc.OK()) { + return; } + if (file && line != -1) { + dmlc::DateLogger logger; + auto name = std::filesystem::path{file}.filename(); + LOG(FATAL) << ("[" + name.string() + ":" + std::to_string(line) + "|" + logger.HumanDate() + + "]:\n") + << rc.Report(); + // Return just in case if this function is deep in ctypes callbacks. + return; + } + LOG(FATAL) << rc.Report(); } } // namespace xgboost::collective diff --git a/src/collective/socket.cc b/src/collective/socket.cc index e37648c8edd3..7ae39cbc2365 100644 --- a/src/collective/socket.cc +++ b/src/collective/socket.cc @@ -1,5 +1,5 @@ /** - * Copyright 2022-2024, XGBoost Contributors + * Copyright 2022-2025, XGBoost Contributors */ #include "xgboost/collective/socket.h" diff --git a/src/common/algorithm.cuh b/src/common/algorithm.cuh index e88eb1f0c9b1..2309b39fbe84 100644 --- a/src/common/algorithm.cuh +++ b/src/common/algorithm.cuh @@ -1,22 +1,27 @@ /** - * Copyright 2022-2024, XGBoost Contributors + * Copyright 2022-2025, XGBoost Contributors */ #ifndef XGBOOST_COMMON_ALGORITHM_CUH_ #define XGBOOST_COMMON_ALGORITHM_CUH_ -#include // copy -#include // stable_sort_by_key -#include // tuple,get +#include // for copy +#include // for make_counting_iterator +#include // for stable_sort_by_key -#include // size_t -#include // int32_t -#include // DispatchSegmentedRadixSort,NullType,DoubleBuffer -#include // distance -#include // numeric_limits -#include // conditional_t,remove_const_t +#include // size_t +#include // int32_t +#include // for DeviceRunLengthEncode +#include // for DispatchSegmentedRadixSort +#include // for NullType, DoubleBuffer +#include // for tuple +#include // for plus, logical_and +#include // for distance +#include // for numeric_limits +#include // for conditional_t,remove_const_t #include "common.h" // safe_cuda #include "cuda_context.cuh" // CUDAContext +#include "cuda_stream.h" // for StreamRef #include "device_helpers.cuh" // TemporaryArray,SegmentId,LaunchN,Iota #include "device_vector.cuh" // for device_vector #include "xgboost/base.h" // XGBOOST_DEVICE @@ -26,6 +31,15 @@ namespace xgboost::common { namespace detail { + +#if CUB_VERSION >= 300000 +constexpr auto kCubSortOrderAscending = cub::SortOrder::Ascending; +constexpr auto kCubSortOrderDescending = cub::SortOrder::Descending; +#else +constexpr bool kCubSortOrderAscending = false; +constexpr bool kCubSortOrderDescending = true; +#endif + // Wrapper around cub sort to define is_decending template @@ -42,8 +56,9 @@ static void DeviceSegmentedRadixSortKeys(CUDAContext const *ctx, void *d_temp_st cub::DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); cub::DoubleBuffer d_values; + constexpr auto kCubSortOrder = IS_DESCENDING ? 
kCubSortOrderDescending : kCubSortOrderAscending; dh::safe_cuda((cub::DispatchSegmentedRadixSort< - IS_DESCENDING, KeyT, cub::NullType, BeginOffsetIteratorT, EndOffsetIteratorT, + kCubSortOrder, KeyT, cub::NullType, BeginOffsetIteratorT, EndOffsetIteratorT, OffsetT>::Dispatch(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, num_segments, d_begin_offsets, d_end_offsets, begin_bit, end_bit, false, ctx->Stream(), debug_synchronous))); @@ -58,7 +73,7 @@ void DeviceSegmentedRadixSortPair(void *d_temp_storage, const ValueT *d_values_in, ValueT *d_values_out, std::size_t num_items, std::size_t num_segments, BeginOffsetIteratorT d_begin_offsets, - EndOffsetIteratorT d_end_offsets, dh::CUDAStreamView stream, + EndOffsetIteratorT d_end_offsets, curt::StreamRef stream, int begin_bit = 0, int end_bit = sizeof(KeyT) * 8) { cub::DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); cub::DoubleBuffer d_values(const_cast(d_values_in), d_values_out); @@ -68,21 +83,22 @@ void DeviceSegmentedRadixSortPair(void *d_temp_storage, CHECK_LE(num_items, std::numeric_limits::max()); // For Thrust >= 1.12 or CUDA >= 11.4, we require system cub installation + constexpr auto kCubSortOrder = descending ? kCubSortOrderDescending : kCubSortOrderAscending; #if THRUST_MAJOR_VERSION >= 2 dh::safe_cuda((cub::DispatchSegmentedRadixSort< - descending, KeyT, ValueT, BeginOffsetIteratorT, EndOffsetIteratorT, + kCubSortOrder, KeyT, ValueT, BeginOffsetIteratorT, EndOffsetIteratorT, OffsetT>::Dispatch(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, num_segments, d_begin_offsets, d_end_offsets, begin_bit, end_bit, false, stream))); #elif (THRUST_MAJOR_VERSION == 1 && THRUST_MINOR_VERSION >= 13) dh::safe_cuda((cub::DispatchSegmentedRadixSort< - descending, KeyT, ValueT, BeginOffsetIteratorT, EndOffsetIteratorT, + kCubSortOrder, KeyT, ValueT, BeginOffsetIteratorT, EndOffsetIteratorT, OffsetT>::Dispatch(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, num_segments, d_begin_offsets, d_end_offsets, begin_bit, end_bit, false, stream, false))); #else dh::safe_cuda( - (cub::DispatchSegmentedRadixSort::Dispatch(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, num_segments, d_begin_offsets, d_end_offsets, begin_bit, @@ -162,30 +178,30 @@ template void SegmentedArgMergeSort(Context const *ctx, SegIt seg_begin, SegIt seg_end, ValIt val_begin, ValIt val_end, dh::device_vector *p_sorted_idx) { auto cuctx = ctx->CUDACtx(); - using Tup = thrust::tuple; + using Tup = cuda::std::tuple; auto &sorted_idx = *p_sorted_idx; std::size_t n = std::distance(val_begin, val_end); sorted_idx.resize(n); dh::Iota(dh::ToSpan(sorted_idx), cuctx->Stream()); dh::device_vector keys(sorted_idx.size()); - auto key_it = dh::MakeTransformIterator(thrust::make_counting_iterator(0ul), - [=] XGBOOST_DEVICE(std::size_t i) -> Tup { - std::int32_t seg_idx; - if (i < *seg_begin) { - seg_idx = -1; - } else { - seg_idx = dh::SegmentId(seg_begin, seg_end, i); - } - auto residue = val_begin[i]; - return thrust::make_tuple(seg_idx, residue); - }); + auto key_it = dh::MakeIndexTransformIter([=] XGBOOST_DEVICE(std::size_t i) -> Tup { + std::int32_t seg_idx; + if (i < *seg_begin) { + seg_idx = -1; + } else { + seg_idx = dh::SegmentId(seg_begin, seg_end, i); + } + auto residue = val_begin[i]; + return cuda::std::make_tuple(seg_idx, residue); + }); thrust::copy(ctx->CUDACtx()->CTP(), key_it, key_it + keys.size(), keys.begin()); thrust::stable_sort_by_key(cuctx->TP(), keys.begin(), keys.end(), sorted_idx.begin(), [=] 
XGBOOST_DEVICE(Tup const &l, Tup const &r) { - if (thrust::get<0>(l) != thrust::get<0>(r)) { - return thrust::get<0>(l) < thrust::get<0>(r); // segment index + if (cuda::std::get<0>(l) != cuda::std::get<0>(r)) { + // segment index + return cuda::std::get<0>(l) < cuda::std::get<0>(r); } - return thrust::get<1>(l) < thrust::get<1>(r); // residue + return cuda::std::get<1>(l) < cuda::std::get<1>(r); // residue }); } @@ -207,49 +223,58 @@ void ArgSort(Context const *ctx, Span keys, Span sorted_idx) { // track https://github.com/NVIDIA/cub/pull/340 for 64bit length support using OffsetT = std::conditional_t; CHECK_LE(sorted_idx.size(), std::numeric_limits::max()); + if (accending) { void *d_temp_storage = nullptr; #if THRUST_MAJOR_VERSION >= 2 - dh::safe_cuda((cub::DispatchRadixSort::Dispatch( - d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0, sizeof(KeyT) * 8, false, - cuctx->Stream()))); + dh::safe_cuda( + (cub::DispatchRadixSort::Dispatch( + d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0, sizeof(KeyT) * 8, false, + cuctx->Stream()))); #else - dh::safe_cuda((cub::DispatchRadixSort::Dispatch( - d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0, sizeof(KeyT) * 8, false, - nullptr, false))); + dh::safe_cuda( + (cub::DispatchRadixSort::Dispatch( + d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0, sizeof(KeyT) * 8, false, + nullptr, false))); #endif dh::TemporaryArray storage(bytes); d_temp_storage = storage.data().get(); #if THRUST_MAJOR_VERSION >= 2 - dh::safe_cuda((cub::DispatchRadixSort::Dispatch( - d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0, sizeof(KeyT) * 8, false, - cuctx->Stream()))); + dh::safe_cuda( + (cub::DispatchRadixSort::Dispatch( + d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0, sizeof(KeyT) * 8, false, + cuctx->Stream()))); #else - dh::safe_cuda((cub::DispatchRadixSort::Dispatch( - d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0, sizeof(KeyT) * 8, false, - nullptr, false))); + dh::safe_cuda( + (cub::DispatchRadixSort::Dispatch( + d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0, sizeof(KeyT) * 8, false, + nullptr, false))); #endif } else { void *d_temp_storage = nullptr; #if THRUST_MAJOR_VERSION >= 2 - dh::safe_cuda((cub::DispatchRadixSort::Dispatch( - d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0, sizeof(KeyT) * 8, false, - cuctx->Stream()))); + dh::safe_cuda( + (cub::DispatchRadixSort::Dispatch( + d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0, sizeof(KeyT) * 8, false, + cuctx->Stream()))); #else - dh::safe_cuda((cub::DispatchRadixSort::Dispatch( - d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0, sizeof(KeyT) * 8, false, - nullptr, false))); + dh::safe_cuda( + (cub::DispatchRadixSort::Dispatch( + d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0, sizeof(KeyT) * 8, false, + nullptr, false))); #endif dh::TemporaryArray storage(bytes); d_temp_storage = storage.data().get(); #if THRUST_MAJOR_VERSION >= 2 - dh::safe_cuda((cub::DispatchRadixSort::Dispatch( - d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0, sizeof(KeyT) * 8, false, - cuctx->Stream()))); + dh::safe_cuda( + (cub::DispatchRadixSort::Dispatch( + d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0, sizeof(KeyT) * 8, false, + cuctx->Stream()))); #else - dh::safe_cuda((cub::DispatchRadixSort::Dispatch( - d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0, sizeof(KeyT) * 8, false, - nullptr, false))); + dh::safe_cuda( + 
(cub::DispatchRadixSort::Dispatch( + d_temp_storage, bytes, d_keys, d_values, sorted_idx.size(), 0, sizeof(KeyT) * 8, false, + nullptr, false))); #endif } @@ -277,6 +302,10 @@ void CopyIf(CUDAContext const *cuctx, InIt in_first, InIt in_second, OutIt out_f template void InclusiveScan(xgboost::Context const *ctx, InputIteratorT d_in, OutputIteratorT d_out, ScanOpT scan_op, OffsetT num_items) { +#if CUB_VERSION >= 300000 + static_assert(std::is_unsigned_v, "OffsetT must be unsigned"); + static_assert(sizeof(OffsetT) >= 4, "OffsetT must be at least 4 bytes long"); +#endif auto cuctx = ctx->CUDACtx(); std::size_t bytes = 0; #if THRUST_MAJOR_VERSION >= 2 @@ -304,7 +333,47 @@ void InclusiveScan(xgboost::Context const *ctx, InputIteratorT d_in, OutputItera template void InclusiveSum(Context const *ctx, InputIteratorT d_in, OutputIteratorT d_out, OffsetT num_items) { +#if CUB_VERSION >= 300000 + InclusiveScan(ctx, d_in, d_out, std::plus{}, num_items); +#else InclusiveScan(ctx, d_in, d_out, cub::Sum{}, num_items); +#endif +} + +template +void RunLengthEncode(curt::StreamRef stream, Args &&...args) { + std::size_t n_bytes = 0; + dh::safe_cuda(cub::DeviceRunLengthEncode::Encode(nullptr, n_bytes, args..., stream)); + dh::CachingDeviceUVector tmp(n_bytes); + dh::safe_cuda(cub::DeviceRunLengthEncode::Encode(tmp.data(), n_bytes, args..., stream)); +} + +template +void SegmentedSum(curt::StreamRef stream, Args &&...args) { + std::size_t n_bytes = 0; + dh::safe_cuda(cub::DeviceSegmentedReduce::Sum(nullptr, n_bytes, args..., stream)); + dh::CachingDeviceUVector tmp(n_bytes); + dh::safe_cuda(cub::DeviceSegmentedReduce::Sum(tmp.data(), n_bytes, args..., stream)); +} + +/** + * @brief Customized version of @ref thrust::all_of + * + * @ref thrust::all_of uses small intervals for early stop. But we often use this function + * to perform checks on data and in most cases need to walk through the entire dataset + * (like all data point is valid). This function uses @ref thrust::reduce to avoid + * excessive kernel launches and synchronizations. 
+ */
+template <typename Policy, typename InputIt, typename Chk>
+[[nodiscard]] std::enable_if_t<
+    std::is_same_v<bool, std::remove_cv_t<typename std::iterator_traits<InputIt>::value_type>>,
+    bool>
+AllOf(Policy policy, InputIt first, InputIt second, Chk &&check) {
+  auto n = std::distance(first, second);
+  auto it =
+      dh::MakeIndexTransformIter([=] XGBOOST_DEVICE(std::size_t i) { return check(first[i]); });
+  return dh::Reduce(policy, it, it + n, true, std::logical_and<>{});
+}
 }  // namespace xgboost::common
 #endif  // XGBOOST_COMMON_ALGORITHM_CUH_
diff --git a/src/common/algorithm.h b/src/common/algorithm.h
index 19afaf3cc51e..10d23d05cc08 100644
--- a/src/common/algorithm.h
+++ b/src/common/algorithm.h
@@ -78,7 +78,6 @@ void Sort(Context const *ctx, Iter begin, Iter end, Comp comp) {
 template <typename Idx, typename Iter,
           typename V = typename std::iterator_traits<Iter>::value_type,
           typename Comp = std::less<V>>
 std::vector<Idx> ArgSort(Context const *ctx, Iter begin, Iter end, Comp comp = std::less<V>{}) {
-  CHECK(!ctx->IsCUDA());
   auto n = std::distance(begin, end);
   std::vector<Idx> result(n);
   Iota(ctx, result.begin(), result.end(), 0);
diff --git a/src/common/bitfield.h b/src/common/bitfield.h
index 6ecd7fcdf5a0..08a385a2cf00 100644
--- a/src/common/bitfield.h
+++ b/src/common/bitfield.h
@@ -272,7 +272,7 @@ inline std::uint32_t TrailingZeroBits(std::uint32_t value) {
   }
 #if defined(__GNUC__)
   return __builtin_ctz(value);
-#elif defined(_MSC_VER)
+#elif defined(_MSC_VER) && defined(_M_X64)
   return _tzcnt_u32(value);
 #else
   return detail::TrailingZeroBitsImpl(value);
diff --git a/src/common/column_matrix.h b/src/common/column_matrix.h
index 17f3ed4c6824..3e0fd087c492 100644
--- a/src/common/column_matrix.h
+++ b/src/common/column_matrix.h
@@ -15,7 +15,8 @@
 #include
 #include <type_traits>  // for enable_if_t, is_same_v, is_signed_v
 
-#include "../data/adapter.h"
+#include "../data/adapter.h"  // for SparsePageAdapterBatch
+#include "../data/entry.h"    // for IsValidFunctor
 #include "../data/gradient_index.h"
 #include "bitfield.h"  // for RBitField8
 #include "hist_util.h"
diff --git a/src/common/common.h b/src/common/common.h
index 7cd131e1159a..f4e6a697e742 100644
--- a/src/common/common.h
+++ b/src/common/common.h
@@ -1,20 +1,18 @@
 /**
- * Copyright 2015-2024, XGBoost Contributors
+ * Copyright 2015-2025, XGBoost Contributors
  * \file common.h
  * \brief Common utilities
  */
 #ifndef XGBOOST_COMMON_COMMON_H_
 #define XGBOOST_COMMON_COMMON_H_
 
-#include <array>    // for array
-#include <cmath>    // for ceil
-#include <cstddef>  // for size_t
-#include <cstdint>  // for int32_t, int64_t
-#include <sstream>  // for basic_istream, operator<<, istringstream
-#include <string>   // for string, basic_string, getline, char_traits
-#include <tuple>    // for make_tuple
-#include <utility>  // for forward, index_sequence, make_index_sequence
-#include <vector>   // for vector
+#include <cmath>        // for ceil
+#include <cstddef>      // for size_t
+#include <cstdint>      // for int32_t, int64_t
+#include <sstream>      // for istringstream
+#include <string>       // for string, basic_string, getline, char_traits
+#include <string_view>  // for string_view
+#include <vector>       // for vector
 
 #include "xgboost/base.h"     // for XGBOOST_DEVICE
 #include "xgboost/logging.h"  // for LOG, LOG_FATAL, LogMessageFatal
@@ -52,9 +50,9 @@ namespace xgboost::common {
  * \param s String to be split.
  * \param delim The delimiter.
 */
-inline std::vector<std::string> Split(const std::string& s, char delim) {
+[[nodiscard]] inline std::vector<std::string> Split(std::string const &s, char delim) {
   std::string item;
-  std::istringstream is(s);
+  std::istringstream is{s};
   std::vector<std::string> ret;
   while (std::getline(is, item, delim)) {
     ret.push_back(item);
@@ -62,6 +60,43 @@ inline std::vector<std::string> Split(const std::string& s, char delim) {
   return ret;
 }
 
+[[nodiscard]] inline std::vector<std::string_view> Split(std::string_view s, char delim) {
+  std::size_t cur = 0;
+  std::vector<std::string_view> ret;
+  while ((cur = s.find_first_of(delim)) != std::string_view::npos) {
+    auto segment = s.substr(0, cur);
+    ret.push_back(segment);
+    s = s.substr(cur + 1);
+  }
+  if (!s.empty()) {
+    ret.push_back(s);
+  }
+  return ret;
+}
+
+// Trims leading whitespace from a string
+[[nodiscard]] inline std::string_view TrimFirst(std::string_view const &str) {
+  if (str.empty()) {
+    return str;
+  }
+  auto first = str.find_first_not_of(" \t\n\r");
+  if (first == std::string_view::npos) {
+    return {};
+  }
+  return str.substr(first);
+}
+
+// Trims trailing whitespace from a string
+[[nodiscard]] inline std::string_view TrimLast(std::string_view const &str) {
+  if (str.empty()) {
+    return str;
+  }
+  auto last = str.find_last_not_of(" \t\n\r");
+  if (last == std::string_view::npos) {
+    return {};
+  }
+  return str.substr(0, last + 1);
+}
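A usage sketch for the new zero-copy helpers: both the `string_view` overload of `Split` and the `Trim` functions return views into the caller's storage, so the source must outlive the results. Note that `Split` does not trim the pieces it produces:

```cpp
#include <cassert>
#include <string_view>
#include <vector>

void Demo() {
  std::string_view line{"  train = data.txt \n"};
  auto trimmed = xgboost::common::TrimLast(xgboost::common::TrimFirst(line));
  assert(trimmed == "train = data.txt");
  std::vector<std::string_view> kv = xgboost::common::Split(trimmed, '=');
  // The delimiter is consumed; surrounding spaces are kept.
  assert(kv.size() == 2 && kv[0] == "train " && kv[1] == " data.txt");
}
```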
+
 /**
  * @brief Add escapes for a UTF-8 string.
  */
@@ -86,19 +121,6 @@ XGBOOST_DEVICE T1 DivRoundUp(const T1 a, const T2 b) {
   return static_cast<T1>(std::ceil(static_cast<double>(a) / b));
 }
 
-namespace detail {
-template <typename T, std::size_t N, std::size_t... Idx>
-constexpr auto UnpackArr(std::array<T, N> &&arr, std::index_sequence<Idx...>) {
-  return std::make_tuple(std::forward<std::array<T, N>>(arr)[Idx]...);
-}
-}  // namespace detail
-
-template <typename T, std::size_t N>
-constexpr auto UnpackArr(std::array<T, N> &&arr) {
-  return detail::UnpackArr(std::forward<std::array<T, N>>(arr),
-                           std::make_index_sequence<N>{});
-}
-
 /*
  * Range iterator
  */
@@ -165,13 +187,19 @@ class Range {
 inline void AssertGPUSupport() {
 #ifndef XGBOOST_USE_CUDA
-  LOG(FATAL) << "XGBoost version not compiled with GPU support.";
+  LOG(FATAL) << "XGBoost version not compiled with GPU support.";
 #endif  // XGBOOST_USE_CUDA
 }
 
+inline void AssertNvCompSupport() {
+#ifndef XGBOOST_USE_NVCOMP
+  LOG(FATAL) << "XGBoost is not compiled with NVCOMP support.";
+#endif  // XGBOOST_USE_NVCOMP
+}
+
 inline void AssertNCCLSupport() {
 #if !defined(XGBOOST_USE_NCCL)
-  LOG(FATAL) << "XGBoost version not compiled with NCCL support.";
+  LOG(FATAL) << "XGBoost version not compiled with NCCL support.";
 #endif  // !defined(XGBOOST_USE_NCCL)
 }
diff --git a/src/common/compressed_iterator.h b/src/common/compressed_iterator.h
index ab5815557f66..4ba8af2d64fa 100644
--- a/src/common/compressed_iterator.h
+++ b/src/common/compressed_iterator.h
@@ -1,12 +1,14 @@
 /**
- * Copyright 2017-2024, XGBoost Contributors
+ * Copyright 2017-2025, XGBoost Contributors
  * \file compressed_iterator.h
 */
 #pragma once
-#include <xgboost/base.h>
+#include <xgboost/base.h>  // for XGBOOST_RESTRICT
 
-#include <cmath>    // for ceil, log2
-#include <cstddef>  // for size_t
+#include <algorithm>  // for max
+#include <cmath>      // for ceil, log2
+#include <cstddef>    // for size_t
+#include <cstdint>    // for uint32_t
 
 #include "common.h"
@@ -79,7 +81,8 @@ class CompressedBufferWriter {
     size_t ret = std::ceil(static_cast<double>(compressed_size + detail::kPadding) /
                            static_cast<double>(sizeof(std::uint32_t))) *
                  sizeof(std::uint32_t);
-    return ret;
+    // Need at least 5 bytes for the reader
+    return std::max(ret, static_cast<std::size_t>(detail::kPadding + 1));
   }
 
   template <typename T>
@@ -212,4 +215,111 @@ class CompressedIterator {
     return *offset;
   }
 };
+
+/**
+ * @brief A compressed iterator with two buffers for the underlying storage.
+ *
+ * This accessor is significantly slower than the single-buffer one due to pipeline
+ * stalling and should not be used as the default. Pre-calculating the buffer selection
+ * indicator can help mitigate this. But we only use this iterator for external memory with
+ * direct memory access, which is slow anyway.
+ *
+ * Use the single-buffer iterator as a reference for how it works.
+ */
+template <typename OutT>
+class DoubleCompressedIter {
+ public:
+  // Type definitions for thrust
+  using self_type = DoubleCompressedIter;  // NOLINT
+  using difference_type = ptrdiff_t;       // NOLINT
+  using value_type = OutT;                 // NOLINT
+  using pointer = value_type *;            // NOLINT
+  using reference = value_type;            // NOLINT
+
+ private:
+  using BufT = CompressedByteT const *;
+  BufT XGBOOST_RESTRICT buf0_{nullptr};
+  BufT XGBOOST_RESTRICT buf1_{nullptr};
+  bst_idx_t const n0_{0};  // Size of the first buffer in bytes.
+  bst_idx_t const symbol_bits_{0};
+  std::size_t offset_{0};
+
+ public:
+  DoubleCompressedIter() = default;
+  DoubleCompressedIter(CompressedByteT const *XGBOOST_RESTRICT buf0, std::size_t n0_bytes,
+                       CompressedByteT const *XGBOOST_RESTRICT buf1, bst_idx_t n_symbols)
+      : buf0_{buf0}, buf1_{buf1}, n0_{n0_bytes}, symbol_bits_{detail::SymbolBits(n_symbols)} {}
+
+  XGBOOST_HOST_DEV_INLINE reference operator*() const {
+    constexpr std::int32_t kBitsPerByte = 8;
+    std::size_t start_bit_idx = ((offset_ + 1) * symbol_bits_ - 1);
+    std::size_t start_byte_idx = start_bit_idx >> 3;
+    start_byte_idx += detail::kPadding;
+
+    std::uint64_t tmp;
+
+    if (start_byte_idx >= this->n0_ && (start_byte_idx - 4) < this->n0_) {
+      // Access straddles the two buffers.
+      auto getv = [&](auto shift) {
+        auto shifted = start_byte_idx - shift;
+        bool ind = (shifted >= n0_);  // indicator for which buffer to read
+        // Pick the buffer to read
+        auto const *XGBOOST_RESTRICT buf = ind ? buf1_ : buf0_;
+        shifted -= ind * n0_;
+        return static_cast<std::uint64_t>(buf[shifted]);
+      };
+      // Read 5 bytes - the maximum we will need
+      tmp = static_cast<std::uint64_t>(buf0_[start_byte_idx - 4]) << 32 | getv(3) << 24 |
+            getv(2) << 16 | getv(1) << 8 | static_cast<std::uint64_t>(buf1_[start_byte_idx - n0_]);
+    } else {
+      // Access falls entirely within one of the buffers.
+      bool ind = start_byte_idx >= n0_;
+      // Pick the buffer to read
+      auto const *XGBOOST_RESTRICT buf =
+          reinterpret_cast<BufT>((!ind) * reinterpret_cast<std::uintptr_t>(buf0_) +
+                                 ind * reinterpret_cast<std::uintptr_t>(buf1_));
+      auto shifted = start_byte_idx - n0_ * ind;
+
+      /**
+       * Alternatively, we can use vector loads, but it requires aligned memory allocation
+       * by the backing storage.
+       *
+       * // Align the pointer for vector load
+       * auto beg_ptr = buf + shifted - 4;
+       * // base ptr in bytes
+       * auto aligned_beg_ptr = rmm::align_down(reinterpret_cast<std::uintptr_t>(beg_ptr),
+       *                                        std::alignment_of_v<std::uint32_t>);
+       * // base ptr in uint32
+       * auto aligned_beg_u32_ptr = reinterpret_cast<std::uint32_t const *>(aligned_beg_ptr);
+       * // 2 vector loads for 8 bytes, we will need 5 of them
+       * std::uint64_t v;
+       * auto *XGBOOST_RESTRICT v_ptr = reinterpret_cast<std::uint32_t *>(&v);
+       * v_ptr[0] = aligned_beg_u32_ptr[0];
+       * v_ptr[1] = aligned_beg_u32_ptr[1];
+       * // Difference between the original ptr and the aligned ptr.
+ * auto diff = reinterpret_cast(beg_ptr) - aligned_beg_ptr; + * // Beginning ptr that points to the first loaded values + * auto loaded_beg_ptr = reinterpret_cast(&v) + diff; + */ + + // Read 5 bytes - the maximum we will need + tmp = static_cast(buf[shifted - 4]) << 32 | + static_cast(buf[shifted - 3]) << 24 | + static_cast(buf[shifted - 2]) << 16 | + static_cast(buf[shifted - 1]) << 8 | buf[shifted]; + } + + std::int32_t bit_shift = (kBitsPerByte - ((offset_ + 1) * symbol_bits_)) % kBitsPerByte; + tmp >>= bit_shift; + // Mask off unneeded bits + std::uint64_t mask = (static_cast(1) << symbol_bits_) - 1; + return static_cast(tmp & mask); + } + + XGBOOST_DEVICE reference operator[](std::size_t idx) const { + self_type offset = (*this); + offset.offset_ += idx; + return *offset; + } +}; } // namespace xgboost::common diff --git a/src/common/config.h b/src/common/config.h deleted file mode 100644 index c8b98eb77f99..000000000000 --- a/src/common/config.h +++ /dev/null @@ -1,174 +0,0 @@ -/*! - * Copyright 2014-2019 by Contributors - * \file config.h - * \brief helper class to load in configures from file - * \author Haoda Fu, Hyunsu Cho - */ -#ifndef XGBOOST_COMMON_CONFIG_H_ -#define XGBOOST_COMMON_CONFIG_H_ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "xgboost/logging.h" - -namespace xgboost { -namespace common { -/*! - * \brief Implementation of config reader - */ -class ConfigParser { - public: - /*! - * \brief Constructor for INI-style configuration parser - * \param path path to configuration file - */ - explicit ConfigParser(const std::string path) - : path_(std::move(path)), - line_comment_regex_("^#"), - key_regex_(R"rx(^([^#"'=\r\n\t ]+)[\t ]*=)rx"), - key_regex_escaped_(R"rx(^(["'])([^"'=\r\n]+)\1[\t ]*=)rx"), - value_regex_(R"rx(^([^#"'\r\n\t ]+)[\t ]*(?:#.*){0,1}$)rx"), - value_regex_escaped_(R"rx(^(["'])([^"'\r\n]+)\1[\t ]*(?:#.*){0,1}$)rx") - {} - - std::string LoadConfigFile(const std::string& path) { - std::ifstream fin(path, std::ios_base::in | std::ios_base::binary); - CHECK(fin) << "Failed to open config file: \"" << path << "\""; - try { - std::string content{std::istreambuf_iterator(fin), - std::istreambuf_iterator()}; - return content; - } catch (std::ios_base::failure const &e) { - LOG(FATAL) << "Failed to read config file: \"" << path << "\"\n" - << e.what(); - } - return ""; - } - - /*! - * \brief Normalize end-of-line in a file so that it uses LF for all - * line endings. - * - * This is needed because some OSes use CR or CR LF instead. So we - * replace all CR with LF. - * - * \param p_config_str pointer to configuration - */ - std::string NormalizeConfigEOL(std::string const& config_str) { - std::string result; - std::stringstream ss(config_str); - for (auto c : config_str) { - if (c == '\r') { - result.push_back('\n'); - continue; - } - result.push_back(c); - } - return result; - } - - /*! - * \brief Parse configuration file into key-value pairs. 
- * \param path path to configuration file - * \return list of key-value pairs - */ - std::vector> Parse() { - std::string content { LoadConfigFile(path_) }; - content = NormalizeConfigEOL(content); - std::stringstream ss { content }; - std::vector> results; - std::string line; - std::string key, value; - // Loop over every line of the configuration file - while (std::getline(ss, line)) { - if (ParseKeyValuePair(line, &key, &value)) { - results.emplace_back(key, value); - } - } - return results; - } - - private: - std::string path_; - const std::regex line_comment_regex_, key_regex_, key_regex_escaped_, - value_regex_, value_regex_escaped_; - - public: - /*! - * \brief Remove leading and trailing whitespaces from a given string - * \param str string - * \return Copy of str with leading and trailing whitespaces removed - */ - static std::string TrimWhitespace(const std::string& str) { - const auto first_char = str.find_first_not_of(" \t\n\r"); - const auto last_char = str.find_last_not_of(" \t\n\r"); - if (first_char == std::string::npos) { - // Every character in str is a whitespace - return {}; - } - CHECK_NE(last_char, std::string::npos); - const auto substr_len = last_char + 1 - first_char; - return str.substr(first_char, substr_len); - } - - /*! - * \brief Parse a key-value pair from a string representing a line - * \param str string (cannot be multi-line) - * \param key place to store the key, if parsing is successful - * \param value place to store the value, if parsing is successful - * \return Whether the parsing was successful - */ - bool ParseKeyValuePair(const std::string& str, std::string* key, - std::string* value) { - std::string buf = TrimWhitespace(str); - if (buf.empty()) { - return false; - } - - /* Match key */ - std::smatch m; - if (std::regex_search(buf, m, line_comment_regex_)) { - // This line is a comment - return false; - } else if (std::regex_search(buf, m, key_regex_)) { - // Key doesn't have whitespace or # - CHECK_EQ(m.size(), 2); - *key = m[1].str(); - } else if (std::regex_search(buf, m, key_regex_escaped_)) { - // Key has a whitespace and/or #; it has to be wrapped around a pair of - // single or double quotes. Example: "foo bar" 'foo#bar' - CHECK_EQ(m.size(), 3); - *key = m[2].str(); - } else { - LOG(FATAL) << "This line is not a valid key-value pair: " << str; - } - - /* Match value */ - buf = m.suffix().str(); - buf = TrimWhitespace(buf); - if (std::regex_search(buf, m, value_regex_)) { - // Value doesn't have whitespace or # - CHECK_EQ(m.size(), 2); - *value = m[1].str(); - } else if (std::regex_search(buf, m, value_regex_escaped_)) { - // Value has a whitespace and/or #; it has to be wrapped around a pair of - // single or double quotes. 
Example: "foo bar" 'foo#bar' - CHECK_EQ(m.size(), 3); - *value = m[2].str(); - } else { - LOG(FATAL) << "This line is not a valid key-value pair: " << str; - } - return true; - } -}; - -} // namespace common -} // namespace xgboost -#endif // XGBOOST_COMMON_CONFIG_H_ diff --git a/src/common/cuda_context.cuh b/src/common/cuda_context.cuh index 7e1db8e3bf2f..c41cb0bc9d9a 100644 --- a/src/common/cuda_context.cuh +++ b/src/common/cuda_context.cuh @@ -1,11 +1,12 @@ /** - * Copyright 2022-2023, XGBoost Contributors + * Copyright 2022-2025, XGBoost Contributors */ #ifndef XGBOOST_COMMON_CUDA_CONTEXT_CUH_ #define XGBOOST_COMMON_CUDA_CONTEXT_CUH_ #include -#include "device_helpers.cuh" +#include "cuda_stream.h" // for DefaultStream +#include "device_vector.cuh" // for XGBCachingDeviceAllocator, XGBDeviceAllocator namespace xgboost { struct CUDAContext { @@ -19,9 +20,9 @@ struct CUDAContext { */ auto CTP() const { #if THRUST_MAJOR_VERSION >= 2 || defined(XGBOOST_USE_RMM) - return thrust::cuda::par_nosync(caching_alloc_).on(dh::DefaultStream()); + return thrust::cuda::par_nosync(caching_alloc_).on(curt::DefaultStream()); #else - return thrust::cuda::par(caching_alloc_).on(dh::DefaultStream()); + return thrust::cuda::par(caching_alloc_).on(curt::DefaultStream()); #endif // THRUST_MAJOR_VERSION >= 2 } /** @@ -29,12 +30,12 @@ struct CUDAContext { */ auto TP() const { #if THRUST_MAJOR_VERSION >= 2 - return thrust::cuda::par_nosync(alloc_).on(dh::DefaultStream()); + return thrust::cuda::par_nosync(alloc_).on(curt::DefaultStream()); #else - return thrust::cuda::par(alloc_).on(dh::DefaultStream()); + return thrust::cuda::par(alloc_).on(curt::DefaultStream()); #endif // THRUST_MAJOR_VERSION >= 2 } - auto Stream() const { return dh::DefaultStream(); } + auto Stream() const { return curt::DefaultStream(); } }; } // namespace xgboost #endif // XGBOOST_COMMON_CUDA_CONTEXT_CUH_ diff --git a/src/common/cuda_dr_utils.cc b/src/common/cuda_dr_utils.cc index 13f2516d408f..f39b513c4ee4 100644 --- a/src/common/cuda_dr_utils.cc +++ b/src/common/cuda_dr_utils.cc @@ -1,28 +1,36 @@ /** - * Copyright 2024, XGBoost contributors + * Copyright 2024-2025, XGBoost contributors */ #if defined(XGBOOST_USE_CUDA) #include "cuda_dr_utils.h" #include // for max +#include // for from_chars #include // for int32_t #include // for memset #include // for make_unique #include // for call_once #include // for stringstream -#include // for string +#include // for string, stoi -#include "common.h" // for safe_cuda +#include "common.h" // for safe_cuda, TrimFirst, Split #include "cuda_rt_utils.h" // for CurrentDevice -#include "xgboost/string_view.h" // for StringVie +#include "io.h" // for CmdOutput +#include "xgboost/string_view.h" // for StringView namespace xgboost::cudr { -CuDriverApi::CuDriverApi() { +CuDriverApi::CuDriverApi(std::int32_t cu_major, std::int32_t cu_minor, std::int32_t kdm_major) { // similar to dlopen, but without the need to release a handle. 
auto safe_load = [](xgboost::StringView name, auto **fnptr) { cudaDriverEntryPointQueryResult status; +#if (CUDA_VERSION / 1000) >= 13 + dh::safe_cuda(cudaGetDriverEntryPointByVersion(name.c_str(), reinterpret_cast(fnptr), + 12080, cudaEnablePerThreadDefaultStream, + &status)); +#else dh::safe_cuda(cudaGetDriverEntryPoint(name.c_str(), reinterpret_cast(fnptr), cudaEnablePerThreadDefaultStream, &status)); +#endif // (CUDA_VERSION / 1000) >= 13 CHECK(status == cudaDriverEntryPointSuccess) << name; CHECK(*fnptr); }; @@ -39,7 +47,18 @@ CuDriverApi::CuDriverApi() { safe_load("cuGetErrorName", &this->cuGetErrorName); safe_load("cuDeviceGetAttribute", &this->cuDeviceGetAttribute); safe_load("cuDeviceGet", &this->cuDeviceGet); - +#if defined(CUDA_HW_DECOM_AVAILABLE) + // CTK 12.8 + if (((cu_major == 12 && cu_minor >= 8) || cu_major > 12) && (kdm_major >= 570)) { + safe_load("cuMemBatchDecompressAsync", &this->cuMemBatchDecompressAsync); + } else { + this->cuMemBatchDecompressAsync = nullptr; + } +#else + (void)cu_major; + (void)cu_minor; + (void)kdm_major; +#endif // defined(CUDA_HW_DECOM_AVAILABLE) CHECK(this->cuMemGetAllocationGranularity); } @@ -73,9 +92,17 @@ void CuDriverApi::ThrowIfError(CUresult status, StringView fn, std::int32_t line } [[nodiscard]] CuDriverApi &GetGlobalCuDriverApi() { + std::int32_t cu_major = -1, cu_minor = -1; + curt::GetDrVersionGlobal(&cu_major, &cu_minor); + + std::int32_t kdm_major = -1, kdm_minor = -1; + if (!GetVersionFromSmiGlobal(&kdm_major, &kdm_minor)) { + kdm_major = -1; + } + static std::once_flag flag; static std::unique_ptr cu; - std::call_once(flag, [&] { cu = std::make_unique(); }); + std::call_once(flag, [&] { cu = std::make_unique(cu_major, cu_minor, kdm_major); }); return *cu; } @@ -104,5 +131,97 @@ void MakeCuMemLocation(CUmemLocationType type, CUmemLocation *loc) { MakeCuMemLocation(type, &prop.location); return prop; } + +[[nodiscard]] bool GetVersionFromSmi(std::int32_t *p_major, std::int32_t *p_minor) { + using ::xgboost::common::Split; + using ::xgboost::common::TrimFirst; + // `nvidia-smi --version` is not available for older versions, as a result, we can't query the + // cuda driver version unless we want to parse the table output. + + // Example output on a 2-GPU system: + // + // $ nvidia-smi --query-gpu=driver_version --format=csv + // + // driver_version + // 570.124.06 + // 570.124.06 + // + auto cmd = "nvidia-smi --query-gpu=driver_version --format=csv"; + auto smi_out_str = common::CmdOutput(StringView{cmd}); + + auto Invalid = [=] { + *p_major = *p_minor = -1; + return false; + }; + if (smi_out_str.empty()) { + return Invalid(); + } + + auto smi_split = Split(smi_out_str, '\n'); + if (smi_split.size() < 2) { + return Invalid(); + } + + // Use the first GPU + auto smi_ver = Split(TrimFirst(smi_split[1]), '.'); + // 570.124.06 + // On WSL2, you can have driver version with two components, e.g. 573.24 + if (smi_ver.size() != 2 && smi_ver.size() != 3) { + return Invalid(); + } + + auto [smajor, sminor] = std::tie(smi_ver[0], smi_ver[1]); + auto ret0 = std::from_chars(smajor.data(), smajor.data() + smajor.size(), *p_major); + auto ret1 = std::from_chars(sminor.data(), sminor.data() + sminor.size(), *p_minor); + if (ret0.ec != std::errc{} || ret1.ec != std::errc{}) { + return Invalid(); + } + LOG(INFO) << "Driver version: `" << *p_major << "." 
<< *p_minor << "`"; + return true; +} + +[[nodiscard]] bool GetVersionFromSmiGlobal(std::int32_t *p_major, std::int32_t *p_minor) { + static std::once_flag flag; + static std::int32_t major = -1, minor = -1; + static bool result = false; + std::call_once(flag, [&] { result = GetVersionFromSmi(&major, &minor); }); + + *p_major = major; + *p_minor = minor; + return result; +} + +namespace detail { +// Split up an impl function for simple tests. +[[nodiscard]] std::int32_t GetC2cLinkCountFromSmiImpl(std::string const &smi_output) { + using common::Split, common::TrimFirst, common::TrimLast; + auto smi_out_str = TrimLast(TrimFirst(smi_output)); + auto lines = Split(smi_out_str, '\n'); + if (lines.size() <= 1) { + return -1; + } + return lines.size() - 1; +} +} // namespace detail + +[[nodiscard]] std::int32_t GetC2cLinkCountFromSmi() { + auto n_devices = curt::AllVisibleGPUs(); + if (n_devices < 1) { + return -1; + } + + // See test for example output from smi. + auto cmd = "nvidia-smi c2c -s -i 0"; // Select the first GPU to query. + auto out = common::CmdOutput(StringView{cmd}); + auto cnt = detail::GetC2cLinkCountFromSmiImpl(out); + return cnt; +} + +[[nodiscard]] std::int32_t GetC2cLinkCountFromSmiGlobal() { + static std::once_flag once; + static std::int32_t cnt = -1; + std::call_once(once, [&] { cnt = GetC2cLinkCountFromSmi(); }); + return cnt; +} } // namespace xgboost::cudr #endif diff --git a/src/common/cuda_dr_utils.h b/src/common/cuda_dr_utils.h index ae0c9cef1dc7..5cc1530f1639 100644 --- a/src/common/cuda_dr_utils.h +++ b/src/common/cuda_dr_utils.h @@ -1,5 +1,5 @@ /** - * Copyright 2024, XGBoost contributors + * Copyright 2024-2025, XGBoost contributors * * @brief Utility for CUDA driver API. * @@ -12,9 +12,14 @@ #include #include // for int32_t +#include // for string #include "xgboost/string_view.h" // for StringView +#if CUDART_VERSION >= 12080 && defined(__linux__) +#define CUDA_HW_DECOM_AVAILABLE 1 +#endif + namespace xgboost::cudr { /** * @brief A struct for retrieving CUDA driver API from the runtime API. @@ -44,12 +49,17 @@ struct CuDriverApi { using DeviceGetAttribute = CUresult(int *pi, CUdevice_attribute attrib, CUdevice dev); using DeviceGet = CUresult(CUdevice *device, int ordinal); +#if defined(CUDA_HW_DECOM_AVAILABLE) + using BatchDecompressAsync = CUresult(CUmemDecompressParams *paramsArray, size_t count, + unsigned int flags, size_t *errorIndex, CUstream stream); +#endif // defined(CUDA_HW_DECOM_AVAILABLE) + MemGetAllocationGranularityFn *cuMemGetAllocationGranularity{nullptr}; // NOLINT MemCreateFn *cuMemCreate{nullptr}; // NOLINT /** * @param[in] offset - Must be zero. */ - MemMapFn *cuMemMap{nullptr}; // NOLINT + MemMapFn *cuMemMap{nullptr}; // NOLINT /** * @param[out] ptr - Resulting pointer to start of virtual address range allocated * @param[in] size - Size of the reserved virtual address range requested @@ -67,7 +77,13 @@ struct CuDriverApi { DeviceGetAttribute *cuDeviceGetAttribute{nullptr}; // NOLINT DeviceGet *cuDeviceGet{nullptr}; // NOLINT - CuDriverApi(); +#if defined(CUDA_HW_DECOM_AVAILABLE) + + BatchDecompressAsync *cuMemBatchDecompressAsync{nullptr}; // NOLINT + +#endif // defined(CUDA_HW_DECOM_AVAILABLE) + + CuDriverApi(std::int32_t cu_major, std::int32_t cu_minor, std::int32_t kdm_major); void ThrowIfError(CUresult status, StringView fn, std::int32_t line, char const *file) const; }; @@ -96,10 +112,37 @@ inline auto GetAllocGranularity(CUmemAllocationProp const *prop) { /** * @brief Obtain appropriate device ordinal for `CUmemLocation`. 
*/ -void MakeCuMemLocation(CUmemLocationType type, CUmemLocation* loc); +void MakeCuMemLocation(CUmemLocationType type, CUmemLocation *loc); /** * @brief Construct a `CUmemAllocationProp`. */ [[nodiscard]] CUmemAllocationProp MakeAllocProp(CUmemLocationType type); + +/** + * @brief Get system driver version from the `nvidia-smi` command. + * + * @return Whether the system call is successful. + */ +[[nodiscard]] bool GetVersionFromSmi(std::int32_t *p_major, std::int32_t *p_minor); + +/** + * @brief Cache the result from @ref GetVersionFromSmi in a global variable + */ +[[nodiscard]] bool GetVersionFromSmiGlobal(std::int32_t *p_major, std::int32_t *p_minor); + +namespace detail { +[[nodiscard]] std::int32_t GetC2cLinkCountFromSmiImpl(std::string const &smi_output); +} // namespace detail + +/** + * @brief Get the total number of C2C links `NVML_FI_DEV_C2C_LINK_COUNT`. + * + * @return -1 if there's no C2C. Otherwise, the number of links. + */ +[[nodiscard]] std::int32_t GetC2cLinkCountFromSmi(); +/** + * @brief Cache the result from @ref GetC2cLinkCountFromSmi in a global variable + */ +[[nodiscard]] std::int32_t GetC2cLinkCountFromSmiGlobal(); } // namespace xgboost::cudr diff --git a/src/common/cuda_pinned_allocator.cu b/src/common/cuda_pinned_allocator.cu new file mode 100644 index 000000000000..21ece34090c7 --- /dev/null +++ b/src/common/cuda_pinned_allocator.cu @@ -0,0 +1,71 @@ +/** + * Copyright 2025, XGBoost Contributors + */ +#include "cuda_pinned_allocator.h" + +#if defined(XGBOOST_USE_CUDA) + +#include // for cudaMemPoolCreate, cudaMemPoolDestroy + +#include // for array +#include // for memset +#include // for unique_ptr + +#endif // defined(XGBOOST_USE_CUDA) + +#include "common.h" +#include "cuda_dr_utils.h" // for CUDA_HW_DECOM_AVAILABLE +#include "cuda_rt_utils.h" // for CurrentDevice + +namespace xgboost::common::cuda_impl { +[[nodiscard]] MemPoolHdl CreateHostMemPool() { + auto mem_pool = std::unique_ptr{ + [] { + cudaMemPoolProps h_props; + std::memset(&h_props, '\0', sizeof(h_props)); + auto numa_id = curt::GetNumaId(); + h_props.location.id = numa_id; + h_props.location.type = cudaMemLocationTypeHostNuma; + h_props.allocType = cudaMemAllocationTypePinned; +#if defined(CUDA_HW_DECOM_AVAILABLE) + h_props.usage = cudaMemPoolCreateUsageHwDecompress; +#endif // defined(CUDA_HW_DECOM_AVAILABLE) + h_props.handleTypes = cudaMemHandleTypeNone; + + cudaMemPoolProps d_props; + std::memset(&d_props, '\0', sizeof(d_props)); + auto device_idx = curt::CurrentDevice(); + d_props.location.id = device_idx; + d_props.location.type = cudaMemLocationTypeDevice; + d_props.allocType = cudaMemAllocationTypePinned; +#if defined(CUDA_HW_DECOM_AVAILABLE) + d_props.usage = cudaMemPoolCreateUsageHwDecompress; +#endif // defined(CUDA_HW_DECOM_AVAILABLE) + d_props.handleTypes = cudaMemHandleTypeNone; + + std::array vprops{h_props, d_props}; + + cudaMemPool_t* mem_pool = new cudaMemPool_t; + dh::safe_cuda(cudaMemPoolCreate(mem_pool, vprops.data())); + + cudaMemAccessDesc h_desc; + h_desc.location = h_props.location; + h_desc.flags = cudaMemAccessFlagsProtReadWrite; + + cudaMemAccessDesc d_desc; + d_desc.location = d_props.location; + d_desc.flags = cudaMemAccessFlagsProtReadWrite; + + std::array descs{h_desc, d_desc}; + dh::safe_cuda(cudaMemPoolSetAccess(*mem_pool, descs.data(), descs.size())); + return mem_pool; + }(), + [](cudaMemPool_t* mem_pool) { + if (mem_pool) { + dh::safe_cuda(cudaMemPoolDestroy(*mem_pool)); + delete mem_pool; + } + }}; + return mem_pool; +} +} // namespace 
xgboost::common::cuda_impl diff --git a/src/common/cuda_pinned_allocator.h b/src/common/cuda_pinned_allocator.h index 4d7fa315845a..04549d54d3e4 100644 --- a/src/common/cuda_pinned_allocator.h +++ b/src/common/cuda_pinned_allocator.h @@ -1,15 +1,15 @@ /** - * Copyright 2022-2024, XGBoost Contributors + * Copyright 2022-2025, XGBoost Contributors * * @brief cuda pinned allocator for usage with thrust containers */ - #pragma once #include #include // for size_t #include // for numeric_limits +#include // for unique_ptr #include // for bad_array_new_length #include "common.h" @@ -103,6 +103,34 @@ struct SamAllocPolicy { } }; +/** + * @brief A RAII handle type to the CUDA memory pool. + */ +using MemPoolHdl = std::unique_ptr; + +/** + * @brief Create a CUDA memory pool for allocating host pinned memory. + */ +[[nodiscard]] MemPoolHdl CreateHostMemPool(); + +/** + * @brief C++ wrapper for the CUDA memory pool. + */ +class HostPinnedMemPool { + MemPoolHdl pool_; + + public: + HostPinnedMemPool() : pool_{CreateHostMemPool()} {} + void* AllocateAsync(std::size_t n_bytes, cudaStream_t stream) { + void* ptr = nullptr; + dh::safe_cuda(cudaMallocFromPoolAsync(&ptr, n_bytes, *this->pool_, stream)); + return ptr; + } + void DeallocateAsync(void* ptr, cudaStream_t stream) { + dh::safe_cuda(cudaFreeAsync(ptr, stream)); + } +}; + template typename Policy> class CudaHostAllocatorImpl : public Policy { public: diff --git a/src/common/cuda_rt_utils.cc b/src/common/cuda_rt_utils.cc index b6014385d6c0..ae6249da372f 100644 --- a/src/common/cuda_rt_utils.cc +++ b/src/common/cuda_rt_utils.cc @@ -5,7 +5,9 @@ #if defined(XGBOOST_USE_CUDA) #include -#endif // defined(XGBOOST_USE_CUDA) + +#include // for max +#endif // defined(XGBOOST_USE_CUDA) #include // for size_t #include // for int32_t @@ -40,31 +42,19 @@ std::int32_t CurrentDevice(bool raise) { } // alternatively: `nvidia-smi -q | grep Addressing` -bool SupportsPageableMem() { +[[nodiscard]] bool SupportsPageableMem() { std::int32_t res{0}; dh::safe_cuda(cudaDeviceGetAttribute(&res, cudaDevAttrPageableMemoryAccess, CurrentDevice())); return res == 1; } -bool SupportsAts() { +[[nodiscard]] bool SupportsAts() { std::int32_t res{0}; dh::safe_cuda(cudaDeviceGetAttribute(&res, cudaDevAttrPageableMemoryAccessUsesHostPageTables, CurrentDevice())); return res == 1; } -void CheckComputeCapability() { - for (std::int32_t d_idx = 0; d_idx < AllVisibleGPUs(); ++d_idx) { - cudaDeviceProp prop; - dh::safe_cuda(cudaGetDeviceProperties(&prop, d_idx)); - std::ostringstream oss; - oss << "CUDA Capability Major/Minor version number: " << prop.major << "." << prop.minor - << " is insufficient. 
Need >=3.5"; - int failed = prop.major < 3 || (prop.major == 3 && prop.minor < 5); - if (failed) LOG(WARNING) << oss.str() << " for device: " << d_idx; - } -} - void SetDevice(std::int32_t device) { if (device >= 0) { dh::safe_cuda(cudaSetDevice(device)); @@ -93,15 +83,22 @@ void GetVersionImpl(Fn&& fn, std::int32_t* major, std::int32_t* minor) { } } // namespace -void RtVersion(std::int32_t* major, std::int32_t* minor) { +void GetRtVersionGlobal(std::int32_t* major, std::int32_t* minor) { GetVersionImpl([](std::int32_t* ver) { dh::safe_cuda(cudaRuntimeGetVersion(ver)); }, major, minor); } -void DrVersion(std::int32_t* major, std::int32_t* minor) { +void GetDrVersionGlobal(std::int32_t* major, std::int32_t* minor) { GetVersionImpl([](std::int32_t* ver) { dh::safe_cuda(cudaDriverGetVersion(ver)); }, major, minor); } +[[nodiscard]] std::int32_t GetNumaId() { + std::int32_t numa_id = -1; + dh::safe_cuda(cudaDeviceGetAttribute(&numa_id, cudaDevAttrHostNumaId, curt::CurrentDevice())); + numa_id = std::max(numa_id, 0); + return numa_id; +} + #else std::int32_t AllVisibleGPUs() { return 0; } @@ -125,5 +122,11 @@ void SetDevice(std::int32_t device) { common::AssertGPUSupport(); } } + +[[nodiscard]] std::int32_t GetNumaId() { + common::AssertGPUSupport(); + return 0; +} + #endif // !defined(XGBOOST_USE_CUDA) } // namespace xgboost::curt diff --git a/src/common/cuda_rt_utils.h b/src/common/cuda_rt_utils.h index 7c80d9cf96f5..6fa747ecedf1 100644 --- a/src/common/cuda_rt_utils.h +++ b/src/common/cuda_rt_utils.h @@ -5,10 +5,6 @@ #include // for size_t #include // for int32_t -#if defined(XGBOOST_USE_NVTX) -#include -#endif // defined(XGBOOST_USE_NVTX) - namespace xgboost::curt { std::int32_t AllVisibleGPUs(); @@ -19,12 +15,10 @@ std::int32_t CurrentDevice(bool raise = true); // Whether the device supports coherently accessing pageable memory without calling // `cudaHostRegister` on it -bool SupportsPageableMem(); +[[nodiscard]] bool SupportsPageableMem(); // Address Translation Service (ATS) -bool SupportsAts(); - -void CheckComputeCapability(); +[[nodiscard]] bool SupportsAts(); void SetDevice(std::int32_t device); @@ -34,40 +28,11 @@ void SetDevice(std::int32_t device); [[nodiscard]] std::size_t TotalMemory(); // Returns the CUDA Runtime version. -void RtVersion(std::int32_t* major, std::int32_t* minor); +void GetRtVersionGlobal(std::int32_t* major, std::int32_t* minor); // Returns the latest version of CUDA supported by the driver. -void DrVersion(std::int32_t* major, std::int32_t* minor); +void GetDrVersionGlobal(std::int32_t* major, std::int32_t* minor); -struct NvtxDomain { - static constexpr char const *name{"libxgboost"}; // NOLINT -}; - -#if defined(XGBOOST_USE_NVTX) -using NvtxScopedRange = ::nvtx3::scoped_range_in; -using NvtxEventAttr = ::nvtx3::event_attributes; -using NvtxRgb = ::nvtx3::rgb; -#else -class NvtxScopedRange { - public: - template - explicit NvtxScopedRange(Args &&...) {} -}; -class NvtxEventAttr { - public: - template - explicit NvtxEventAttr(Args &&...) {} -}; -class NvtxRgb { - public: - template - explicit NvtxRgb(Args &&...) {} -}; -#endif // defined(XGBOOST_USE_NVTX) +// Get the current device's numa ID. 
+[[nodiscard]] std::int32_t GetNumaId(); } // namespace xgboost::curt - -#if defined(XGBOOST_USE_NVTX) -#define xgboost_NVTX_FN_RANGE() NVTX3_FUNC_RANGE_IN(::xgboost::curt::NvtxDomain) -#else -#define xgboost_NVTX_FN_RANGE() -#endif // defined(XGBOOST_USE_NVTX) diff --git a/src/common/cuda_stream.h b/src/common/cuda_stream.h new file mode 100644 index 000000000000..546029861058 --- /dev/null +++ b/src/common/cuda_stream.h @@ -0,0 +1,97 @@ +/** + * Copyright 2022-2025, XGBoost contributors + */ +#pragma once +#include + +#include // for unique_ptr +#include // for swap + +#include "common.h" + +namespace xgboost::curt { +class StreamRef; + +class Event { + std::unique_ptr event_; + + public: + explicit Event(bool disable_timing = true) + : event_{[disable_timing] { + auto e = new cudaEvent_t; + dh::safe_cuda(cudaEventCreateWithFlags( + e, disable_timing ? cudaEventDisableTiming : cudaEventDefault)); + return e; + }(), + [](cudaEvent_t *e) { + if (e) { + dh::safe_cuda(cudaEventDestroy(*e)); + delete e; + } + }} {} + + inline void Record(StreamRef stream); // NOLINT + // Define swap-based ctor to make sure an event is always valid. + Event(Event &&e) : Event() { std::swap(this->event_, e.event_); } + Event &operator=(Event &&e) { + std::swap(this->event_, e.event_); + return *this; + } + + operator cudaEvent_t() const { return *event_; } // NOLINT + cudaEvent_t const *data() const { return this->event_.get(); } // NOLINT + void Sync() { dh::safe_cuda(cudaEventSynchronize(*this->data())); } +}; + +class StreamRef { + cudaStream_t stream_{nullptr}; + + public: + explicit StreamRef(cudaStream_t s) : stream_{s} {} + void Wait(Event const &e) { +#if defined(__CUDACC_VER_MAJOR__) +#if __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ == 0 + // CUDA == 11.0 + dh::safe_cuda(cudaStreamWaitEvent(stream_, cudaEvent_t{e}, 0)); +#else + // CUDA > 11.0 + dh::safe_cuda(cudaStreamWaitEvent(stream_, cudaEvent_t{e}, cudaEventWaitDefault)); +#endif // __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ == 0: +#else // clang + dh::safe_cuda(cudaStreamWaitEvent(stream_, cudaEvent_t{e}, cudaEventWaitDefault)); +#endif // defined(__CUDACC_VER_MAJOR__) + } + operator cudaStream_t() const { // NOLINT + return stream_; + } + cudaError_t Sync(bool error = true) { + if (error) { + dh::safe_cuda(cudaStreamSynchronize(stream_)); + return cudaSuccess; + } + return cudaStreamSynchronize(stream_); + } +}; + +inline void Event::Record(StreamRef stream) { // NOLINT + dh::safe_cuda(cudaEventRecord(*event_, cudaStream_t{stream})); +} + +// Changing this has effect on prediction return, where we need to pass the pointer to +// third-party libraries like cuPy +inline StreamRef DefaultStream() { return StreamRef{cudaStreamPerThread}; } + +class Stream { + cudaStream_t stream_; + + public: + Stream() { dh::safe_cuda(cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking)); } + ~Stream() { dh::safe_cuda(cudaStreamDestroy(stream_)); } + + [[nodiscard]] StreamRef View() const { return StreamRef{stream_}; } + [[nodiscard]] cudaStream_t Handle() const { return stream_; } + + void Sync() { this->View().Sync(); } + void Wait(Event const &e) { this->View().Wait(e); } +}; +} // namespace xgboost::curt diff --git a/src/common/cuda_stream_pool.h b/src/common/cuda_stream_pool.h new file mode 100644 index 000000000000..a3943ce00941 --- /dev/null +++ b/src/common/cuda_stream_pool.h @@ -0,0 +1,27 @@ +/** + * Copyright 2025, XGBoost contributors + */ +#pragma once +#include // for atomic +#include // for size_t +#include // for vector + 
+#include "cuda_stream.h" // for StreamRef, Stream + +namespace xgboost::curt { +// rmm cuda_stream_pool +class StreamPool { + mutable std::atomic next_{0}; + std::vector stream_; + + public: + explicit StreamPool(std::size_t n) : stream_(n) {} + ~StreamPool() = default; + StreamPool(StreamPool const& that) = delete; + StreamPool& operator=(StreamPool const& that) = delete; + + [[nodiscard]] curt::StreamRef operator[](std::size_t i) const { return stream_[i].View(); } + [[nodiscard]] curt::StreamRef Next() const { return stream_[(next_++) % stream_.size()].View(); } + [[nodiscard]] std::size_t Size() const { return stream_.size(); } +}; +} // namespace xgboost::curt diff --git a/src/common/device_compression.cu b/src/common/device_compression.cu new file mode 100644 index 000000000000..287a7695f80a --- /dev/null +++ b/src/common/device_compression.cu @@ -0,0 +1,514 @@ +/** + * Copyright 2025, XGBoost contributors + * + * We use NVComp to perform compression and access the DE API directly for + * decompression. Invoking the DE directly can help us avoid unnecessary kernal launches + * and CUDA API calls and any potential blocking behaviours. + */ + +#include // for size_t +#include // for uint8_t, uint32_t, int32_t +#include // for shared_ptr + +#include "device_compression.cuh" +#include "cuda_stream.h" // for StreamRef +#include "device_helpers.cuh" // for MemcpyBatchAsync +#include "xgboost/span.h" // for Span + +#if defined(XGBOOST_USE_NVCOMP) + +#include // for nvcompBatchedSnappyDecompressAsync +#include // for all_of +#include // for reduce + +#include // for transform, min +#include // for memset +#include // for once_flag, call_once +#include // for vector + +#include "common.h" // for HumanMemUnit +#include "compressed_iterator.h" // for CompressedByteT +#include "cuda_context.cuh" // for CUDAContext +#include "cuda_dr_utils.h" // for GetGlobalCuDriverApi +#include "cuda_rt_utils.h" // for CurrentDevice +#include "device_compression.h" +#include "device_vector.cuh" // for DeviceUVector +#include "nvtx_utils.h" // for xgboost_NVTX_FN_RANGE +#include "ref_resource_view.cuh" // for MakeFixedVecWithPinnedMemPool +#include "ref_resource_view.h" // for RefResourceView + +namespace xgboost::dc { +namespace { +// Parse snappy header +XGBOOST_DEVICE std::uint32_t GetUncompressedSize(std::uint8_t const* src, std::size_t src_bytes, + std::uint32_t* p_header_nbytes, + std::int32_t* p_status) { + auto& n_bytes = *p_header_nbytes; + n_bytes = 0; + + *p_status = 1; + std::uint32_t uncompressed_size = src[n_bytes++]; + if (uncompressed_size > 0x7f) { + std::uint32_t c = (n_bytes < src_bytes) ? src[n_bytes++] : 0; + uncompressed_size = (uncompressed_size & 0x7f) | (c << 7); + if (uncompressed_size >= (0x80 << 7)) { + c = (n_bytes < src_bytes) ? src[n_bytes++] : 0; + uncompressed_size = (uncompressed_size & ((0x7f << 7) | 0x7f)) | (c << 14); + if (uncompressed_size >= (0x80 << 14)) { + c = (n_bytes < src_bytes) ? src[n_bytes++] : 0; + uncompressed_size = (uncompressed_size & ((0x7f << 14) | (0x7f << 7) | 0x7f)) | (c << 21); + if (uncompressed_size >= (0x80 << 21)) { + c = (n_bytes < src_bytes) ? 
+          if (c < 0x8) {
+            uncompressed_size =
+                (uncompressed_size & ((0x7f << 21) | (0x7f << 14) | (0x7f << 7) | 0x7f)) |
+                (c << 28);
+          } else {
+            *p_status = 0;
+          }
+        }
+      }
+    }
+  }
+
+  return uncompressed_size;
+}
+
+void FillDecompParams(void const* const* d_in_chunk_ptrs, std::size_t const* d_in_chunk_nbytes,
+                      common::Span de_params, size_t* d_act_nbytes,
+                      std::size_t const* d_out_chunk_nbytes, std::int32_t* statuses,
+                      curt::StreamRef stream) {
+  auto n_chunks = de_params.size();
+  dh::LaunchN(n_chunks, stream,
+              [d_in_chunk_ptrs, d_in_chunk_nbytes, d_out_chunk_nbytes, d_act_nbytes, de_params,
+               statuses, n_chunks] XGBOOST_DEVICE(std::size_t ix_chunk) {
+                std::size_t const dev_in_bytes = d_in_chunk_nbytes[ix_chunk];
+
+                // Parse the input buffer to determine the number of bytes to skip.
+                // The first byte with a 0 msb indicates no more bytes in the header.
+                auto cur = reinterpret_cast(d_in_chunk_ptrs[ix_chunk]);
+                std::uint32_t header_nbytes = 0;
+                std::uint32_t uncompressed_size =
+                    GetUncompressedSize(cur, dev_in_bytes, &header_nbytes, &statuses[ix_chunk]);
+                if (statuses[ix_chunk] == 0) {
+                  return;
+                }
+
+                de_params[ix_chunk].src = reinterpret_cast(cur + header_nbytes);
+                de_params[ix_chunk].dst = nullptr;  // not known yet
+                de_params[ix_chunk].dstNumBytes = d_out_chunk_nbytes[ix_chunk];
+                d_act_nbytes[ix_chunk] = 0;
+                de_params[ix_chunk].dstActBytes =
+                    reinterpret_cast(&d_act_nbytes[ix_chunk]);
+                de_params[ix_chunk].srcNumBytes = dev_in_bytes - header_nbytes;
+                de_params[ix_chunk].algo = CU_MEM_DECOMPRESS_ALGORITHM_SNAPPY;
+                statuses[ix_chunk] = 1;
+              });
+}
+
+struct ChkOp {
+  XGBOOST_DEVICE bool operator()(int s) { return s == 1; }
+};
+
+void CheckAlign(nvcompAlignmentRequirements_t alignment) {
+  CHECK_EQ(alignment.input, 1);
+  CHECK_EQ(alignment.output, 1);
+  CHECK_EQ(alignment.temp, 1);
+}
+
+void SafeNvComp(nvcompStatus_t status) {
+  if (status != nvcompSuccess) {
+    LOG(FATAL) << "NVComp error: " << static_cast(status);
+  }
+}
+}  // namespace
+
+[[nodiscard]] DeStatus const& GetGlobalDeStatus() {
+  std::once_flag static flag;
+  DeStatus static de;
+  std::call_once(flag, [&] {
+    // First check the driver; we don't need to worry about a mismatched libcuda version and RM
+    // version here. The first DE-enabled GPU requires >= 12.8 to work.
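+    // The runtime encodes versions as 1000 * major + 10 * minor, so the 12080 threshold
+    // below corresponds to CUDA 12.8: major = 12080 / 1000 = 12, minor = (12080 % 1000) / 10 = 8.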
+ std::int32_t driver_version = 0; + dh::safe_cuda(cudaDriverGetVersion(&driver_version)); + if (driver_version < 12080) { + return; + } + + // Then check HW + auto device = curt::CurrentDevice(); + std::int32_t mask = 0; + safe_cu(cudr::GetGlobalCuDriverApi().cuDeviceGetAttribute( + &mask, CU_DEVICE_ATTRIBUTE_MEM_DECOMPRESS_ALGORITHM_MASK, device)); + de.avail = static_cast(mask); + if (!de.avail) { + return; + } + + std::int32_t max_supported_size = 0; + // this refers to the output length of the decomp + safe_cu(cudr::GetGlobalCuDriverApi().cuDeviceGetAttribute( + &max_supported_size, CU_DEVICE_ATTRIBUTE_MEM_DECOMPRESS_MAXIMUM_LENGTH, device)); + de.max_output_size = static_cast(max_supported_size); + LOG(INFO) << "The maximum supported size of the DE:" << max_supported_size << std::endl; + }); + + return de; +} + +SnappyDecomprMgrImpl::SnappyDecomprMgrImpl(curt::StreamRef s, + std::shared_ptr pool, + CuMemParams params, + common::Span in_compressed_data) + : n_dst_bytes{params.TotalDstBytes()} { + std::size_t n_chunks = params.size(); + if (n_chunks == 0) { + return; + } + + std::size_t last_in = 0, last_out = 0; + + std::vector in_chunk_ptrs(n_chunks); + std::vector in_chunk_sizes(n_chunks); + std::vector out_chunk_sizes(n_chunks); + + dh::DeviceUVector status(n_chunks); + for (std::size_t i = 0; i < n_chunks; ++i) { + in_chunk_ptrs[i] = in_compressed_data.subspan(last_in, params[i].src_act_nbytes).data(); + in_chunk_sizes[i] = params[i].src_act_nbytes; + out_chunk_sizes[i] = params[i].dst_nbytes; + + last_in += params[i].src_nbytes; + last_out += params[i].dst_nbytes; + } + CHECK_EQ(this->n_dst_bytes, last_out); + + // copy to d + dh::CopyTo(in_chunk_ptrs, &this->d_in_chunk_ptrs, s); + dh::CopyTo(in_chunk_sizes, &this->d_in_chunk_sizes, s); + dh::CopyTo(out_chunk_sizes, &this->d_out_chunk_sizes, s); + this->act_nbytes.resize(n_chunks, 0); + + this->de_params = common::MakeFixedVecWithPinnedMemPoolde_params)::value_type>( + pool, n_chunks, s); + for (std::size_t i = 0; i < n_chunks; ++i) { + std::memset(this->de_params.data() + i, 0, sizeof(CUmemDecompressParams)); + } + + FillDecompParams(d_in_chunk_ptrs.data().get(), d_in_chunk_sizes.data().get(), de_params.ToSpan(), + this->act_nbytes.data().get(), d_out_chunk_sizes.data().get(), status.data(), s); + dh::XGBCachingDeviceAllocator alloc; + bool valid = thrust::all_of(thrust::cuda::par_nosync(alloc).on(s), status.cbegin(), status.cend(), + ChkOp{}); + CHECK(valid); + + auto max_supported_size = GetGlobalDeStatus().max_output_size; + auto max_chunk_size = *std::max_element(out_chunk_sizes.cbegin(), out_chunk_sizes.cend()); + if (GetGlobalDeStatus().avail) { + CHECK_GE(max_supported_size, max_chunk_size); + } + + this->de_params_copy = + common::MakeFixedVecWithPinnedMemPoolde_params)::value_type>(pool, n_chunks, + s); +} + +common::Span SnappyDecomprMgrImpl::GetParams( + common::Span out) { + xgboost_NVTX_FN_RANGE_C(3, 252, 198); + if (this->de_params.empty()) { + return {}; + } + auto n_chunks = this->de_params.size(); + CHECK(!this->de_params_copy.empty()); + // Set the output buffers. 
+  std::size_t last_out = 0;
+  for (std::size_t i = 0; i < n_chunks; ++i) {
+    this->de_params_copy[i] = this->de_params[i];
+    this->de_params_copy[i].dst = out.subspan(last_out, de_params[i].dstNumBytes).data();
+    last_out += de_params[i].dstNumBytes;
+  }
+
+  return this->de_params_copy.ToSpan();
+}
+
+[[nodiscard]] bool SnappyDecomprMgrImpl::Empty() const {
+#if defined(CUDA_HW_DECOM_AVAILABLE)
+  return this->de_params.empty();
+#else
+  return true;
+#endif
+}
+
+SnappyDecomprMgr::SnappyDecomprMgr() : pimpl_{std::make_unique()} {}
+SnappyDecomprMgr::SnappyDecomprMgr(SnappyDecomprMgr&& that) = default;
+SnappyDecomprMgr& SnappyDecomprMgr::operator=(SnappyDecomprMgr&& that) = default;
+
+SnappyDecomprMgr::~SnappyDecomprMgr() = default;
+
+[[nodiscard]] bool SnappyDecomprMgr::Empty() const { return this->Impl()->Empty(); }
+
+[[nodiscard]] std::size_t SnappyDecomprMgr::DecompressedBytes() const {
+  return this->Impl()->n_dst_bytes;
+}
+
+SnappyDecomprMgrImpl* SnappyDecomprMgr::Impl() const { return this->pimpl_.get(); }
+
+void DecompressSnappy(curt::StreamRef stream, SnappyDecomprMgr const& mgr,
+                      common::Span out, bool allow_fallback) {
+  xgboost_NVTX_FN_RANGE();
+  auto mgr_impl = mgr.Impl();
+  auto params = mgr_impl->GetParams(out);
+  if (params.empty()) {
+    CHECK(out.empty());
+    return;
+  }
+  if (GetGlobalDeStatus().avail &&
+      cudr::GetGlobalCuDriverApi().cuMemBatchDecompressAsync != nullptr) {
+    // Invoke the DE.
+#if defined(CUDA_HW_DECOM_AVAILABLE)
+    std::size_t error_index;
+    safe_cu(cudr::GetGlobalCuDriverApi().cuMemBatchDecompressAsync(
+        params.data(), params.size(), 0 /*unused*/, &error_index, stream));
+#else
+    static_assert(false, "`cuMemBatchDecompressAsync` requires CUDA >= 12.8.");
+#endif  // defined(CUDA_HW_DECOM_AVAILABLE)
+  } else {
+    // Fallback to nvcomp. This is only used during tests where we don't have access to the DE
+    // but still want the test coverage.
+ CHECK(allow_fallback); + CheckAlign(nvcompBatchedSnappyDecompressRequiredAlignments); + auto n_chunks = mgr_impl->Chunks(); + // Get sketch space + std::size_t n_tmp_bytes = 0; + SafeNvComp(nvcompBatchedSnappyDecompressGetTempSize(n_chunks, /*unused*/ 0, &n_tmp_bytes)); + dh::device_vector tmp(n_tmp_bytes, 0); + + dh::device_vector status(n_chunks, nvcompSuccess); + + // Build output vector + std::vector h_out_ptrs(n_chunks); + std::transform(params.cbegin(), params.cend(), h_out_ptrs.begin(), + [](auto const& p) { return p.dst; }); + dh::device_vector d_out_ptrs(n_chunks); + dh::safe_cuda(cudaMemcpyAsync(d_out_ptrs.data().get(), h_out_ptrs.data(), + dh::ToSpan(d_out_ptrs).size_bytes(), cudaMemcpyDefault, stream)); + // Run nvcomp + SafeNvComp(nvcompBatchedSnappyDecompressAsync( + mgr_impl->d_in_chunk_ptrs.data().get(), mgr_impl->d_in_chunk_sizes.data().get(), + mgr_impl->d_out_chunk_sizes.data().get(), mgr_impl->act_nbytes.data().get(), n_chunks, + tmp.data().get(), n_tmp_bytes, d_out_ptrs.data().get(), status.data().get(), stream)); + } +} + +[[nodiscard]] CuMemParams CompressSnappy(Context const* ctx, + common::Span in, + dh::DeviceUVector* p_out, + std::size_t chunk_size) { + CHECK_GT(chunk_size, 0); + auto cuctx = ctx->CUDACtx(); + auto nvcomp_batched_snappy_opts = nvcompBatchedSnappyDefaultOpts; + + nvcompAlignmentRequirements_t compression_alignment_reqs; + SafeNvComp(nvcompBatchedSnappyCompressGetRequiredAlignments(nvcomp_batched_snappy_opts, + &compression_alignment_reqs)); + CheckAlign(compression_alignment_reqs); + + /** + * Inputs + */ + std::size_t n_chunks = (in.size() + chunk_size - 1) / chunk_size; + if (n_chunks == 0) { + p_out->clear(); + return {}; + } + std::size_t last = 0; + + std::vector h_in_ptrs(n_chunks); + std::vector h_in_sizes(n_chunks); + for (std::size_t i = 0; i < n_chunks; ++i) { + auto n = std::min(chunk_size, in.size() - last); + auto chunk = in.subspan(last, n); + last += n; + + h_in_sizes[i] = chunk.size(); + h_in_ptrs[i] = chunk.data(); + } + CHECK_EQ(last, in.size()); + + dh::DeviceUVector in_ptrs(h_in_ptrs.size()); + dh::safe_cuda(cudaMemcpyAsync(in_ptrs.data(), h_in_ptrs.data(), + common::Span{h_in_ptrs}.size_bytes(), cudaMemcpyDefault, + cuctx->Stream())); + dh::DeviceUVector in_sizes(h_in_sizes.size()); + dh::safe_cuda(cudaMemcpyAsync(in_sizes.data(), h_in_sizes.data(), + common::Span{h_in_sizes}.size_bytes(), cudaMemcpyDefault, + cuctx->Stream())); + + CHECK_EQ(n_chunks, in_sizes.size()); + std::size_t max_in_nbytes = *std::max_element(h_in_sizes.cbegin(), h_in_sizes.cend()); + + /** + * Outputs + */ + std::size_t comp_temp_bytes; + SafeNvComp(nvcompBatchedSnappyCompressGetTempSize(n_chunks, chunk_size, + nvcomp_batched_snappy_opts, &comp_temp_bytes)); + CHECK_EQ(comp_temp_bytes, 0); + dh::DeviceUVector comp_tmp(comp_temp_bytes); + + std::size_t max_out_nbytes = 0; + SafeNvComp(nvcompBatchedSnappyCompressGetMaxOutputChunkSize( + std::min(max_in_nbytes, chunk_size), nvcomp_batched_snappy_opts, &max_out_nbytes)); + p_out->resize(max_out_nbytes * n_chunks); + std::vector h_out_ptrs(n_chunks); + std::vector h_out_sizes(n_chunks); + auto s_out = dh::ToSpan(*p_out); + for (std::size_t i = 0; i < n_chunks; ++i) { + auto chunk = s_out.subspan(max_out_nbytes * i, max_out_nbytes); + h_out_ptrs[i] = chunk.data(); + h_out_sizes[i] = chunk.size(); + } + dh::DeviceUVector out_ptrs(h_out_ptrs.size()); + dh::safe_cuda(cudaMemcpyAsync(out_ptrs.data(), h_out_ptrs.data(), + common::Span{h_out_ptrs}.size_bytes(), cudaMemcpyDefault)); + dh::DeviceUVector 
out_sizes(h_out_sizes.size()); + dh::safe_cuda(cudaMemcpyAsync(out_sizes.data(), h_out_sizes.data(), + common::Span{h_out_sizes}.size_bytes(), cudaMemcpyDefault)); + + /** + * Compress + */ + SafeNvComp(nvcompBatchedSnappyCompressAsync( + in_ptrs.data(), in_sizes.data(), max_in_nbytes, n_chunks, comp_tmp.data(), comp_temp_bytes, + out_ptrs.data(), out_sizes.data(), nvcomp_batched_snappy_opts, cuctx->Stream())); + auto n_bytes = thrust::reduce(cuctx->CTP(), out_sizes.cbegin(), out_sizes.cend()); + auto n_total_bytes = p_out->size(); + auto ratio = static_cast(n_total_bytes) / in.size_bytes(); + auto ratio_act = static_cast(n_bytes) / in.size_bytes(); + LOG(DEBUG) << "[snappy] Input: " << common::HumanMemUnit(in.size_bytes()) + << ", need:" << common::HumanMemUnit(n_bytes) + << ", allocated:" << common::HumanMemUnit(n_total_bytes) << ", ratio:" << ratio + << ", actual ratio:" << ratio_act; + + /** + * Meta + */ + CuMemParams params(n_chunks); + std::vector h_act_nbytes(out_sizes.size()); + dh::safe_cuda(cudaMemcpyAsync(h_act_nbytes.data(), out_sizes.data(), + common::Span{h_out_sizes}.size_bytes(), cudaMemcpyDefault, + cuctx->Stream())); + for (std::size_t i = 0; i < n_chunks; ++i) { + auto& p = params[i]; + p.src_nbytes = h_out_sizes[i]; + p.src_act_nbytes = h_act_nbytes[i]; + p.dst_nbytes = h_in_sizes[i]; + p.algo = ComprParam::kSnappy; + } + return params; +} + +[[nodiscard]] common::RefResourceView CoalesceCompressedBuffersToHost( + curt::StreamRef stream, std::shared_ptr pool, + CuMemParams const& in_params, dh::DeviceUVector const& in_buf, + CuMemParams* p_out) { + std::size_t n_total_act_bytes = in_params.TotalSrcActBytes(); + std::size_t n_total_bytes = in_params.TotalSrcBytes(); + if (n_total_bytes == 0) { + CHECK_EQ(n_total_act_bytes, 0); + p_out->resize(0); + return {}; + } + // copy from device buffer to the host cache. 
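+  // The batched copy below also compacts the chunks: source offsets advance by the padded
+  // src_nbytes, while destination offsets advance by the actual src_act_nbytes. For example
+  // (hypothetical sizes), two chunks with src_nbytes = {64, 64} and src_act_nbytes = {40, 20}
+  // occupy 128 bytes on the device but only 60 bytes in the host cache, with the second
+  // chunk starting at host offset 40.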
+ CHECK_EQ(n_total_bytes, in_buf.size()); + CHECK(pool); + auto c_page = + common::MakeFixedVecWithPinnedMemPool::value_type>( + pool, n_total_act_bytes, stream); + std::vector srcs(in_params.size()); + std::vector dsts(in_params.size()); + std::vector sizes(in_params.size()); + + decltype(srcs)::value_type sptr = in_buf.data(); + decltype(dsts)::value_type dptr = c_page.data(); + + for (std::size_t i = 0; i < in_params.size(); ++i) { + CHECK_LE(in_params[i].src_act_nbytes, in_params[i].src_nbytes); + sizes[i] = in_params[i].src_act_nbytes; + + srcs[i] = sptr; + dsts[i] = dptr; + + sptr += in_params[i].src_nbytes; + dptr += in_params[i].src_act_nbytes; + } + std::size_t fail_idx = 0; + dh::safe_cuda(dh::MemcpyBatchAsync(dsts.data(), srcs.data(), sizes.data(), + in_params.size(), &fail_idx, stream)); + + auto& out_params = *p_out; + out_params.resize(in_params.size()); + for (std::size_t i = 0; i < in_params.size(); ++i) { + out_params[i].algo = in_params[i].algo; + out_params[i].dst_nbytes = in_params[i].dst_nbytes; + out_params[i].src_nbytes = in_params[i].src_act_nbytes; // change to act + out_params[i].src_act_nbytes = in_params[i].src_act_nbytes; + } + return c_page; +} +} // namespace xgboost::dc + +#else + +namespace xgboost::dc { +// Impl +SnappyDecomprMgrImpl::SnappyDecomprMgrImpl(curt::StreamRef, + std::shared_ptr, + CuMemParams, + common::Span) {} + +// SnappyDecomprMgr +SnappyDecomprMgr::SnappyDecomprMgr() = default; +SnappyDecomprMgr::SnappyDecomprMgr(SnappyDecomprMgr&& that) = default; +SnappyDecomprMgr& SnappyDecomprMgr::operator=(SnappyDecomprMgr&& that) = default; +SnappyDecomprMgr::~SnappyDecomprMgr() = default; +SnappyDecomprMgrImpl* SnappyDecomprMgr::Impl() const { return nullptr; } + +[[nodiscard]] bool SnappyDecomprMgr::Empty() const { return true; } +[[nodiscard]] std::size_t SnappyDecomprMgr::DecompressedBytes() const { return 0; } + +// Round-trip compression +void DecompressSnappy(curt::StreamRef, SnappyDecomprMgr const&, + common::Span, bool) { + common::AssertNvCompSupport(); +} + +[[nodiscard]] CuMemParams CompressSnappy(Context const*, + common::Span in, + dh::DeviceUVector*, std::size_t) { + if (in.empty()) { + return {}; + } + common::AssertNvCompSupport(); + return {}; +} + +[[nodiscard]] common::RefResourceView CoalesceCompressedBuffersToHost( + curt::StreamRef, std::shared_ptr, CuMemParams const& in_params, + dh::DeviceUVector const&, CuMemParams*) { + std::size_t n_total_bytes = in_params.TotalSrcBytes(); + if (n_total_bytes == 0) { + return {}; + } + common::AssertNvCompSupport(); + return {}; +} + +[[nodiscard]] DeStatus const& GetGlobalDeStatus() { + static thread_local DeStatus de; + return de; +} +} // namespace xgboost::dc + +#endif // defined(XGBOOST_USE_NVCOMP) diff --git a/src/common/device_compression.cuh b/src/common/device_compression.cuh new file mode 100644 index 000000000000..6ab3e62719d4 --- /dev/null +++ b/src/common/device_compression.cuh @@ -0,0 +1,124 @@ +/** + * Copyright 2025, XGBoost contributors + */ +#pragma once + +#include // for size_t +#include // for uint8_t + +#include "compressed_iterator.h" // for CompressedByteT +#include "cuda_dr_utils.h" // for CUDA_HW_DECOM_AVAILABLE +#include "cuda_pinned_allocator.h" // for HostPinnedMemPool +#include "cuda_stream.h" // for StreamRef +#include "device_compression.h" // for CuMemParams +#include "device_vector.cuh" // for DeviceUVector +#include "ref_resource_view.h" // for RefResourceView +#include "xgboost/span.h" // for Span + +namespace xgboost::dc { + +using HostPinnedMemPool = 
    common::cuda_impl::HostPinnedMemPool;
+
+/**
+ * @brief Use nvcomp to compress the data.
+ *
+ * @param ctx Context, provides the CUDA stream and execution policy.
+ * @param in Input buffer, data to be compressed.
+ * @param p_out Output buffer, storing the compressed data.
+ * @param chunk_size The number of bytes for each chunk.
+ */
+[[nodiscard]] CuMemParams CompressSnappy(Context const* ctx,
+                                         common::Span in,
+                                         dh::DeviceUVector* p_out,
+                                         std::size_t chunk_size);
+/**
+ * @brief Run decompression with metadata cached in a manager object.
+ *
+ * @param stream CUDA stream; it should be an asynchronous stream.
+ * @param mgr Cache for decompression-related data.
+ * @param out Pre-allocated output buffer based on the @ref CuMemParams returned from
+ *            compression.
+ * @param allow_fallback Allow fallback to the nvcomp implementation if the hardware-accelerated
+ *                       implementation is not available. Used for testing.
+ */
+void DecompressSnappy(curt::StreamRef stream, SnappyDecomprMgr const& mgr,
+                      common::Span out, bool allow_fallback);
+
+/**
+ * @brief Coalesce the compressed chunks into a contiguous host pinned buffer.
+ *
+ * @param stream CUDA stream.
+ * @param pool Pinned memory pool for storing the results.
+ * @param in_params Params from @ref CompressSnappy; specifies the chunks.
+ * @param in_buf The buffer storing compressed chunks.
+ * @param p_out Rebuilt parameters to keep track of the coalesced buffers.
+ */
+[[nodiscard]] common::RefResourceView CoalesceCompressedBuffersToHost(
+    curt::StreamRef stream, std::shared_ptr pool,
+    CuMemParams const& in_params, dh::DeviceUVector const& in_buf,
+    CuMemParams* p_out);
+
+// We store decompression parameters as a struct of vectors because nvcomp works with
+// this format, whereas the CUDA driver works with a vector of structs. We can optimize
+// toward the driver decompression function if the overhead becomes significant (too many
+// chunks).
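+//
+// An illustrative sketch of the two layouts (names here mirror the members below; the
+// exact driver struct fields are in the CUDA documentation):
+//
+//   SoA for nvcomp:     srcs = [p0, p1, ...], src_sizes = [s0, s1, ...]
+//   AoS for the driver: params = [{src: p0, srcNumBytes: s0, ...},
+//                                 {src: p1, srcNumBytes: s1, ...}, ...]
+//
+// The manager below therefore keeps both: device vectors for the nvcomp fallback, and a
+// pre-filled CUmemDecompressParams array for the driver path.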
+struct SnappyDecomprMgrImpl {
+  std::size_t n_dst_bytes{0};
+  // src of the CUmemDecompressParams
+  dh::device_vector d_in_chunk_ptrs;
+  // srcNumBytes of the CUmemDecompressParams
+  dh::device_vector d_in_chunk_sizes;
+  // dstNumBytes of the CUmemDecompressParams
+  dh::device_vector d_out_chunk_sizes;
+  // dstActBytes of the CUmemDecompressParams
+  dh::device_vector act_nbytes;
+
+#if defined(CUDA_HW_DECOM_AVAILABLE)
+  using DeParams = common::RefResourceView;
+  DeParams de_params;
+  DeParams de_params_copy;
+#endif  // defined(CUDA_HW_DECOM_AVAILABLE)
+
+  [[nodiscard]] std::size_t Chunks() const {
+#if defined(CUDA_HW_DECOM_AVAILABLE)
+    return de_params.size();
+#else
+    LOG(FATAL) << "CUDA >= 12.8 is required.";
+    return 0;
+#endif  // defined(CUDA_HW_DECOM_AVAILABLE)
+  }
+
+  SnappyDecomprMgrImpl(curt::StreamRef s, std::shared_ptr pool,
+                       CuMemParams params, common::Span in_compressed_data);
+
+#if defined(CUDA_HW_DECOM_AVAILABLE) && defined(XGBOOST_USE_NVCOMP)
+  common::Span GetParams(common::Span out);
+#endif  // defined(CUDA_HW_DECOM_AVAILABLE)
+
+  // big 5
+  SnappyDecomprMgrImpl() = default;
+  SnappyDecomprMgrImpl(SnappyDecomprMgrImpl const& that) = delete;
+  SnappyDecomprMgrImpl(SnappyDecomprMgrImpl&& that) = default;
+  SnappyDecomprMgrImpl& operator=(SnappyDecomprMgrImpl const&) = delete;
+  SnappyDecomprMgrImpl& operator=(SnappyDecomprMgrImpl&&) = default;
+
+  [[nodiscard]] bool Empty() const;
+};
+
+#if defined(XGBOOST_USE_NVCOMP)
+[[nodiscard]] inline auto MakeSnappyDecomprMgr(
+    curt::StreamRef s, std::shared_ptr pool, CuMemParams params,
+    common::Span in_compressed_data) {
+  SnappyDecomprMgr mgr;
+  *mgr.Impl() = SnappyDecomprMgrImpl{s, std::move(pool), std::move(params), in_compressed_data};
+  return mgr;
+}
+#else
+[[nodiscard]] inline auto MakeSnappyDecomprMgr(curt::StreamRef,
+                                               std::shared_ptr, CuMemParams,
+                                               common::Span) {
+  SnappyDecomprMgr mgr;
+  return mgr;
+}
+#endif  // defined(XGBOOST_USE_NVCOMP)
+}  // namespace xgboost::dc
diff --git a/src/common/device_compression.h b/src/common/device_compression.h
new file mode 100644
index 000000000000..beebc1a0343e
--- /dev/null
+++ b/src/common/device_compression.h
@@ -0,0 +1,135 @@
+/**
+ * Copyright 2025, XGBoost contributors
+ *
+ * @brief Implement (de)compression with the help of nvcomp and the HW decompression engine.
+ */
+#pragma once
+
+#include   // for size_t
+#include   // for accumulate
+#include   // for vector
+
+#include "transform_iterator.h"  // for MakeIndexTransformIter
+
+#if defined(XGBOOST_USE_NVCOMP)
+
+#include   // for unique_ptr
+
+#endif  // defined(XGBOOST_USE_NVCOMP)
+
+namespace xgboost::dc {
+/**
+ * The cuda driver @ref CUmemDecompressParams struct without the pointers. We use this
+ * struct to keep track of various buffer sizes. Naming of member variables follows the
+ * CUDA struct.
+ *
+ * The src_nbytes stores the size of the allocated buffer for compressed data, and the
+ * src_act_nbytes stores the actual size of the compressed data, which must not exceed
+ * the allocated size (src_nbytes). The nvcomp API over-allocates for compression.
+ */
+struct ComprParam {
+  enum Algo {
+    kLz4 = 0,
+    kGDeflate = 1,
+    kSnappy = 2,  // the only supported one at the moment.
+  };
+
+  // Compressed buffer bytes
+  std::size_t src_nbytes = 0;
+  // Actual compressed bytes
+  std::size_t src_act_nbytes = 0;
+  // Decompressed bytes.
+  std::size_t dst_nbytes = 0;
+  Algo algo;
+};
+
+/**
+ * @brief A wrapper around a vector of @ref ComprParam to help manage the chunks.
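+ *
+ * For example (hypothetical values), three chunks with src_nbytes = {64, 64, 64},
+ * src_act_nbytes = {40, 20, 30}, and dst_nbytes = {128, 128, 128} yield
+ * TotalSrcBytes() == 192, TotalSrcActBytes() == 90, and TotalDstBytes() == 384.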
+ */
+struct CuMemParams {
+  std::vector params;
+
+  CuMemParams() = default;
+  CuMemParams(CuMemParams const& that) = default;
+  CuMemParams(CuMemParams&& that) = default;
+  CuMemParams& operator=(CuMemParams&& that) = default;
+  CuMemParams& operator=(CuMemParams const& that) = default;
+
+  explicit CuMemParams(std::size_t n_chunks) : params(n_chunks) {}
+
+  ComprParam const& operator[](std::size_t i) const { return this->params[i]; }
+  ComprParam& operator[](std::size_t i) { return this->params[i]; }
+  ComprParam& at(std::size_t i) { return this->params.at(i); }              // NOLINT
+  ComprParam const& at(std::size_t i) const { return this->params.at(i); }  // NOLINT
+  void resize(std::size_t n) { this->params.resize(n); }                    // NOLINT
+
+  [[nodiscard]] auto cbegin() const { return this->params.cbegin(); }  // NOLINT
+  [[nodiscard]] auto cend() const { return this->params.cend(); }     // NOLINT
+
+  [[nodiscard]] auto begin() const { return this->params.begin(); }  // NOLINT
+  [[nodiscard]] auto end() const { return this->params.end(); }      // NOLINT
+  [[nodiscard]] auto begin() { return this->params.begin(); }        // NOLINT
+  [[nodiscard]] auto end() { return this->params.end(); }            // NOLINT
+
+  [[nodiscard]] std::size_t size() const { return this->params.size(); }  // NOLINT
+  [[nodiscard]] bool empty() const { return this->params.empty(); }       // NOLINT
+  [[nodiscard]] auto data() const { return this->params.data(); }         // NOLINT
+
+  [[nodiscard]] std::size_t TotalSrcBytes() const {
+    auto it = common::MakeIndexTransformIter(
+        [this](std::size_t i) { return this->params[i].src_nbytes; });
+    return std::accumulate(it, it + this->size(), static_cast(0));
+  }
+  [[nodiscard]] std::size_t TotalSrcActBytes() const {
+    auto it = common::MakeIndexTransformIter(
+        [this](std::size_t i) { return this->params[i].src_act_nbytes; });
+    return std::accumulate(it, it + this->size(), static_cast(0));
+  }
+  [[nodiscard]] std::size_t TotalDstBytes() const {
+    auto it = common::MakeIndexTransformIter(
+        [this](std::size_t i) { return this->params[i].dst_nbytes; });
+    return std::accumulate(it, it + this->size(), static_cast(0));
+  }
+};
+
+class SnappyDecomprMgrImpl;
+
+/**
+ * @brief Helps create and cache all decompression-related metadata.
+ *
+ * This class is exposed to the CPU code. As a result, it's just a reference to the
+ * @ref SnappyDecomprMgrImpl.
+ */
+class SnappyDecomprMgr {
+ public:
+  SnappyDecomprMgr();
+  SnappyDecomprMgr(SnappyDecomprMgr const& that) = delete;
+  SnappyDecomprMgr(SnappyDecomprMgr&& that);
+  SnappyDecomprMgr& operator=(SnappyDecomprMgr const& that) = delete;
+  SnappyDecomprMgr& operator=(SnappyDecomprMgr&& that);
+
+  ~SnappyDecomprMgr();
+
+  SnappyDecomprMgrImpl* Impl() const;
+
+  [[nodiscard]] bool Empty() const;
+  /**
+   * @brief The number of bytes of the uncompressed data.
+   */
+  [[nodiscard]] std::size_t DecompressedBytes() const;
+
+ private:
+  // Hide the CUDA API calls.
+#if defined(XGBOOST_USE_NVCOMP)
+  std::unique_ptr pimpl_;
+#endif  // defined(XGBOOST_USE_NVCOMP)
+};
+
+struct DeStatus {
+  bool avail{false};               // Whether the DE is present
+  std::size_t max_output_size{0};  // Maximum output size of the buffer
+};
+
+// Get the query result of the DE, stored in a global variable.
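+//
+// A typical call site guards the hardware path on availability, e.g.:
+//
+//   if (dc::GetGlobalDeStatus().avail) { /* use the decompression engine */ }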
+[[nodiscard]] DeStatus const& GetGlobalDeStatus(); +} // namespace xgboost::dc diff --git a/src/common/device_debug.cuh b/src/common/device_debug.cuh new file mode 100644 index 000000000000..6a2dfd285ea4 --- /dev/null +++ b/src/common/device_debug.cuh @@ -0,0 +1,41 @@ +/** + * Copyright 2025, XGBoost contributors + */ +#pragma once + +#include // for size_t +#include // for cout +#include // for vector + +#include "common.h" +#include "device_helpers.cuh" // for CopyDeviceSpanToVector +#include "xgboost/span.h" // for Span +#include "xgboost/string_view.h" // for StringView + +namespace xgboost::debug { +// debug::SyncDevice(__FILE__, __LINE__); +inline void SyncDevice(char const *file = __builtin_FILE(), int32_t line = __builtin_LINE()) { + { + auto err = cudaDeviceSynchronize(); + dh::ThrowOnCudaError(err, file, line); + } + { + auto err = cudaGetLastError(); + dh::ThrowOnCudaError(err, file, line); + } +} + +template +void PrintDeviceSpan(common::Span values, StringView name) { + std::cout << name << std::endl; + std::vector> h_values(values.size()); + dh::CopyDeviceSpanToVector(&h_values, values); + for (std::size_t i = 0; i < values.size(); ++i) { + if (i != 0 && i % 16 == 0) { + std::cout << std::endl; + } + std::cout << h_values[i] << ", "; + } + std::cout << std::endl; +} +} // namespace xgboost::debug diff --git a/src/common/device_helpers.cu b/src/common/device_helpers.cu index 608a535cd8cb..1d29f0d48e13 100644 --- a/src/common/device_helpers.cu +++ b/src/common/device_helpers.cu @@ -1,19 +1,45 @@ /** - * Copyright 2024, XGBoost contributors + * Copyright 2024-2025, XGBoost contributors */ -#include "cuda_rt_utils.h" // for RtVersion +#include "../common/cuda_dr_utils.h" // for GetVersionFromSmi #include "device_helpers.cuh" +#include "device_vector.cuh" // for GrowOnlyVirtualMemVec #include "xgboost/windefs.h" // for xgboost_IS_WIN namespace dh { +namespace { +[[nodiscard]] bool IsSupportedDrVer(std::int32_t major, std::int32_t minor) { + return major > 12 || (major == 12 && minor >= 5); +} + +// Check whether cuda virtual memory can be used. +// Host NUMA allocation requires driver that supports CTK >= 12.5 to be stable +[[nodiscard]] bool CheckVmAlloc() { + std::int32_t major{0}, minor{0}; + xgboost::curt::GetDrVersionGlobal(&major, &minor); + + bool vm_flag = true; + if (IsSupportedDrVer(major, minor)) { + // The result from the driver api is not reliable. The system driver might not match + // the CUDA driver in some obscure cases. + // + // https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html + // Ver Linux Win + // CUDA 12.5 Update 1 >=555.42.06 >=555.85 + // CUDA 12.5 GA >=555.42.02 >=555.85 + vm_flag = xgboost::cudr::GetVersionFromSmiGlobal(&major, &minor) && major >= 555; + } else { + vm_flag = false; + } + return vm_flag; +} +} // namespace + PinnedMemory::PinnedMemory() { #if defined(xgboost_IS_WIN) this->impl_.emplace(); #else - std::int32_t major{0}, minor{0}; - xgboost::curt::DrVersion(&major, &minor); - // Host NUMA allocation requires driver that supports CTK >= 12.5 to be stable. 
- if (major >= 12 && minor >= 5) { + if (CheckVmAlloc()) { this->impl_.emplace(CU_MEM_LOCATION_TYPE_HOST_NUMA); } else { this->impl_.emplace(); diff --git a/src/common/device_helpers.cuh b/src/common/device_helpers.cuh index 5292edbf3591..dbea513ee4ca 100644 --- a/src/common/device_helpers.cuh +++ b/src/common/device_helpers.cuh @@ -1,5 +1,5 @@ /** - * Copyright 2017-2024, XGBoost contributors + * Copyright 2017-2025, XGBoost contributors */ #pragma once #include // thrust::upper_bound @@ -7,6 +7,7 @@ #include // for device_vector #include // thrust::seq #include // for discard_iterator +#include // for make_reverse_iterator #include // make_transform_output_iterator #include #include @@ -16,10 +17,14 @@ #include // for size_t #include #include // for UnitWord, DoubleBuffer +#include // for iterator_traits +#include // for equal_to #include // for variant, visit #include // for vector #include "common.h" +#include "cuda_rt_utils.h" // for GetNumaId, CurrentDevice +#include "cuda_stream.h" // for Stream #include "device_vector.cuh" #include "xgboost/host_device_vector.h" #include "xgboost/logging.h" @@ -118,7 +123,7 @@ inline int32_t CurrentDevice() { // Helper function to get a device from a potentially CPU context. inline auto GetDevice(xgboost::Context const *ctx) { - auto d = (ctx->IsCUDA()) ? ctx->Device() : xgboost::DeviceOrd::CUDA(dh::CurrentDevice()); + auto d = (ctx->IsCUDA()) ? ctx->Device() : xgboost::DeviceOrd::CUDA(::xgboost::curt::CurrentDevice()); CHECK(!d.IsCPU()); return d; } @@ -252,18 +257,6 @@ void Iota(Container array, cudaStream_t stream) { LaunchN(array.size(), stream, [=] __device__(size_t i) { array[i] = i; }); } -// dh::DebugSyncDevice(__FILE__, __LINE__); -inline void DebugSyncDevice(char const *file = __builtin_FILE(), int32_t line = __builtin_LINE()) { - { - auto err = cudaDeviceSynchronize(); - ThrowOnCudaError(err, file, line); - } - { - auto err = cudaGetLastError(); - ThrowOnCudaError(err, file, line); - } -} - // Faster to instantiate than caching_device_vector and invokes no synchronisation // Use this where vector functionality (e.g. resize) is not required template @@ -566,6 +559,11 @@ XGBOOST_DEVICE thrust::transform_iterator MakeTransformIt return thrust::transform_iterator(iter, func); } +template +XGBOOST_DEVICE auto MakeIndexTransformIter(Fn &&fn) { + return thrust::make_transform_iterator(thrust::make_counting_iterator(0ul), std::forward(fn)); +} + template size_t XGBOOST_DEVICE SegmentId(It first, It last, size_t idx) { size_t segment_id = thrust::upper_bound(thrust::seq, first, last, idx) - 1 - first; @@ -604,12 +602,12 @@ struct SegmentedUniqueReduceOp { * \return Number of unique values in total. 
*/ template > + typename ValOutIt, typename CompValue, typename CompKey = std::equal_to> size_t SegmentedUnique(const thrust::detail::execution_policy_base &exec, KeyInIt key_segments_first, KeyInIt key_segments_last, ValInIt val_first, ValInIt val_last, KeyOutIt key_segments_out, ValOutIt val_out, - CompValue comp, CompKey comp_key = thrust::equal_to{}) { - using Key = thrust::pair::value_type>; + CompValue comp, CompKey comp_key = std::equal_to{}) { + using Key = thrust::pair::value_type>; auto unique_key_it = dh::MakeTransformIterator( thrust::make_counting_iterator(static_cast(0)), [=] __device__(size_t i) { @@ -655,22 +653,18 @@ size_t SegmentedUnique(const thrust::detail::execution_policy_base -size_t SegmentedUniqueByKey( - const thrust::detail::execution_policy_base &exec, - SegInIt key_segments_first, SegInIt key_segments_last, KeyInIt key_first, - KeyInIt key_last, ValInIt val_first, SegOutIt key_segments_out, - ValOutIt val_out, Comp comp) { - using Key = - thrust::pair::value_type>; +template +size_t SegmentedUniqueByKey(const thrust::detail::execution_policy_base &exec, + SegInIt key_segments_first, SegInIt key_segments_last, + KeyInIt key_first, KeyInIt key_last, ValInIt val_first, + SegOutIt key_segments_out, ValOutIt val_out, Comp comp) { + using Key = thrust::pair::value_type>; auto unique_key_it = dh::MakeTransformIterator( - thrust::make_counting_iterator(static_cast(0)), - [=] __device__(size_t i) { + thrust::make_counting_iterator(static_cast(0)), [=] __device__(size_t i) { size_t seg = dh::SegmentId(key_segments_first, key_segments_last, i); - return thrust::make_pair(seg, *(key_first + i)); + return cuda::std::make_pair(seg, *(key_first + i)); }); size_t segments_len = key_segments_last - key_segments_first; thrust::fill(exec, key_segments_out, key_segments_out + segments_len, 0); @@ -681,19 +675,19 @@ size_t SegmentedUniqueByKey( auto reduce_it = thrust::make_transform_output_iterator( thrust::make_discard_iterator(), detail::SegmentedUniqueReduceOp{key_segments_out}); - auto uniques_ret = thrust::unique_by_key_copy( - exec, unique_key_it, unique_key_it + n_inputs, val_first, reduce_it, - val_out, [=] __device__(Key const &l, Key const &r) { - if (l.first == r.first) { - // In the same segment. - return comp(thrust::get<1>(l), thrust::get<1>(r)); - } - return false; - }); + auto uniques_ret = + thrust::unique_by_key_copy(exec, unique_key_it, unique_key_it + n_inputs, val_first, + reduce_it, val_out, [=] __device__(Key const &l, Key const &r) { + if (l.first == r.first) { + // In the same segment. + return comp(l.second, r.second); + } + return false; + }); auto n_uniques = uniques_ret.second - val_out; CHECK_LE(n_uniques, n_inputs); - thrust::exclusive_scan(exec, key_segments_out, - key_segments_out + segments_len, key_segments_out, 0); + thrust::exclusive_scan(exec, key_segments_out, key_segments_out + segments_len, key_segments_out, + 0); return n_uniques; } @@ -714,91 +708,9 @@ auto Reduce(Policy policy, InputIt first, InputIt second, Init init, Func reduce return aggregate; } -class CUDAStreamView; - -class CUDAEvent { - std::unique_ptr event_; - - public: - CUDAEvent() - : event_{[] { - auto e = new cudaEvent_t; - dh::safe_cuda(cudaEventCreateWithFlags(e, cudaEventDisableTiming)); - return e; - }(), - [](cudaEvent_t *e) { - if (e) { - dh::safe_cuda(cudaEventDestroy(*e)); - delete e; - } - }} {} - - inline void Record(CUDAStreamView stream); // NOLINT - // Define swap-based ctor to make sure an event is always valid. 
- CUDAEvent(CUDAEvent &&e) : CUDAEvent() { std::swap(this->event_, e.event_); } - CUDAEvent &operator=(CUDAEvent &&e) { - std::swap(this->event_, e.event_); - return *this; - } - - operator cudaEvent_t() const { return *event_; } // NOLINT - cudaEvent_t const *data() const { return this->event_.get(); } // NOLINT -}; - -class CUDAStreamView { - cudaStream_t stream_{nullptr}; - - public: - explicit CUDAStreamView(cudaStream_t s) : stream_{s} {} - void Wait(CUDAEvent const &e) { -#if defined(__CUDACC_VER_MAJOR__) -#if __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ == 0 - // CUDA == 11.0 - dh::safe_cuda(cudaStreamWaitEvent(stream_, cudaEvent_t{e}, 0)); -#else - // CUDA > 11.0 - dh::safe_cuda(cudaStreamWaitEvent(stream_, cudaEvent_t{e}, cudaEventWaitDefault)); -#endif // __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ == 0: -#else // clang - dh::safe_cuda(cudaStreamWaitEvent(stream_, cudaEvent_t{e}, cudaEventWaitDefault)); -#endif // defined(__CUDACC_VER_MAJOR__) - } - operator cudaStream_t() const { // NOLINT - return stream_; - } - cudaError_t Sync(bool error = true) { - if (error) { - dh::safe_cuda(cudaStreamSynchronize(stream_)); - return cudaSuccess; - } - return cudaStreamSynchronize(stream_); - } -}; - -inline void CUDAEvent::Record(CUDAStreamView stream) { // NOLINT - dh::safe_cuda(cudaEventRecord(*event_, cudaStream_t{stream})); -} - -// Changing this has effect on prediction return, where we need to pass the pointer to -// third-party libraries like cuPy -inline CUDAStreamView DefaultStream() { return CUDAStreamView{cudaStreamPerThread}; } - -class CUDAStream { - cudaStream_t stream_; - - public: - CUDAStream() { dh::safe_cuda(cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking)); } - ~CUDAStream() { dh::safe_cuda(cudaStreamDestroy(stream_)); } - - [[nodiscard]] CUDAStreamView View() const { return CUDAStreamView{stream_}; } - [[nodiscard]] cudaStream_t Handle() const { return stream_; } - - void Sync() { this->View().Sync(); } - void Wait(CUDAEvent const &e) { this->View().Wait(e); } -}; - template -void CopyTo(Src const &src, Dst *dst, CUDAStreamView stream = DefaultStream()) { +void CopyTo(Src const &src, Dst *dst, + ::xgboost::curt::StreamRef stream = ::xgboost::curt::DefaultStream()) { if (src.empty()) { dst->clear(); return; @@ -811,12 +723,57 @@ void CopyTo(Src const &src, Dst *dst, CUDAStreamView stream = DefaultStream()) { src.size() * sizeof(SVT), cudaMemcpyDefault, stream)); } +/** + * @brief Wrapper for the @ref cudaMemcpyBatchAsync . + * + * @param dsts Host pointer to a list of device pointers. + * @param srcs Host pointer to a list of device pointers. + * @param sizes Host pointer to a list of sizes. + * @param count How many batches. + * @param fail_idx Which batch has failed, if any. When it's assigned to SIZE_MAX, then + * it's a general error. + * @param stream CUDA stream. The wrapper enforces stream order access. 
+ */ +template +[[nodiscard]] cudaError_t MemcpyBatchAsync(T **dsts, U **srcs, std::size_t const *sizes, + std::size_t count, std::size_t *fail_idx, + cudaStream_t stream) { +#if CUDART_VERSION >= 12080 + static_assert(kind == cudaMemcpyDeviceToHost || kind == cudaMemcpyHostToDevice, + "Not implemented."); + cudaMemcpyAttributes attr; + attr.srcAccessOrder = cudaMemcpySrcAccessOrderStream; + attr.flags = cudaMemcpyFlagPreferOverlapWithCompute; + + auto assign_host = [](cudaMemLocation *hint) { + hint->type = cudaMemLocationTypeHostNuma; + hint->id = xgboost::curt::GetNumaId(); + }; + auto assign_device = [](cudaMemLocation *hint) { + hint->type = cudaMemLocationTypeDevice; + hint->id = xgboost::curt::CurrentDevice(); + }; + if constexpr (kind == cudaMemcpyDeviceToHost) { + assign_device(&attr.srcLocHint); + assign_host(&attr.dstLocHint); + } else { + assign_host(&attr.srcLocHint); + assign_device(&attr.dstLocHint); + } + return cudaMemcpyBatchAsync(dsts, srcs, const_cast(sizes), count, attr, fail_idx, + stream); +#else + LOG(FATAL) << "CUDA >= 12.8 is required."; + return cudaErrorInvalidValue; +#endif // CUDART_VERSION >= 12080 +} + inline auto CachingThrustPolicy() { XGBCachingDeviceAllocator alloc; #if THRUST_MAJOR_VERSION >= 2 || defined(XGBOOST_USE_RMM) - return thrust::cuda::par_nosync(alloc).on(DefaultStream()); + return thrust::cuda::par_nosync(alloc).on(::xgboost::curt::DefaultStream()); #else - return thrust::cuda::par(alloc).on(DefaultStream()); + return thrust::cuda::par(alloc).on(::xgboost::curt::DefaultStream()); #endif // THRUST_MAJOR_VERSION >= 2 || defined(XGBOOST_USE_RMM) } diff --git a/src/common/device_vector.cu b/src/common/device_vector.cu index b7f300df61e2..c82c6c15890e 100644 --- a/src/common/device_vector.cu +++ b/src/common/device_vector.cu @@ -96,11 +96,4 @@ GrowOnlyVirtualMemVec::GrowOnlyVirtualMemVec(CUmemLocationType type) return std::accumulate(it, it + this->va_ranges_.size(), static_cast(0)); } } // namespace detail - -#if defined(XGBOOST_USE_RMM) -LoggingResource *GlobalLoggingResource() { - static auto mr{std::make_unique()}; - return mr.get(); -} -#endif // defined(XGBOOST_USE_RMM) } // namespace dh diff --git a/src/common/device_vector.cuh b/src/common/device_vector.cuh index 352ebf371d26..0d3fa4b93678 100644 --- a/src/common/device_vector.cuh +++ b/src/common/device_vector.cuh @@ -1,5 +1,5 @@ /** - * Copyright 2017-2024, XGBoost Contributors + * Copyright 2017-2025, XGBoost Contributors */ #pragma once #include // for device_malloc_allocator @@ -7,22 +7,13 @@ #include // for device_vector #if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1 -#include // for device_uvector -#include // for exec_policy_nosync -#include // for device_memory_resource -#include // for get_current_device_resource -#include // for thrust_allocator -#include // for RMM_VERSION_MAJOR +#include // for async_resource_ref +#include // for stream_ref +#include // for device_memory_resource +#include // for get_current_device_resource #include "xgboost/global_config.h" // for GlobalConfigThreadLocalStore -#if !defined(RMM_VERSION_MAJOR) || !defined(RMM_VERSION_MINOR) - -#error "Please use RMM version 0.18 or later" -#elif RMM_VERSION_MAJOR == 0 && RMM_VERSION_MINOR < 18 -#error "Please use RMM version 0.18 or later" -#endif // !defined(RMM_VERSION_MAJOR) || !defined(RMM_VERSION_MINOR) - #endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1 #include // for CUmemGenericAllocationHandle @@ -32,10 +23,12 @@ #include // for int64_t #include // for CachingDeviceAllocator #include // 
for CurrentDevice +#include // for function #include // for unique_ptr #include "common.h" // for safe_cuda, HumanMemUnit #include "cuda_dr_utils.h" // for CuDriverApi +#include "cuda_stream.h" // for DefaultStream #include "xgboost/logging.h" #include "xgboost/span.h" // for Span @@ -262,10 +255,45 @@ inline detail::MemoryLogger &GlobalMemoryLogger() { return memory_logger; } +#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1 +using DeviceAsyncResourceRef = cuda::mr::async_resource_ref; +#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1 + namespace detail { #if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1 +/** + * @brief Similar to `rmm::mr::thrust_allocator`. + */ template -using XGBBaseDeviceAllocator = rmm::mr::thrust_allocator; +class ThrustAllocMrAdapter : public thrust::device_malloc_allocator { + DeviceAsyncResourceRef mr_{rmm::mr::get_current_device_resource()}; + + public: + using Super = thrust::device_malloc_allocator; + using pointer = typename Super::pointer; // NOLINT(readability-identifier-naming) + using size_type = typename Super::size_type; // NOLINT(readability-identifier-naming) + + template + struct rebind { // NOLINT(readability-identifier-naming) + using other = ThrustAllocMrAdapter; // NOLINT(readability-identifier-naming) + }; + + ThrustAllocMrAdapter() = default; + pointer allocate(size_type n) { // NOLINT(readability-identifier-naming) + auto n_bytes = xgboost::common::SizeBytes(n); + auto s = cuda::stream_ref{::xgboost::curt::DefaultStream()}; + auto p = static_cast(mr_.allocate_async(n_bytes, std::alignment_of_v, s)); + return thrust::device_pointer_cast(p); + } + void deallocate(pointer ptr, size_type n) { // NOLINT(readability-identifier-naming) + auto n_bytes = xgboost::common::SizeBytes(n); + auto s = ::xgboost::curt::DefaultStream(); + return mr_.deallocate_async(thrust::raw_pointer_cast(ptr), n_bytes, cuda::stream_ref{s}); + } +}; + +template +using XGBBaseDeviceAllocator = ThrustAllocMrAdapter; #else // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1 template using XGBBaseDeviceAllocator = thrust::device_malloc_allocator; @@ -298,10 +326,7 @@ struct XGBDefaultDeviceAllocatorImpl : XGBBaseDeviceAllocator { GlobalMemoryLogger().RegisterDeallocation(n * sizeof(T)); SuperT::deallocate(ptr, n); } -#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1 - XGBDefaultDeviceAllocatorImpl() - : SuperT(rmm::cuda_stream_per_thread, rmm::mr::get_current_device_resource()) {} -#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1 + XGBDefaultDeviceAllocatorImpl() : SuperT{} {} }; /** @@ -357,8 +382,7 @@ struct XGBCachingDeviceAllocatorImpl : XGBBaseDeviceAllocator { } #if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1 XGBCachingDeviceAllocatorImpl() - : SuperT(rmm::cuda_stream_per_thread, rmm::mr::get_current_device_resource()), - use_cub_allocator_(!xgboost::GlobalConfigThreadLocalStore::Get()->use_rmm) {} + : SuperT{}, use_cub_allocator_(!xgboost::GlobalConfigThreadLocalStore::Get()->use_rmm) {} #endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1 XGBOOST_DEVICE void construct(T *) {} // NOLINT private: @@ -382,65 +406,9 @@ using XGBCachingDeviceAllocator = detail::XGBCachingDeviceAllocatorImpl; * OOM errors. */ template -using device_vector = thrust::device_vector>; // NOLINT +using device_vector = thrust::device_vector>; // NOLINT template -using caching_device_vector = thrust::device_vector>; // NOLINT - -#if defined(XGBOOST_USE_RMM) -/** - * @brief Similar to `rmm::logging_resource_adaptor`, but uses XGBoost memory logger instead. 
- */ -class LoggingResource : public rmm::mr::device_memory_resource { - rmm::mr::device_memory_resource *mr_{rmm::mr::get_current_device_resource()}; - - public: - LoggingResource() = default; - ~LoggingResource() override = default; - LoggingResource(LoggingResource const &) = delete; - LoggingResource &operator=(LoggingResource const &) = delete; - LoggingResource(LoggingResource &&) noexcept = delete; - LoggingResource &operator=(LoggingResource &&) noexcept = delete; - - [[nodiscard]] rmm::device_async_resource_ref get_upstream_resource() const noexcept { // NOLINT - return mr_; - } - [[nodiscard]] rmm::mr::device_memory_resource *get_upstream() const noexcept { // NOLINT - return mr_; - } - - void *do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override { // NOLINT - try { - auto const ptr = mr_->allocate(bytes, stream); - GlobalMemoryLogger().RegisterAllocation(bytes); - return ptr; - } catch (rmm::bad_alloc const &e) { - detail::ThrowOOMError(e.what(), bytes); - } - return nullptr; - } - - void do_deallocate(void *ptr, std::size_t bytes, // NOLINT - rmm::cuda_stream_view stream) override { - mr_->deallocate(ptr, bytes, stream); - GlobalMemoryLogger().RegisterDeallocation(bytes); - } - - [[nodiscard]] bool do_is_equal( // NOLINT - device_memory_resource const &other) const noexcept override { - if (this == &other) { - return true; - } - auto const *cast = dynamic_cast(&other); - if (cast == nullptr) { - return mr_->is_equal(other); - } - return get_upstream_resource() == cast->get_upstream_resource(); - } -}; - -LoggingResource *GlobalLoggingResource(); - -#endif // defined(XGBOOST_USE_RMM) +using caching_device_vector = thrust::device_vector>; // NOLINT /** * @brief Container class that doesn't initialize the data when RMM is used. @@ -448,11 +416,13 @@ LoggingResource *GlobalLoggingResource(); template class DeviceUVectorImpl { private: -#if defined(XGBOOST_USE_RMM) - rmm::device_uvector data_{0, rmm::cuda_stream_per_thread, GlobalLoggingResource()}; -#else - std::conditional_t, ::dh::device_vector> data_; -#endif // defined(XGBOOST_USE_RMM) + using Alloc = + std::conditional_t, dh::XGBDeviceAllocator>; + Alloc alloc_; + + std::size_t size_{0}; + std::size_t capacity_{0}; + std::unique_ptr> data_; public: using value_type = T; // NOLINT @@ -469,47 +439,66 @@ class DeviceUVectorImpl { DeviceUVectorImpl(DeviceUVectorImpl &&that) = default; DeviceUVectorImpl &operator=(DeviceUVectorImpl &&that) = default; + [[nodiscard]] std::size_t Capacity() const { return this->capacity_; } + + // Resize without init. void resize(std::size_t n) { // NOLINT -#if defined(XGBOOST_USE_RMM) - data_.resize(n, rmm::cuda_stream_per_thread); -#else - data_.resize(n); -#endif + using ::xgboost::common::SizeBytes; + + if (n <= this->Capacity()) { + this->size_ = n; + // early exit as no allocation is needed. 
+    return;
+  }
+  CHECK_LE(this->size(), this->Capacity());
+
+  Alloc alloc = this->alloc_;
+  decltype(data_) new_ptr{thrust::raw_pointer_cast(this->alloc_.allocate(n)),
+                          [=](T *ptr) mutable {
+                            if (ptr) {
+                              alloc.deallocate(thrust::device_pointer_cast(ptr), n);
+                            }
+                          }};
+  CHECK(new_ptr.get());
+
+  auto s = ::xgboost::curt::DefaultStream();
+  safe_cuda(cudaMemcpyAsync(new_ptr.get(), this->data(), SizeBytes(this->size()),
+                            cudaMemcpyDefault, s));
+  this->size_ = n;
+  this->capacity_ = n;
+
+  this->data_ = std::move(new_ptr);
+  // swap failed with CTK 12.8
+  // std::swap(this->data_, new_ptr);
  }

- void resize(std::size_t n, T const &v) { // NOLINT
-#if defined(XGBOOST_USE_RMM)
+  // Resize with init.
+  void resize(std::size_t n, T const &v) {  // NOLINT
    auto orig = this->size();
-    data_.resize(n, rmm::cuda_stream_per_thread);
+    this->resize(n);
    if (orig < n) {
-      thrust::fill(rmm::exec_policy_nosync{}, this->begin() + orig, this->end(), v);
+      auto exec = thrust::cuda::par_nosync.on(::xgboost::curt::DefaultStream());
+      thrust::fill(exec, this->begin() + orig, this->end(), v);
    }
-#else
-    data_.resize(n, v);
-#endif
  }

  void clear() {  // NOLINT
-#if defined(XGBOOST_USE_RMM)
-    this->data_.resize(0, rmm::cuda_stream_per_thread);
-#else
-    this->data_.clear();
-#endif  // defined(XGBOOST_USE_RMM)
+    this->resize(0);
  }

-  [[nodiscard]] std::size_t size() const { return data_.size(); }  // NOLINT
-  [[nodiscard]] bool empty() const { return this->size() == 0; }   // NOLINT
+  [[nodiscard]] std::size_t size() const { return this->size_; }  // NOLINT
+  [[nodiscard]] bool empty() const { return this->size() == 0; }  // NOLINT

-  [[nodiscard]] auto begin() { return data_.begin(); }  // NOLINT
-  [[nodiscard]] auto end() { return data_.end(); }      // NOLINT
+  [[nodiscard]] auto begin() { return this->data(); }               // NOLINT
+  [[nodiscard]] auto end() { return this->data() + this->size(); }  // NOLINT

  [[nodiscard]] auto begin() const { return this->cbegin(); }  // NOLINT
  [[nodiscard]] auto end() const { return this->cend(); }      // NOLINT

-  [[nodiscard]] auto cbegin() const { return data_.cbegin(); }  // NOLINT
-  [[nodiscard]] auto cend() const { return data_.cend(); }      // NOLINT
+  [[nodiscard]] auto cbegin() const { return this->data(); }               // NOLINT
+  [[nodiscard]] auto cend() const { return this->data() + this->size(); }  // NOLINT

-  [[nodiscard]] auto data() { return thrust::raw_pointer_cast(data_.data()); }        // NOLINT
-  [[nodiscard]] auto data() const { return thrust::raw_pointer_cast(data_.data()); }  // NOLINT
+  [[nodiscard]] auto data() { return this->data_.get(); }        // NOLINT
+  [[nodiscard]] auto data() const { return this->data_.get(); }  // NOLINT
};

template <typename T>
diff --git a/src/common/error_msg.cc b/src/common/error_msg.cc
index 765b6a6ba31b..0eb215f95f40 100644
--- a/src/common/error_msg.cc
+++ b/src/common/error_msg.cc
@@ -1,12 +1,14 @@
 /**
- * Copyright 2023 by XGBoost contributors
+ * Copyright 2023-2025, XGBoost contributors
  */
 #include "error_msg.h"

-#include <mutex>    // for call_once, once_flag
-#include <sstream>  // for stringstream
+#include <mutex>         // for call_once, once_flag
+#include <sstream>       // for stringstream
+#include <system_error>  // for error_code, system_category

 #include "../collective/communicator-inl.h"  // for GetRank
+#include "xgboost/collective/socket.h"       // for LastError
 #include "xgboost/context.h"                 // for Context
 #include "xgboost/logging.h"
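The `<mutex>` include kept above serves the warn-once helpers in this file: each wraps its LOG(WARNING) in std::call_once guarded by a function-local std::once_flag, as WarnManualUpdater and the removed WarnDeprecatedGPUId in the next hunk show. A minimal self-contained sketch of the pattern (names are illustrative only, not XGBoost code):

#include <iostream>  // for cerr
#include <mutex>     // for call_once, once_flag

void WarnOnce() {
  static std::once_flag flag;
  // The lambda runs at most once per process, no matter how many callers race here.
  std::call_once(flag, [] { std::cerr << "WARNING: emitted a single time\n"; });
}

int main() {
  WarnOnce();
  WarnOnce();  // no output the second time
}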
@@ -17,14 +19,25 @@ namespace xgboost::error {
   return ss.str();
 }

-void WarnDeprecatedGPUHist() {
-  auto msg =
-      "The tree method `gpu_hist` is deprecated since 2.0.0. To use GPU training, set the `device` "
-      R"(parameter to CUDA instead.
+[[nodiscard]] std::string InvalidModel(StringView fname) {
+  std::stringstream ss;
+  ss << "Invalid model format in: `" << fname << "`.";
+  return ss.str();
+}

-    E.g. tree_method = "hist", device = "cuda"
-)";
-  LOG(WARNING) << msg;
+[[nodiscard]] std::string OldBinaryModel(StringView fname) {
+  std::stringstream ss;
+  ss << "Failed to load model: `" << fname << "`. ";
+  ss << R"doc(
+The binary format was deprecated in 1.6 and removed in 3.1; use UBJ or JSON
+instead. You can port the binary model to UBJ and JSON by re-saving it with XGBoost
+3.0. See:
+
+  https://xgboost.readthedocs.io/en/stable/tutorials/saving_model.html
+
+for more info.
+)doc";
+  return ss.str();
 }

 void WarnManualUpdater() {
@@ -37,15 +50,6 @@ void WarnManualUpdater() {
   });
 }

-void WarnDeprecatedGPUId() {
-  static std::once_flag flag;
-  std::call_once(flag, [] {
-    auto msg = DeprecatedFunc("gpu_id", "2.0.0", "device");
-    msg += " E.g. device=cpu/cuda/cuda:0";
-    LOG(WARNING) << msg;
-  });
-}
-
 void WarnEmptyDataset() {
   static std::once_flag flag;
   std::call_once(flag,
@@ -85,4 +89,23 @@ void CheckOldNccl(std::int32_t major, std::int32_t minor, std::int32_t patch) {
     LOG(WARNING) << msg();
   }
 }
+
+[[nodiscard]] std::error_code SystemError() {
+  std::int32_t errsv = system::LastError();
+  auto err = std::error_code{errsv, std::system_category()};
+  return err;
+}
+
+void InvalidIntercept(std::int32_t n_classes, bst_target_t n_targets, std::size_t intercept_len) {
+  std::stringstream ss;
+  ss << "Invalid `base_score`, it should match the number of outputs for multi-class/target "
+     << "models. `base_score` len: " << intercept_len;
+  if (n_classes > 1) {
+    ss << ", `n_classes`: " << n_classes;
+  }
+  if (n_targets > 1) {
+    ss << ", `n_targets`: " << n_targets;
+  }
+  LOG(FATAL) << ss.str();
+}
 }  // namespace xgboost::error
diff --git a/src/common/error_msg.h b/src/common/error_msg.h
index 16652d1958ba..7960fe2ec1ee 100644
--- a/src/common/error_msg.h
+++ b/src/common/error_msg.h
@@ -1,14 +1,15 @@
 /**
- * Copyright 2023-2024, XGBoost contributors
+ * Copyright 2023-2025, XGBoost contributors
  *
  * \brief Common error message for various checks.
  */
 #ifndef XGBOOST_COMMON_ERROR_MSG_H_
 #define XGBOOST_COMMON_ERROR_MSG_H_

-#include <cstdint>  // for uint64_t
-#include <limits>   // for numeric_limits
-#include <string>   // for string
+#include <cstdint>       // for uint64_t
+#include <limits>        // for numeric_limits
+#include <string>        // for string
+#include <system_error>  // for error_code

 #include "xgboost/base.h"     // for bst_feature_t
 #include "xgboost/context.h"  // for Context
@@ -83,11 +84,11 @@ inline void WarnOldSerialization() {
   logged = true;
 }

-void WarnDeprecatedGPUHist();
+[[nodiscard]] std::string InvalidModel(StringView fname);

-void WarnManualUpdater();
+[[nodiscard]] std::string OldBinaryModel(StringView fname);

-void WarnDeprecatedGPUId();
+void WarnManualUpdater();

 void WarnEmptyDataset();

@@ -128,5 +129,23 @@ constexpr StringView ZeroCudaMemory() {
          "support. If you are using other types of memory pool, please consider reserving a "
          "portion of the GPU memory for XGBoost.";
 }
+
+// float64 is not supported by JSON yet. Also, floating point as categories is tricky,
+// since floating-point equality tests are unreliable on most hardware.
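The comment above is the rationale for the NoFloatCat message that follows: category lookup needs exact equality, and exact equality on floating point is a trap. A two-line demonstration (illustration only, not part of the patch):

#include <iostream>

int main() {
  double cat = 0.1 + 0.2;
  std::cout << (cat == 0.3) << "\n";  // prints 0: the sum is 0.30000000000000004
}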
+constexpr StringView NoFloatCat() { + return "Category index from DataFrame has floating point dtype, consider using strings or " + "integers instead."; +} + +constexpr StringView CacheHostRatioNotImpl() { + return "`cache_host_ratio` is only used by the GPU `ExtMemQuantileDMatrix`."; +} +constexpr StringView CacheHostRatioInvalid() { + return "`cache_host_ratio` must be in range [0, 1]."; +} + +[[nodiscard]] std::error_code SystemError(); + +void InvalidIntercept(std::int32_t n_classes, bst_target_t n_targets, std::size_t intercept_len); } // namespace xgboost::error #endif // XGBOOST_COMMON_ERROR_MSG_H_ diff --git a/src/common/hist_util.cc b/src/common/hist_util.cc index dfd80cb68c13..501a308465e0 100644 --- a/src/common/hist_util.cc +++ b/src/common/hist_util.cc @@ -1,5 +1,5 @@ /** - * Copyright 2017-2023 by XGBoost Contributors + * Copyright 2017-2025, XGBoost Contributors * \file hist_util.cc */ #include "hist_util.h" @@ -10,6 +10,7 @@ #include "../data/adapter.h" // for SparsePageAdapterBatch #include "../data/gradient_index.h" // for GHistIndexMatrix +#include "io.h" // for AlignedResourceReadStream, AlignedFileWriteStream #include "quantile.h" #include "xgboost/base.h" #include "xgboost/context.h" // for Context @@ -29,6 +30,27 @@ HistogramCuts::HistogramCuts() { cut_ptrs_.HostVector().emplace_back(0); } +void HistogramCuts::Save(common::AlignedFileWriteStream *fo) const { + auto const &ptrs = this->Ptrs(); + CHECK_LE(Span{ptrs}.size_bytes(), WriteVec(fo, ptrs)); + auto const &vals = this->Values(); + CHECK_LE(Span{vals}.size_bytes(), WriteVec(fo, vals)); + auto const &mins = this->MinValues(); + CHECK_LE(Span{mins}.size_bytes(), WriteVec(fo, mins)); + CHECK_GE(fo->Write(has_categorical_), sizeof(has_categorical_)); + CHECK_GE(fo->Write(max_cat_), sizeof(max_cat_)); +} + +[[nodiscard]] HistogramCuts *HistogramCuts::Load(common::AlignedResourceReadStream *fi) { + auto p_cuts = new HistogramCuts; + CHECK(ReadVec(fi, &p_cuts->cut_ptrs_.HostVector())); + CHECK(ReadVec(fi, &p_cuts->cut_values_.HostVector())); + CHECK(ReadVec(fi, &p_cuts->min_vals_.HostVector())); + CHECK(fi->Read(&p_cuts->has_categorical_)); + CHECK(fi->Read(&p_cuts->max_cat_)); + return p_cuts; +} + HistogramCuts SketchOnDMatrix(Context const *ctx, DMatrix *m, bst_bin_t max_bins, bool use_sorted, Span hessian) { HistogramCuts out; diff --git a/src/common/hist_util.cuh b/src/common/hist_util.cuh index ffdafa29205c..878301729f53 100644 --- a/src/common/hist_util.cuh +++ b/src/common/hist_util.cuh @@ -1,5 +1,5 @@ /** - * Copyright 2020-2024, XGBoost contributors + * Copyright 2020-2025, XGBoost contributors * * \brief Front end and utilities for GPU based sketching. Works on sliding window * instead of stream. 
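The hist_util.cuh hunk that follows swaps cub::CTA_SYNC(), an internal CUB helper, for the standard __syncthreads() intrinsic in GetColumnSizeSharedMemKernel. The kernel needs two block-wide barriers: one after zeroing the shared counters and one before flushing them to global memory. A minimal CUDA sketch of that barrier placement (a hypothetical kernel, not the patch's code):

#include <cstddef>
#include <cstdint>

__global__ void CountBins(std::uint32_t const *bin_ids, std::size_t n, std::uint32_t *out,
                          std::uint32_t n_bins) {
  extern __shared__ std::uint32_t smem[];
  for (std::uint32_t i = threadIdx.x; i < n_bins; i += blockDim.x) {
    smem[i] = 0;
  }
  __syncthreads();  // barrier 1: counters must be zeroed before accumulation starts

  for (std::size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n;
       i += static_cast<std::size_t>(gridDim.x) * blockDim.x) {
    atomicAdd(&smem[bin_ids[i]], 1u);
  }
  __syncthreads();  // barrier 2: every block-local update must land before the flush

  for (std::uint32_t i = threadIdx.x; i < n_bins; i += blockDim.x) {
    atomicAdd(&out[i], smem[i]);
  }
}
// Launch with dynamic shared memory sized to the bin count:
//   CountBins<<<grid, block, n_bins * sizeof(std::uint32_t), stream>>>(bin_ids, n, out, n_bins);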
@@ -15,9 +15,9 @@ #include // for uint32_t #include // for numeric_limits -#include "../data/adapter.h" // for IsValidFunctor -#include "algorithm.cuh" // for CopyIf -#include "cuda_context.cuh" // for CUDAContext +#include "../data/entry.h" // for IsValidFunctor +#include "algorithm.cuh" // for CopyIf +#include "cuda_context.cuh" // for CUDAContext #include "device_helpers.cuh" #include "hist_util.h" #include "quantile.cuh" @@ -45,7 +45,7 @@ __global__ void GetColumnSizeSharedMemKernel(IterSpan batch_iter, dh::BlockFill(smem_cs_ptr, out_column_size.size(), 0); - cub::CTA_SYNC(); + __syncthreads(); auto n = batch_iter.size(); @@ -56,7 +56,7 @@ __global__ void GetColumnSizeSharedMemKernel(IterSpan batch_iter, } } - cub::CTA_SYNC(); + __syncthreads(); auto out_global_ptr = out_column_size; for (auto i : dh::BlockStrideRange(static_cast(0), out_column_size.size())) { diff --git a/src/common/hist_util.h b/src/common/hist_util.h index dc2bc3fd6a89..ce0a118af7fd 100644 --- a/src/common/hist_util.h +++ b/src/common/hist_util.h @@ -24,6 +24,9 @@ namespace xgboost { class GHistIndexMatrix; namespace common { +class AlignedFileWriteStream; +class AlignedResourceReadStream; + /*! * \brief A single row in global histogram index. * Directly represent the global index in the histogram entry. @@ -175,6 +178,9 @@ class HistogramCuts { this->min_vals_.SetDevice(d); this->min_vals_.ConstDevicePointer(); } + + void Save(common::AlignedFileWriteStream* fo) const; + [[nodiscard]] static HistogramCuts* Load(common::AlignedResourceReadStream* fi); }; /** diff --git a/src/common/host_device_vector.cc b/src/common/host_device_vector.cc index ab3d782ec14f..a1201258da49 100644 --- a/src/common/host_device_vector.cc +++ b/src/common/host_device_vector.cc @@ -179,13 +179,16 @@ template class HostDeviceVector; template class HostDeviceVector; template class HostDeviceVector; template class HostDeviceVector; -template class HostDeviceVector; // bst_node_t -template class HostDeviceVector; -template class HostDeviceVector; +template class HostDeviceVector; // bst_node_t +template class HostDeviceVector; +template class HostDeviceVector; template class HostDeviceVector; template class HostDeviceVector; template class HostDeviceVector; -template class HostDeviceVector; // bst_feature_t +template class HostDeviceVector; // bst_feature_t +template class HostDeviceVector; +template class HostDeviceVector; +template class HostDeviceVector; #if defined(__APPLE__) || defined(__EMSCRIPTEN__) /* diff --git a/src/common/host_device_vector.cu b/src/common/host_device_vector.cu index f4ec79539678..d492285cb01a 100644 --- a/src/common/host_device_vector.cu +++ b/src/common/host_device_vector.cu @@ -1,5 +1,5 @@ /** - * Copyright 2017-2024, XGBoost contributors + * Copyright 2017-2025, XGBoost contributors */ #include @@ -7,6 +7,7 @@ #include // for size_t #include +#include "cuda_stream.h" // for DefaultStream #include "device_helpers.cuh" #include "device_vector.cuh" // for DeviceUVector #include "xgboost/data.h" @@ -92,7 +93,7 @@ class HostDeviceVectorImpl { gpu_access_ = GPUAccess::kWrite; SetDevice(); auto s_data = dh::ToSpan(*data_d_); - dh::LaunchN(data_d_->size(), dh::DefaultStream(), + dh::LaunchN(data_d_->size(), curt::DefaultStream(), [=] XGBOOST_DEVICE(size_t i) { s_data[i] = v; }); } } @@ -141,7 +142,7 @@ class HostDeviceVectorImpl { CHECK_EQ(this->Device(), other->Device()); dh::safe_cuda(cudaMemcpyAsync(this->DevicePointer() + ori_size, ptr, other->Size() * sizeof(T), cudaMemcpyDeviceToDevice, - dh::DefaultStream())); + 
curt::DefaultStream()));
     }
   }

@@ -215,7 +216,7 @@ class HostDeviceVectorImpl {
     LazyResizeDevice(data_h_.size());
     SetDevice();
     dh::safe_cuda(cudaMemcpyAsync(data_d_->data(), data_h_.data(), data_d_->size() * sizeof(T),
-                                  cudaMemcpyHostToDevice, dh::DefaultStream()));
+                                  cudaMemcpyHostToDevice, curt::DefaultStream()));
     gpu_access_ = access;
   }

@@ -242,7 +243,7 @@ class HostDeviceVectorImpl {
     SetDevice();
     dh::safe_cuda(cudaMemcpyAsync(data_d_->data(), other->data_d_->data(),
                                   data_d_->size() * sizeof(T), cudaMemcpyDefault,
-                                  dh::DefaultStream()));
+                                  curt::DefaultStream()));
   }
 }

@@ -251,7 +252,7 @@ class HostDeviceVectorImpl {
     gpu_access_ = GPUAccess::kWrite;
     SetDevice();
     dh::safe_cuda(cudaMemcpyAsync(data_d_->data(), begin, data_d_->size() * sizeof(T),
-                                  cudaMemcpyDefault, dh::DefaultStream()));
+                                  cudaMemcpyDefault, curt::DefaultStream()));
   }

   void LazyResizeDevice(size_t new_size) {
@@ -413,6 +414,7 @@ template class HostDeviceVector;
 template class HostDeviceVector;
 template class HostDeviceVector;
 template class HostDeviceVector;
+template class HostDeviceVector;
 template class HostDeviceVector;  // bst_node_t
 template class HostDeviceVector;
 template class HostDeviceVector;
diff --git a/src/common/io.cc b/src/common/io.cc
index a83c1da3c7f2..0b2f34b44a30 100644
--- a/src/common/io.cc
+++ b/src/common/io.cc
@@ -1,9 +1,10 @@
 /**
- * Copyright 2019-2024, by XGBoost Contributors
+ * Copyright 2019-2025, by XGBoost Contributors
 */
+#include "error_msg.h"

 #if defined(__unix__) || defined(__APPLE__)
-#include <fcntl.h>     // for open, O_RDONLY
+#include <fcntl.h>     // for open, O_RDONLY, posix_fadvise
 #include <sys/mman.h>  // for mmap, munmap, madvise
 #include <unistd.h>    // for close, getpagesize

@@ -12,28 +13,28 @@
 #include

 #if defined(xgboost_IS_WIN)
-#include <windows.h>
+
+#include <windows.h>  // for CreateFileMapping2, CreateFileEx...
+
 #endif  // defined(xgboost_IS_WIN)

 #endif  // defined(__unix__) || defined(__APPLE__)

 #include <algorithm>   // for copy, transform
 #include <cctype>      // for tolower
-#include <cerrno>      // for errno
 #include <cstddef>     // for size_t
 #include <cstdint>     // for int32_t, uint32_t
+#include <cstdio>      // for fread, fseek
 #include <cstring>     // for memcpy
 #include <filesystem>  // for filesystem, weakly_canonical
 #include <fstream>     // for ifstream
 #include <iterator>    // for distance
 #include <memory>      // for unique_ptr, make_unique
 #include <string>      // for string
-#include <system_error>  // for error_code, system_category
 #include <utility>     // for move
 #include <vector>      // for vector

 #include "io.h"
-#include "xgboost/collective/socket.h"  // for LastError
 #include "xgboost/logging.h"      // for CHECK_LE
 #include "xgboost/string_view.h"  // for StringView

@@ -41,6 +42,10 @@
 #include <limits>  // for numeric_limits
 #endif

+#if defined(__linux__)
+#include <sys/sysinfo.h>
+#endif
+
 namespace xgboost::common {
 size_t PeekableInStream::Read(void* dptr, size_t size) {
   size_t nbuffer = buffer_.length() - buffer_ptr_;
@@ -48,8 +53,7 @@ size_t PeekableInStream::Read(void* dptr, size_t size) {
   if (nbuffer < size) {
     std::memcpy(dptr, dmlc::BeginPtr(buffer_) + buffer_ptr_, nbuffer);
     buffer_ptr_ += nbuffer;
-    return nbuffer + strm_->Read(reinterpret_cast<char*>(dptr) + nbuffer,
-                                 size - nbuffer);
+    return nbuffer + strm_->Read(reinterpret_cast<char*>(dptr) + nbuffer, size - nbuffer);
   } else {
     std::memcpy(dptr, dmlc::BeginPtr(buffer_) + buffer_ptr_, size);
     buffer_ptr_ += size;
@@ -96,7 +100,7 @@ size_t FixedSizeStream::Read(void* dptr, size_t size) {
 }

 size_t FixedSizeStream::PeekRead(void* dptr, size_t size) {
-  if (size >= buffer_.size() - pointer_) {
+  if (size >= buffer_.size() - pointer_) {
     std::copy(buffer_.cbegin() + pointer_, buffer_.cend(), reinterpret_cast<char*>(dptr));
     return std::distance(buffer_.cbegin() + pointer_, buffer_.cend());
   } else {
@@ -130,19 +134,13 @@ std::size_t GetMmapAlignment() {
   return getpagesize();
 #endif
 }
-
-auto SystemErrorMsg() {
-  std::int32_t errsv = system::LastError();
-  auto err = std::error_code{errsv, std::system_category()};
-  return err.message();
-}
 }  // anonymous namespace

 std::vector<char> LoadSequentialFile(std::string uri) {
   auto OpenErr = [&uri]() {
     std::string msg;
     msg = "Opening " + uri + " failed: ";
-    msg += SystemErrorMsg();
+    msg += error::SystemError().message();
     LOG(FATAL) << msg;
   };

@@ -177,6 +175,57 @@ std::string FileExtension(std::string fname, bool lower) {
   }
 }

+struct MmapFileImpl {
+#if defined(xgboost_IS_WIN)
+  HANDLE fd{INVALID_HANDLE_VALUE};
+  HANDLE file_map{INVALID_HANDLE_VALUE};
+#else
+  std::int32_t fd{0};
+#endif  // defined(xgboost_IS_WIN)
+  std::byte* base_ptr{nullptr};
+  std::size_t base_size{0};
+  std::size_t delta{0};
+  std::string path;
+
+  MmapFileImpl() = default;
+
+#if defined(xgboost_IS_WIN)
+  MmapFileImpl(HANDLE fd, HANDLE fm, std::byte* base_ptr, std::size_t base_size, std::size_t delta,
+               std::string path)
+      : fd{fd},
+        file_map{fm},
+        base_ptr{base_ptr},
+        base_size{base_size},
+        delta{delta},
+        path{std::move(path)} {}
+#else
+  MmapFileImpl(std::int32_t fd, std::byte* base_ptr, std::size_t base_size, std::size_t delta,
+               std::string path)
+      : fd{fd}, base_ptr{base_ptr}, base_size{base_size}, delta{delta}, path{std::move(path)} {}
+#endif  // defined(xgboost_IS_WIN)
+
+  void const* Data() const { return this->base_ptr + this->delta; }
+  void* Data() { return this->base_ptr + this->delta; }
+};
+
+void const* MMAPFile::Data() const {
+  if (!this->p_impl) {
+    return nullptr;
+  }
+  return this->p_impl->Data();
+}
+
+void* MMAPFile::Data() {
+  if (!this->p_impl) {
+    return nullptr;
+  }
+  return this->p_impl->Data();
+}
+
+[[nodiscard]] Span<std::byte> MMAPFile::BasePtr() const {
+  return Span{this->p_impl->base_ptr, this->p_impl->base_size};
+}
+
 // For some reason, NVCC 12.1 marks the function deleted if we expose it in the header.
 // NVCC 11.8 doesn't allow `noexcept(false) = default` altogether.
 ResourceHandler::~ResourceHandler() noexcept(false) {}  // NOLINT

@@ -189,10 +238,11 @@ MMAPFile* detail::OpenMmap(std::string path, std::size_t offset, std::size_t len
 #if defined(xgboost_IS_WIN)
   HANDLE fd = CreateFile(path.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING,
                          FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED, nullptr);
-  CHECK_NE(fd, INVALID_HANDLE_VALUE) << "Failed to open:" << path << ". " << SystemErrorMsg();
+  CHECK_NE(fd, INVALID_HANDLE_VALUE)
+      << "Failed to open:" << path << ". " << error::SystemError().message();
 #else
   auto fd = open(path.c_str(), O_RDONLY);
-  CHECK_GE(fd, 0) << "Failed to open:" << path << ". " << SystemErrorMsg();
+  CHECK_GE(fd, 0) << "Failed to open:" << path << ". " << error::SystemError().message();
 #endif

   std::byte* ptr{nullptr};
@@ -203,26 +253,30 @@ MMAPFile* detail::OpenMmap(std::string path, std::size_t offset, std::size_t len
 #if defined(__linux__) || defined(__GLIBC__)
   int prot{PROT_READ};
   ptr = reinterpret_cast<std::byte*>(mmap(nullptr, view_size, prot, MAP_PRIVATE, fd, view_start));
-  CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << SystemErrorMsg();
-  auto handle = new MMAPFile{fd, ptr, view_size, offset - view_start, std::move(path)};
+  CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << error::SystemError().message();
+  auto handle = new MMAPFile{
+      std::make_unique<MmapFileImpl>(fd, ptr, view_size, offset - view_start, std::move(path))};
 #elif defined(xgboost_IS_WIN)
-  auto file_size = GetFileSize(fd, nullptr);
-  DWORD access = PAGE_READONLY;
-  auto map_file = CreateFileMapping(fd, nullptr, access, 0, file_size, nullptr);
-  access = FILE_MAP_READ;
-  std::uint32_t loff = static_cast<std::uint32_t>(view_start);
-  std::uint32_t hoff = view_start >> 32;
-  CHECK(map_file) << "Failed to map: " << path << ". " << SystemErrorMsg();
-  ptr = reinterpret_cast<std::byte*>(MapViewOfFile(map_file, access, hoff, loff, view_size));
-  CHECK_NE(ptr, nullptr) << "Failed to map: " << path << ". " << SystemErrorMsg();
-  auto handle = new MMAPFile{fd, map_file, ptr, view_size, offset - view_start, std::move(path)};
+  LARGE_INTEGER file_size;
+  CHECK_NE(GetFileSizeEx(fd, &file_size), 0) << error::SystemError().message();
+  auto map_file = CreateFileMappingA(fd, nullptr, PAGE_READONLY, file_size.HighPart,
+                                     file_size.LowPart, nullptr);
+  CHECK(map_file) << "Failed to map: " << path << ". " << error::SystemError().message();
+
+  auto li_vs = reinterpret_cast<LARGE_INTEGER*>(&view_start);
+  ptr = reinterpret_cast<std::byte*>(
+      MapViewOfFile(map_file, FILE_MAP_READ, li_vs->HighPart, li_vs->LowPart, view_size));
+  CHECK_NE(ptr, nullptr) << "Failed to map: " << path << ". " << error::SystemError().message();
+  auto handle = new MMAPFile{std::make_unique<MmapFileImpl>(fd, map_file, ptr, view_size,
+                                                            offset - view_start, std::move(path))};
 #else
   CHECK_LE(offset, std::numeric_limits<off_t>::max())
       << "File size has exceeded the limit on the current system.";
   int prot{PROT_READ};
   ptr = reinterpret_cast<std::byte*>(mmap(nullptr, view_size, prot, MAP_PRIVATE, fd, view_start));
-  CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << SystemErrorMsg();
-  auto handle = new MMAPFile{fd, ptr, view_size, offset - view_start, std::move(path)};
+  CHECK_NE(ptr, MAP_FAILED) << "Failed to map: " << path << ". " << error::SystemError().message();
+  auto handle = new MMAPFile{
+      std::make_unique<MmapFileImpl>(fd, ptr, view_size, offset - view_start, std::move(path))};
 #endif  // defined(__linux__) || defined(__GLIBC__)

   return handle;
@@ -233,23 +287,27 @@ void detail::CloseMmap(MMAPFile* handle) {
     return;
   }
 #if defined(xgboost_IS_WIN)
-  if (handle->base_ptr) {
-    CHECK(UnmapViewOfFile(handle->base_ptr)) "Faled to call munmap: " << SystemErrorMsg();
+  if (handle->p_impl->base_ptr) {
+    CHECK(UnmapViewOfFile(handle->p_impl->base_ptr))
+        << "Failed to call munmap: " << error::SystemError().message();
   }
-  if (handle->fd != INVALID_HANDLE_VALUE) {
-    CHECK(CloseHandle(handle->fd)) << "Failed to close handle: " << SystemErrorMsg();
+  if (handle->p_impl->fd != INVALID_HANDLE_VALUE) {
+    CHECK(CloseHandle(handle->p_impl->fd))
+        << "Failed to close handle: " << error::SystemError().message();
   }
-  if (handle->file_map != INVALID_HANDLE_VALUE) {
-    CHECK(CloseHandle(handle->file_map)) << "Failed to close mapping object: " << SystemErrorMsg();
+  if (handle->p_impl->file_map != INVALID_HANDLE_VALUE) {
+    CHECK(CloseHandle(handle->p_impl->file_map))
+        << "Failed to close mapping object: " << error::SystemError().message();
   }
 #else
-  if (handle->base_ptr) {
-    CHECK_NE(munmap(handle->base_ptr, handle->base_size), -1)
-        << "Faled to call munmap: `" << handle->path << "`. " << SystemErrorMsg();
+  if (handle->p_impl->base_ptr) {
+    CHECK_NE(munmap(handle->p_impl->base_ptr, handle->p_impl->base_size), -1)
+        << "Failed to call munmap: `" << handle->p_impl->path << "`. "
+        << error::SystemError().message();
   }
-  if (handle->fd != 0) {
-    CHECK_NE(close(handle->fd), -1)
-        << "Faled to close: `" << handle->path << "`. " << SystemErrorMsg();
+  if (handle->p_impl->fd != 0) {
+    CHECK_NE(close(handle->p_impl->fd), -1)
+        << "Failed to close: `" << handle->p_impl->path << "`. " << error::SystemError().message();
   }
 #endif
   delete handle;
@@ -260,7 +318,7 @@ MmapResource::MmapResource(StringView path, std::size_t offset, std::size_t leng
       handle_{detail::OpenMmap(std::string{path}, offset, length), detail::CloseMmap},
       n_{length} {
 #if defined(__unix__) || defined(__APPLE__)
-  madvise(handle_->base_ptr, handle_->base_size, MADV_WILLNEED);
+  madvise(handle_->p_impl->base_ptr, handle_->p_impl->base_size, MADV_WILLNEED);
 #endif  // defined(__unix__) || defined(__APPLE__)
 }

@@ -280,6 +338,37 @@ MmapResource::~MmapResource() noexcept(false) = default;
 AlignedResourceReadStream::~AlignedResourceReadStream() noexcept(false) {}  // NOLINT
 PrivateMmapConstStream::~PrivateMmapConstStream() noexcept(false) {}        // NOLINT

+std::shared_ptr<MallocResource> MemBufFileReadStream::ReadFileIntoBuffer(StringView path,
+                                                                         std::size_t offset,
+                                                                         std::size_t length) {
+  CHECK(std::filesystem::exists(path.c_str())) << "`" << path << "` doesn't exist";
+  auto res = std::make_shared<MallocResource>(length);
+  auto ptr = res->DataAs<char>();
+  std::unique_ptr<FILE, std::function<std::int32_t(FILE *)>> fp{fopen(path.c_str(), "rb"), fclose};
+
+  auto err = [&] {
+    auto e = error::SystemError().message();
+    LOG(FATAL) << "Failed to read file `" << path << "`. System error message: " << e;
+  };
+#if defined(__linux__)
+  auto fd = fileno(fp.get());
+  if (fd == -1) {
+    err();
+  }
+  if (posix_fadvise(fd, offset, length, POSIX_FADV_SEQUENTIAL) != 0) {
+    LOG(FATAL) << error::SystemError().message();
+  }
+#endif  // defined(__linux__)
+
+  if (fseek(fp.get(), offset, SEEK_SET) != 0) {
+    err();
+  }
+  if (fread(ptr, length, 1, fp.get()) != 1) {
+    err();
+  }
+  return res;
+}
+
 AlignedFileWriteStream::AlignedFileWriteStream(StringView path, StringView flags)
     : pimpl_{dmlc::Stream::Create(path.c_str(), flags.c_str())} {}

@@ -302,4 +391,36 @@ AlignedMemWriteStream::~AlignedMemWriteStream() = default;
 [[nodiscard]] std::size_t AlignedMemWriteStream::Tell() const noexcept(true) {
   return this->pimpl_->Tell();
 }
+
+[[nodiscard]] std::string CmdOutput(StringView cmd) {
+#if defined(xgboost_IS_WIN)
+  std::unique_ptr<FILE, std::function<std::int32_t(FILE *)>> pipe(_popen(cmd.c_str(), "r"),
+                                                                  _pclose);
+#else
+  // popen is a convenient method, but it always returns a success even if the command
+  // fails.
+  std::unique_ptr<FILE, std::function<std::int32_t(FILE *)>> pipe(popen(cmd.c_str(), "r"), pclose);
+#endif
+  CHECK(pipe);
+  std::array<char, 128> buffer;
+  std::string result;
+  while (std::fgets(buffer.data(), static_cast<std::int32_t>(buffer.size()), pipe.get())) {
+    result += buffer.data();
+  }
+  return result;
+}
+
+[[nodiscard]] std::size_t TotalMemory() {
+#if defined(__linux__)
+  struct sysinfo info;
+  CHECK_EQ(sysinfo(&info), 0) << error::SystemError().message();
+  return info.totalram * info.mem_unit;
+#elif defined(xgboost_IS_WIN)
+  MEMORYSTATUSEX status;
+  status.dwLength = sizeof(status);
+  CHECK(GlobalMemoryStatusEx(&status)) << error::SystemError().message();
+  return static_cast<std::size_t>(status.ullTotalPhys);
+#else
+  LOG(FATAL) << "Not implemented";
+#endif  // defined(__linux__)
+}
 }  // namespace xgboost::common
diff --git a/src/common/io.h b/src/common/io.h
index e3eaa4faf89b..51aed53945b6 100644
--- a/src/common/io.h
+++ b/src/common/io.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2014-2024, XGBoost Contributors
+ * Copyright 2014-2025, XGBoost Contributors
  * \file io.h
  * \brief general stream interface for serialization, I/O
  * \author Tianqi Chen
@@ -7,12 +7,6 @@
 #ifndef XGBOOST_COMMON_IO_H_
 #define XGBOOST_COMMON_IO_H_

-#include
-
-#if defined(xgboost_IS_WIN)
-#include <windows.h>
-#endif  // defined(xgboost_IS_WIN)
-
 #include <algorithm>  // for min, fill_n, copy_n
 #include <array>      // for array
 #include <cstddef>    // for byte, size_t
@@ -230,40 +224,16 @@ inline std::string ReadAll(std::string const &path) {
   return content;
 }

+struct MmapFileImpl;
+
 /**
 * @brief A handle to mmap file.
 */
 struct MMAPFile {
-#if defined(xgboost_IS_WIN)
-  HANDLE fd{INVALID_HANDLE_VALUE};
-  HANDLE file_map{INVALID_HANDLE_VALUE};
-#else
-  std::int32_t fd{0};
-#endif  // defined(xgboost_IS_WIN)
-  std::byte* base_ptr{nullptr};
-  std::size_t base_size{0};
-  std::size_t delta{0};
-  std::string path;
-
-  MMAPFile() = default;
-
-#if defined(xgboost_IS_WIN)
-  MMAPFile(HANDLE fd, HANDLE fm, std::byte* base_ptr, std::size_t base_size, std::size_t delta,
-           std::string path)
-      : fd{fd},
-        file_map{fm},
-        base_ptr{base_ptr},
-        base_size{base_size},
-        delta{delta},
-        path{std::move(path)} {}
-#else
-  MMAPFile(std::int32_t fd, std::byte* base_ptr, std::size_t base_size, std::size_t delta,
-           std::string path)
-      : fd{fd}, base_ptr{base_ptr}, base_size{base_size}, delta{delta}, path{std::move(path)} {}
-#endif  // defined(xgboost_IS_WIN)
-
-  void const* Data() const { return this->base_ptr + this->delta; }
-  void* Data() { return this->base_ptr + this->delta; }
+  std::unique_ptr<MmapFileImpl> p_impl;
+
+  [[nodiscard]] void const* Data() const;
+  [[nodiscard]] void* Data();
+  [[nodiscard]] Span<std::byte> BasePtr() const;
 };

 namespace detail {
@@ -282,12 +252,13 @@ class ResourceHandler {
  public:
   // RTTI
   enum Kind : std::uint8_t {
-    kMalloc = 0,         // System memory.
-    kMmap = 1,           // Memory mapp.
-    kCudaMalloc = 2,     // CUDA device memory.
-    kCudaMmap = 3,       // CUDA with mmap.
-    kCudaHostCache = 4,  // CUDA pinned host memory.
-    kCudaGrowOnly = 5,   // CUDA virtual memory allocator.
+    kMalloc = 0,             // System memory.
+    kMmap = 1,               // Memory map.
+    kCudaMalloc = 2,         // CUDA device memory.
+    kCudaMmap = 3,           // CUDA with mmap.
+    kCudaHostCache = 4,      // CUDA pinned host memory.
+    kCudaGrowOnly = 5,       // CUDA virtual memory allocator.
+    kCudaPinnedMemPool = 6,  // CUDA memory pool for pinned host memory.
   };

  private:
@@ -316,6 +287,8 @@ class ResourceHandler {
       return "CudaHostCache";
     case kCudaGrowOnly:
       return "CudaGrowOnly";
+    case kCudaPinnedMemPool:
+      return "CudaPinnedMemPool";
   }
   LOG(FATAL) << "Unreachable.";
   return {};
@@ -546,6 +519,26 @@ class PrivateMmapConstStream : public AlignedResourceReadStream {
   ~PrivateMmapConstStream() noexcept(false) override;
 };

+/**
+ * @brief Read a portion of a file into a memory buffer. This class helps integration with
+ *        the external-memory file format.
+ */
+class MemBufFileReadStream : public AlignedResourceReadStream {
+  static std::shared_ptr<MallocResource> ReadFileIntoBuffer(StringView path, std::size_t offset,
+                                                            std::size_t length);
+
+ public:
+  /**
+   * @brief Construct a stream for reading a file.
+   *
+   * @param path   File path.
+   * @param offset The number of bytes into the file.
+   * @param length The number of bytes to read.
+   */
+  explicit MemBufFileReadStream(StringView path, std::size_t offset, std::size_t length)
+      : AlignedResourceReadStream{ReadFileIntoBuffer(path, offset, length)} {}
+};
+
 /**
 * @brief Base class for write stream with alignment defined by IOAlignment().
 */
@@ -607,5 +600,10 @@ class AlignedMemWriteStream : public AlignedFileWriteStream {

   [[nodiscard]] std::size_t Tell() const noexcept(true);
 };
+
+// Run a system command, get its stdout.
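CmdOutput, declared next, wraps the classic popen/fgets idiom. As its in-code comment notes, popen itself reports success even when the command fails, because the exit status only surfaces through pclose. A standalone POSIX sketch of the same idiom with the status checked (illustration only, not XGBoost code):

#include <array>   // for array
#include <cstdio>  // for fgets; popen/pclose are POSIX and come via <stdio.h>
#include <string>  // for string

std::string RunAndCapture(std::string const &cmd, int *status) {
  std::string out;
  FILE *pipe = popen(cmd.c_str(), "r");
  if (!pipe) {
    *status = -1;
    return out;
  }
  std::array<char, 128> buf;
  while (std::fgets(buf.data(), static_cast<int>(buf.size()), pipe)) {
    out += buf.data();
  }
  *status = pclose(pipe);  // the command's exit status is only known here
  return out;
}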
+[[nodiscard]] std::string CmdOutput(StringView cmd); + +[[nodiscard]] std::size_t TotalMemory(); } // namespace xgboost::common #endif // XGBOOST_COMMON_IO_H_ diff --git a/src/common/json.cc b/src/common/json.cc index 26d56bb1b03e..59259e1309e1 100644 --- a/src/common/json.cc +++ b/src/common/json.cc @@ -41,8 +41,11 @@ auto to_i64 = [](auto v) { return Json{static_cast(v)}; }; void JsonWriter::Visit(I8Array const* arr) { this->WriteArray(arr, to_i64); } void JsonWriter::Visit(U8Array const* arr) { this->WriteArray(arr, to_i64); } void JsonWriter::Visit(I16Array const* arr) { this->WriteArray(arr, to_i64); } +void JsonWriter::Visit(U16Array const* arr) { this->WriteArray(arr, to_i64); } void JsonWriter::Visit(I32Array const* arr) { this->WriteArray(arr, to_i64); } +void JsonWriter::Visit(U32Array const* arr) { this->WriteArray(arr, to_i64); } void JsonWriter::Visit(I64Array const* arr) { this->WriteArray(arr, to_i64); } +void JsonWriter::Visit(U64Array const* arr) { this->WriteArray(arr, to_i64); } // dangerous void JsonWriter::Visit(JsonObject const* obj) { stream_->emplace_back('{'); @@ -156,10 +159,16 @@ std::string Value::TypeStr() const { return "U8Array"; case ValueKind::kI16Array: return "I16Array"; + case ValueKind::kU16Array: + return "U16Array"; case ValueKind::kI32Array: return "I32Array"; + case ValueKind::kU32Array: + return "U32Array"; case ValueKind::kI64Array: return "I64Array"; + case ValueKind::kU64Array: + return "U64Array"; } return ""; } @@ -276,8 +285,11 @@ template class JsonTypedArray; template class JsonTypedArray; template class JsonTypedArray; template class JsonTypedArray; +template class JsonTypedArray; template class JsonTypedArray; +template class JsonTypedArray; template class JsonTypedArray; +template class JsonTypedArray; // Json Number bool JsonNumber::operator==(Value const& rhs) const { diff --git a/src/common/json_utils.h b/src/common/json_utils.h index 812aa1f2e57a..a9e07c31e082 100644 --- a/src/common/json_utils.h +++ b/src/common/json_utils.h @@ -1,12 +1,14 @@ /** - * Copyright 2023, XGBoost Contributors + * Copyright 2023-2025, XGBoost Contributors * * @brief Utils tailored for XGBoost. 
*/ #pragma once +#include // for transform, copy #include // for string #include // for enable_if_t, remove_const_t +#include // for vector #include "xgboost/json.h" #include "xgboost/string_view.h" // for StringView @@ -71,4 +73,40 @@ auto const &OptionalArg(Json const &in, StringView key, T const &dft) { } return dft; } + +template > * = nullptr> +void SaveVector(std::vector const &in, Json *p_out) { + auto &out = *p_out; + if (IsA(out)) { + auto &out_array = get(out); + out_array.resize(in.size()); + std::copy(in.cbegin(), in.cend(), out_array.begin()); + } else if (IsA(out)) { + auto &out_array = get(out); + out_array.resize(in.size()); + std::copy(in.cbegin(), in.cend(), out_array.begin()); + } else { + LOG(FATAL) << "Invalid array type."; + } +} + +template > * = nullptr> +void LoadVector(Json const &in, std::vector *out) { + if (IsA(in)) { + // JSON + auto const &array = get(in); + out->resize(array.size()); + std::copy(array.cbegin(), array.cend(), out->begin()); + } else if (IsA(in)) { + auto const &array = get(in); + out->resize(array.size()); + std::copy(array.cbegin(), array.cend(), out->begin()); + } else { + // UBJSON + auto const &array = get(in); + out->resize(array.size()); + std::transform(array.cbegin(), array.cend(), out->begin(), + [](Json const &v) { return get(v); }); + } +} } // namespace xgboost diff --git a/src/common/linalg_op.cc b/src/common/linalg_op.cc new file mode 100644 index 000000000000..43a3af14ce15 --- /dev/null +++ b/src/common/linalg_op.cc @@ -0,0 +1,55 @@ +/** + * Copyright 2025, XGBoost Contributors + */ +#include "linalg_op.h" + +#include // for size_t + +#include "optional_weight.h" // for OptionalWeights +#include "xgboost/context.h" // for Context + +#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_SYCL) +#include "common.h" // for AssertGPUSupport +#endif + +namespace xgboost::sycl::linalg { +void SmallHistogram(Context const* ctx, xgboost::linalg::MatrixView indices, + common::OptionalWeights const& weights, + xgboost::linalg::VectorView bins); +#if !defined(XGBOOST_USE_SYCL) +void SmallHistogram(Context const*, xgboost::linalg::MatrixView, + common::OptionalWeights const&, + xgboost::linalg::VectorView) { + common::AssertSYCLSupport(); +} +#endif +} // namespace xgboost::sycl::linalg + +namespace xgboost::linalg { +namespace cuda_impl { +void SmallHistogram(Context const* ctx, linalg::MatrixView indices, + common::OptionalWeights const& weights, linalg::VectorView bins); +#if !defined(XGBOOST_USE_CUDA) +void SmallHistogram(Context const*, linalg::MatrixView, common::OptionalWeights const&, + linalg::VectorView) { + common::AssertGPUSupport(); +} +#endif +} // namespace cuda_impl + +void SmallHistogram(Context const* ctx, linalg::MatrixView indices, + common::OptionalWeights const& weights, linalg::VectorView bins) { + auto n = indices.Size(); + if (ctx->IsCUDA()) { + cuda_impl::SmallHistogram(ctx, indices, weights, bins); + } else if (ctx->IsSycl()) { + sycl::linalg::SmallHistogram(ctx, indices, weights, bins); + } else { + for (std::size_t i = 0; i < n; ++i) { + auto y = indices(i); + auto w = weights[i]; + bins(static_cast(y)) += w; + } + } +} +} // namespace xgboost::linalg diff --git a/src/common/linalg_op.cu b/src/common/linalg_op.cu new file mode 100644 index 000000000000..718f0193300c --- /dev/null +++ b/src/common/linalg_op.cu @@ -0,0 +1,42 @@ +/** + * Copyright 2025, XGBoost Contributors + */ +#include // for inclusive_scan + +#include // for size_t + +#include "algorithm.cuh" // for ArgSort, RunLengthEncode +#include 
"device_helpers.cuh" // for MakeIndexTransformIter +#include "device_vector.cuh" // for DeviceUVector +#include "linalg_op.cuh" +#include "optional_weight.h" // for OptionalWeights +#include "xgboost/linalg.h" // for VectorView + +namespace xgboost::linalg::cuda_impl { +void SmallHistogram(Context const* ctx, linalg::MatrixView indices, + common::OptionalWeights const& d_weights, linalg::VectorView bins) { + auto n_bins = bins.Size(); + auto cuctx = ctx->CUDACtx(); + // Sort for segmented sum + dh::DeviceUVector sorted_idx(indices.Size()); + common::ArgSort(ctx, indices.Values(), dh::ToSpan(sorted_idx)); + auto d_sorted_idx = dh::ToSpan(sorted_idx); + + auto key_it = dh::MakeIndexTransformIter( + [=] XGBOOST_DEVICE(std::size_t i) { return indices(d_sorted_idx[i]); }); + + dh::device_vector counts_out(n_bins + 1, 0); + // Obtain the segment boundaries for the segmented sum. + dh::DeviceUVector unique(n_bins); + dh::CachingDeviceUVector num_runs(1); + common::RunLengthEncode(cuctx->Stream(), key_it, unique.begin(), counts_out.begin() + 1, + num_runs.begin(), indices.Size()); + thrust::inclusive_scan(cuctx->CTP(), counts_out.begin(), counts_out.end(), counts_out.begin()); + + auto val_it = dh::MakeIndexTransformIter( + [=] XGBOOST_DEVICE(std::size_t i) { return d_weights[d_sorted_idx[i]]; }); + // Sum weighted-label for each class to acc, counts_out is the segment ptr after inclusive_scan + common::SegmentedSum(cuctx->Stream(), val_it, linalg::tbegin(bins), n_bins, counts_out.cbegin(), + counts_out.cbegin() + 1); +} +} // namespace xgboost::linalg::cuda_impl diff --git a/src/common/linalg_op.cuh b/src/common/linalg_op.cuh index 0920f99ad6cc..a0b8397cb52b 100644 --- a/src/common/linalg_op.cuh +++ b/src/common/linalg_op.cuh @@ -1,17 +1,24 @@ /** - * Copyright 2021-2023, XGBoost Contributors + * Copyright 2021-2025, XGBoost Contributors */ #ifndef XGBOOST_COMMON_LINALG_OP_CUH_ #define XGBOOST_COMMON_LINALG_OP_CUH_ -#include // for int32_t -#include // for size_t -#include // for apply +#include // for counting_iterator +#include // for make_zip_iterator +#include // for transform +#include // for int32_t +#include // for size_t +#include // for iterator_traits +#include // for get +#include // for apply + +#include "cuda_context.cuh" #include "device_helpers.cuh" // for LaunchN -#include "linalg_op.h" -#include "xgboost/context.h" // for Context -#include "xgboost/linalg.h" // for TensorView +#include "type.h" // for GetValueT +#include "xgboost/context.h" // for Context +#include "xgboost/linalg.h" // for TensorView namespace xgboost::linalg { namespace cuda_impl { @@ -32,68 +39,84 @@ template struct ElementWiseImpl { template void operator()(TensorView t, Fn&& fn, cudaStream_t s) { - dh::LaunchN(t.Size(), s, [=] __device__(std::size_t i) { fn(i); }); + dh::LaunchN(t.Size(), s, [=] __device__(std::size_t i) mutable { fn(i); }); } }; template void ElementWiseKernel(TensorView t, Fn&& fn, cudaStream_t s = nullptr) { dh::safe_cuda(cudaSetDevice(t.Device().ordinal)); - cuda_impl::ElementWiseImpl{}(t, fn, s); + ElementWiseImpl{}(t, fn, s); } -} // namespace cuda_impl -template -void ElementWiseTransformDevice(TensorView t, Fn&& fn, cudaStream_t s = nullptr) { +template +void TransformIdxKernel(Context const* ctx, TensorView t, Fn&& fn) { + dh::safe_cuda(cudaSetDevice(t.Device().ordinal)); + auto s = ctx->CUDACtx()->Stream(); if (t.Contiguous()) { auto ptr = t.Values().data(); - dh::LaunchN(t.Size(), s, [=] __device__(size_t i) { ptr[i] = fn(i, ptr[i]); }); + auto it = + 
thrust::make_zip_iterator(thrust::make_counting_iterator(static_cast(0)), ptr); + using Tuple = typename cuda::std::iterator_traits>::value_type; + thrust::transform(ctx->CUDACtx()->CTP(), it, it + t.Size(), ptr, + [=] XGBOOST_DEVICE(Tuple const& tup) { + return fn(cuda::std::get<0>(tup), cuda::std::get<1>(tup)); + }); } else { dh::LaunchN(t.Size(), s, [=] __device__(size_t i) mutable { - T& v = detail::Apply(t, UnravelIndex(i, t.Shape())); + T& v = std::apply(t, UnravelIndex(i, t.Shape())); v = fn(i, v); }); } } -template -void ElementWiseKernel(Context const* ctx, TensorView t, Fn&& fn) { - ctx->IsCUDA() ? cuda_impl::ElementWiseKernel(t, fn) - : ElementWiseKernelHost(t, ctx->Threads(), fn); +template +void TransformKernel(Context const* ctx, TensorView t, Fn&& fn) { + dh::safe_cuda(cudaSetDevice(t.Device().ordinal)); + auto s = ctx->CUDACtx()->Stream(); + if (t.Contiguous()) { + auto ptr = t.Values().data(); + thrust::transform(ctx->CUDACtx()->CTP(), ptr, ptr + t.Size(), ptr, + [=] XGBOOST_DEVICE(T const& v) { return fn(v); }); + } else { + dh::LaunchN(t.Size(), s, [=] __device__(size_t i) mutable { + T& v = std::apply(t, UnravelIndex(i, t.Shape())); + v = fn(v); + }); + } } +} // namespace cuda_impl namespace detail { -template +template struct IterOp { - TensorView v; - XGBOOST_DEVICE T& operator()(std::size_t i) { - return detail::Apply(v, UnravelIndex(i, v.Shape())); - } + TensorView v; + XGBOOST_DEVICE T& operator()(std::size_t i) { return std::apply(v, UnravelIndex(i, v.Shape())); } }; } // namespace detail // naming: thrust begin // returns a thrust iterator for a tensor view. -template -auto tcbegin(TensorView v) { // NOLINT +template +auto tcbegin(TensorView v) { // NOLINT return thrust::make_transform_iterator( thrust::make_counting_iterator(0ul), - detail::IterOp>, kDim>{v}); + detail::IterOp>, D>{v}); } -template -auto tcend(TensorView v) { // NOLINT +template +auto tcend(TensorView v) { // NOLINT return tcbegin(v) + v.Size(); } -template -auto tbegin(TensorView v) { // NOLINT +template +auto tbegin(TensorView v) { // NOLINT return thrust::make_transform_iterator(thrust::make_counting_iterator(0ul), - detail::IterOp, kDim>{v}); + detail::IterOp, D>{v}); } -template -auto tend(TensorView v) { // NOLINT +template +auto tend(TensorView v) { // NOLINT return tbegin(v) + v.Size(); } } // namespace xgboost::linalg diff --git a/src/common/linalg_op.h b/src/common/linalg_op.h index 929e4c6ed647..d741c324fd9b 100644 --- a/src/common/linalg_op.h +++ b/src/common/linalg_op.h @@ -1,97 +1,275 @@ /** - * Copyright 2021-2023, XGBoost Contributors + * Copyright 2021-2025, XGBoost Contributors + * + * @brief This module defines the dispatching functions for various linalg kernels. + * + * Client code can use utilities like @ref ElementWiseKernel by including this file in the + * right translation unit. For CUDA-compatible kernels, include this header in a .cu TU. + * + * Be aware of potential violation of the one definition rule (ODR). The dispatching + * functions should never be used in an inline function without a system tag. 
*/ #ifndef XGBOOST_COMMON_LINALG_OP_H_ #define XGBOOST_COMMON_LINALG_OP_H_ -#include // std::int32_t -#include -#include "common.h" +#include // for size_t +#include // for int32_t +#include // for apply +#include // for conditional_t + +#include "json_utils.h" // for LoadVector, SaveVector #include "threading_utils.h" -#include "transform_iterator.h" // MakeIndexTransformIter -#include "xgboost/context.h" // Context +#include "transform_iterator.h" // for MakeIndexTransformIter +#include "xgboost/json.h" // for Json #include "xgboost/linalg.h" -namespace xgboost { -namespace linalg { -template -void ElementWiseTransformHost(linalg::TensorView t, int32_t n_threads, Fn&& fn) { +#if defined(__CUDACC__) +#include // for forward + +#include "linalg_op.cuh" +#endif + +#if defined(XGBOOST_USE_SYCL) +#include "../../plugin/sycl/common/linalg_op.h" +#endif + +#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_SYCL) + +#include "common.h" // for AssertGPUSupport +#include "xgboost/context.h" // for Context + +#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_SYCL) + +namespace xgboost::common { +struct OptionalWeights; +} + +namespace xgboost::linalg { +namespace cpu_impl { +template +void TransformIdxKernel(linalg::TensorView t, std::int32_t n_threads, Fn&& fn) { if (t.Contiguous()) { auto ptr = t.Values().data(); - common::ParallelFor(t.Size(), n_threads, [&](size_t i) { ptr[i] = fn(i, ptr[i]); }); + common::ParallelFor(t.Size(), n_threads, [&](std::size_t i) { ptr[i] = fn(i, ptr[i]); }); } else { - common::ParallelFor(t.Size(), n_threads, [&](size_t i) { - auto& v = detail::Apply(t, linalg::UnravelIndex(i, t.Shape())); + common::ParallelFor(t.Size(), n_threads, [&](std::size_t i) { + auto& v = std::apply(t, linalg::UnravelIndex(i, t.Shape())); v = fn(i, v); }); } } template -void ElementWiseKernelHost(linalg::TensorView t, std::int32_t n_threads, Fn &&fn) { +void TransformKernel(linalg::TensorView t, std::int32_t n_threads, Fn&& fn) { + if (t.Contiguous()) { + auto ptr = t.Values().data(); + common::ParallelFor(t.Size(), n_threads, [&](std::size_t i) { ptr[i] = fn(ptr[i]); }); + } else { + common::ParallelFor(t.Size(), n_threads, [&](std::size_t i) { + auto& v = std::apply(t, linalg::UnravelIndex(i, t.Shape())); + v = fn(v); + }); + } +} + +template +void ElementWiseKernel(linalg::TensorView t, std::int32_t n_threads, Fn&& fn) { + constexpr std::size_t kBlockSize = 2048; if constexpr (D == 1) { - common::ParallelFor(t.Size(), n_threads, [&](std::size_t i) { fn(i); }); + common::ParallelFor1d(t.Size(), n_threads, [&](auto&& block) { + for (std::size_t i = block.begin(); i < block.end(); ++i) { + fn(i); + } + }); } else if (D == 2 && t.CContiguous() && t.Shape(0) > t.Shape(1) * 64) { // Heuristic. 
Tall, c-contiguous matrix, auto n_rows = t.Shape(0); auto n_columns = t.Shape(1); - common::ParallelFor(n_rows, n_threads, [&](std::size_t i) { - for (std::size_t j = 0; j < n_columns; ++j) { - fn(i, j); + common::ParallelFor1d(n_rows, n_threads, [&](auto&& block) { + for (std::size_t i = block.begin(); i < block.end(); ++i) { + for (std::size_t j = 0; j < n_columns; ++j) { + fn(i, j); + } } }); } else { - common::ParallelFor(t.Size(), n_threads, [&](std::size_t i) { - auto idx = linalg::UnravelIndex(i, t.Shape()); - std::apply(fn, idx); + common::ParallelFor1d(t.Size(), n_threads, [&](auto&& block) { + for (std::size_t i = block.begin(); i < block.end(); ++i) { + std::apply(fn, linalg::UnravelIndex(i, t.Shape())); + } }); } } +} // namespace cpu_impl -#if !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_SYCL) -template -void ElementWiseKernelDevice(linalg::TensorView, Fn&&, void* = nullptr) { - common::AssertGPUSupport(); -} - -template -void ElementWiseTransformDevice(linalg::TensorView, Fn&&, void* = nullptr) { - common::AssertGPUSupport(); -} - -template -void ElementWiseKernel(Context const* ctx, linalg::TensorView t, Fn&& fn) { - if (ctx->IsCUDA()) { - common::AssertGPUSupport(); - } - ElementWiseKernelHost(t, ctx->Threads(), fn); -} -#endif // !defined(XGBOOST_USE_CUDA) && !defined(XGBOOST_USE_SYCL) - -template -auto cbegin(TensorView const& v) { // NOLINT - auto it = common::MakeIndexTransformIter([&](size_t i) -> std::remove_cv_t const& { - return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape())); +template +auto cbegin(TensorView const& v) { // NOLINT + auto it = common::MakeIndexTransformIter([&](std::size_t i) -> std::remove_cv_t const& { + return std::apply(v, linalg::UnravelIndex(i, v.Shape())); }); return it; } -template -auto cend(TensorView const& v) { // NOLINT +template +auto cend(TensorView const& v) { // NOLINT return cbegin(v) + v.Size(); } -template -auto begin(TensorView& v) { // NOLINT +template +auto begin(TensorView& v) { // NOLINT auto it = common::MakeIndexTransformIter( - [&](size_t i) -> T& { return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape())); }); + [&](std::size_t i) -> T& { return std::apply(v, linalg::UnravelIndex(i, v.Shape())); }); return it; } -template -auto end(TensorView& v) { // NOLINT +template +auto end(TensorView& v) { // NOLINT return begin(v) + v.Size(); } -} // namespace linalg -} // namespace xgboost + +namespace detail { +using SysTagImpl = std::int32_t; +// Magic for complying with the ODR. +#if defined(__CUDACC__) +constexpr SysTagImpl SysTag() { return 0; } +#elif defined(XGBOOST_USE_SYCL) +constexpr SysTagImpl SysTag() { return 1; } +#else +constexpr SysTagImpl SysTag() { return 2; } +#endif +} // namespace detail + +/** + * @brief Elementwise kernel without a return type. + * + * @tparam T Element type of the input array. + * @tparam D Number of dimension of the input array. + * @tparam Fn Transformation function. + * + * @param t Input array. + * @param fn Transformation function. 
+ */ +#if defined(__CUDACC__) +template +void ElementWiseKernel(Context const* ctx, TensorView t, Fn&& fn) { + ctx->DispatchDevice( + [&] { cpu_impl::ElementWiseKernel(t, ctx->Threads(), std::forward(fn)); }, + [&] { cuda_impl::ElementWiseKernel(t, std::forward(fn), ctx->CUDACtx()->Stream()); }); +} +#elif defined(XGBOOST_USE_SYCL) +template +void ElementWiseKernel(Context const* ctx, TensorView t, Fn&& fn) { + ctx->DispatchDevice([&] { cpu_impl::ElementWiseKernel(t, ctx->Threads(), std::forward(fn)); }, + [&] { LOG(FATAL) << "Invalid TU"; }, + [&] { ::xgboost::sycl::linalg::ElementWiseKernel(t, std::forward(fn)); }); +} +#else +template +void ElementWiseKernel(Context const* ctx, TensorView t, Fn&& fn) { + CHECK(ctx->IsCPU()); + ctx->DispatchDevice([&] { cpu_impl::ElementWiseKernel(t, ctx->Threads(), std::forward(fn)); }, + [&] { LOG(FATAL) << "Invalid TU"; }); +} +#endif + +/** + * @brief Elementwise transform, with element index and the element itself as input. + * + * @tparam T Element type of the input array. + * @tparam D Number of dimension of the input array. + * @tparam Fn Transformation function, must return type T. + * + * @param t Input array. + * @param fn Transformation function, must return type T. + */ +#if defined(__CUDACC__) +template +void TransformIdxKernel(Context const* ctx, TensorView t, Fn&& fn) { + ctx->DispatchDevice( + [&] { cpu_impl::TransformIdxKernel(t, ctx->Threads(), std::forward(fn)); }, + [&] { cuda_impl::TransformIdxKernel(ctx, t, std::forward(fn)); }); +} +#elif defined(XGBOOST_USE_SYCL) +template +void TransformIdxKernel(Context const* ctx, TensorView t, Fn&& fn) { + ctx->DispatchDevice( + [&] { cpu_impl::TransformIdxKernel(t, ctx->Threads(), std::forward(fn)); }, + [&] { LOG(FATAL) << "Invalid TU."; }, + [&] { + static_assert(D == 1, "Not implemented."); + sycl::linalg::ElementWiseKernel(t, [=](std::size_t i) mutable { t(i) = fn(i, t(i)); }); + }); +} +#else +template +void TransformIdxKernel(Context const* ctx, TensorView t, Fn&& fn) { + CHECK(ctx->IsCPU()); + ctx->DispatchDevice( + [&] { cpu_impl::TransformIdxKernel(t, ctx->Threads(), std::forward(fn)); }, + [&] { LOG(FATAL) << "Invalid TU."; }); +} +#endif + +/** + * @brief Elementwise transform, with the element itself as input. 
Rest is the same as @ref + * TransformIdxKernel + */ +#if defined(__CUDACC__) +template +void TransformKernel(Context const* ctx, TensorView t, Fn&& fn) { + ctx->DispatchDevice([&] { cpu_impl::TransformKernel(t, ctx->Threads(), std::forward(fn)); }, + [&] { cuda_impl::TransformKernel(ctx, t, std::forward(fn)); }); +} +#elif defined(XGBOOST_USE_SYCL) +template +void TransformKernel(Context const* ctx, TensorView t, Fn&& fn) { + ctx->DispatchDevice([&] { cpu_impl::TransformKernel(t, ctx->Threads(), std::forward(fn)); }, + [&] { LOG(FATAL) << "Invalid TU."; }, + [&] { + static_assert(D == 1, "Not implemented."); + sycl::linalg::ElementWiseKernel( + t, [=](std::size_t i) mutable { t(i) = fn(t(i)); }); + }); +} +#else +template +void TransformKernel(Context const* ctx, TensorView t, Fn&& fn) { + CHECK(ctx->IsCPU()); + ctx->DispatchDevice([&] { cpu_impl::TransformKernel(t, ctx->Threads(), std::forward(fn)); }, + [&] { LOG(FATAL) << "Invalid TU."; }); +} +#endif + +// vector-scalar multiplication +template +void VecScaMul(Context const* ctx, linalg::VectorView x, double mul) { + CHECK_EQ(x.Device().ordinal, ctx->Device().ordinal); + TransformKernel(ctx, x, [=] XGBOOST_DEVICE(float v) { return v * mul; }); +} + +// vector-scalar division +template +void VecScaDiv(Context const* ctx, linalg::VectorView x, double div) { + return VecScaMul(ctx, x, 1.0 / div); +} + +template +void LogE(Context const* ctx, linalg::VectorView x) { + CHECK_EQ(x.Device().ordinal, ctx->Device().ordinal); + TransformKernel(ctx, x, [=] XGBOOST_DEVICE(float v) { return log(v); }); +} + +template >* = nullptr> +void SaveVector(linalg::Vector const& in, Json* p_out) { + ::xgboost::SaveVector(in.Data()->HostVector(), p_out); +} + +template >* = nullptr> +void LoadVector(Json const& in, linalg::Vector* out) { + ::xgboost::LoadVector(in, &out->Data()->HostVector()); +} + +void SmallHistogram(Context const* ctx, linalg::MatrixView indices, + common::OptionalWeights const& weights, linalg::VectorView bins); +} // namespace xgboost::linalg #endif // XGBOOST_COMMON_LINALG_OP_H_ diff --git a/src/common/math.h b/src/common/math.h index 3c64ec39937b..878870fbf2db 100644 --- a/src/common/math.h +++ b/src/common/math.h @@ -60,13 +60,13 @@ XGBOOST_DEVICE constexpr bool CloseTo(T a, U b) { * \param end end iterator of input */ template -XGBOOST_DEVICE inline void Softmax(Iterator start, Iterator end) { +XGBOOST_DEVICE void Softmax(Iterator start, Iterator end) { static_assert( std::is_same_v< float, typename std::remove_reference_t().operator*())>>, - "Values should be of type bst_float"); - bst_float wmax = *start; - for (Iterator i = start+1; i != end; ++i) { + "Values should be of type float"); + float wmax = *start; + for (Iterator i = start + 1; i != end; ++i) { wmax = fmaxf(*i, wmax); } double wsum = 0.0f; diff --git a/src/common/numa_topo.cc b/src/common/numa_topo.cc new file mode 100644 index 000000000000..f4b8b0ebe54b --- /dev/null +++ b/src/common/numa_topo.cc @@ -0,0 +1,221 @@ +/** + * Copyright 2025, XGBoost Contributors + */ +#include "numa_topo.h" + +#if defined(__linux__) + +#include // for MPOL_BIND +#include // for SYS_get_mempolicy +#include // for syscall + +#endif // defined(__linux__) + +#include // for isalnum +#include // for size_t +#include // for int32_t +#include // for path +#include // for ifstream +#include // for string, stoi +#include // for vector + +#include "common.h" // for TrimLast, TrimFirst +#include "error_msg.h" // for SystemError +#include "xgboost/logging.h" + +namespace xgboost::common { + 
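The `cpulist` files consumed by the NUMA helpers below use the Linux kernel's list format: comma-separated entries that are either a single CPU id or an inclusive `lo-hi` range (e.g. `0-3,7` denotes CPUs 0, 1, 2, 3, and 7). A minimal, self-contained sketch of that parse, assuming this format; `ParseCpuList` is a hypothetical name, and `ReadCpuList` just below is the real, checked implementation with file I/O:

```cpp
#include <cstdint>
#include <string>
#include <vector>

// Parse a Linux `cpulist` string such as "0-3,7" into {0, 1, 2, 3, 7}.
// Illustration only; ReadCpuList below adds the CHECKs and reads from sysfs.
std::vector<std::int32_t> ParseCpuList(std::string const& buff) {
  std::vector<std::int32_t> cpus;
  std::size_t k = 0;
  while (k < buff.size()) {
    std::size_t idx = 0;
    std::int32_t val0 = std::stoi(buff.substr(k), &idx);  // first (or only) value
    k += idx;
    if (k < buff.size() && buff[k] == '-') {
      std::int32_t val1 = std::stoi(buff.substr(k + 1), &idx);  // end of the range
      k += idx + 1;
      for (auto i = val0; i <= val1; ++i) {
        cpus.push_back(i);  // inclusive range lo-hi
      }
    } else {
      cpus.push_back(val0);  // single CPU id
    }
    ++k;  // step over the ',' separator (or past the end)
  }
  return cpus;
}
```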
+namespace {
+namespace fs = std::filesystem;
+
+using MaskT = unsigned long;  // NOLINT
+inline constexpr std::size_t kMaskBits = sizeof(MaskT) * 8;
+
+#if defined(__linux__)
+// Wrapper for the system call.
+//
+// https://github.com/torvalds/linux/blob/3f31a806a62e44f7498e2d17719c03f816553f11/mm/mempolicy.c#L1075
+auto GetMemPolicy(int *mode, MaskT *nodemask, unsigned long maxnode, void *addr,  // NOLINT
+                  unsigned long flags) {  // NOLINT
+  return syscall(SYS_get_mempolicy, mode, nodemask, maxnode, addr, flags);
+}
+
+auto GetMemPolicy(int *policy, MaskT *nodemask, unsigned long maxnode) {  // NOLINT
+  return GetMemPolicy(policy, nodemask, maxnode, nullptr, 0);
+}
+#endif  // defined(__linux__)
+}  // namespace
+
+void ReadCpuList(fs::path const &path, std::vector<std::int32_t> *p_cpus) {
+  auto &cpus = *p_cpus;
+  cpus.clear();
+
+  std::string buff;
+  std::ifstream fin{path};
+  fin >> buff;
+  if (fin.fail()) {
+    LOG(WARNING) << "Failed to read: " << path;
+    return;
+  }
+
+  CHECK(!buff.empty());
+  buff = common::TrimFirst(common::TrimLast(buff));
+
+  std::int32_t k = 0;
+  CHECK(std::isalnum(buff[k]));
+  while (static_cast<std::size_t>(k) < buff.size()) {
+    std::int32_t val0 = -1, val1 = -1;
+    std::size_t idx = 0;
+    CHECK(std::isalnum(buff[k])) << k << " " << buff;
+    val0 = std::stoi(buff.data() + k, &idx);
+    auto last = k + idx;
+    CHECK_LE(last, buff.size());
+    k = last + 1;  // new begin
+    if (last == buff.size() || buff[last] != '-') {
+      // Single value
+      cpus.push_back(val0);
+      continue;
+    }
+    CHECK_EQ(buff[last], '-') << last;
+
+    idx = -1;
+    CHECK_LT(k, buff.size());
+    val1 = std::stoi(buff.data() + k, &idx);
+    CHECK_GE(idx, 1);
+    // Range
+    for (auto i = val0; i <= val1; ++i) {
+      cpus.push_back(i);
+    }
+    k += (idx + 1);
+  }
+}
+
+void GetNumaNodeCpus(std::int32_t node_id, std::vector<std::int32_t> *p_cpus) {
+  p_cpus->clear();
+#if defined(__linux__)
+  std::string nodename = "node" + std::to_string(node_id);
+  auto p_cpulist = fs::path{"/sys/devices/system/node"} / nodename / "cpulist";  // NOLINT
+
+  if (!fs::exists(p_cpulist)) {
+    return;
+  }
+  ReadCpuList(p_cpulist, p_cpus);
+#endif  // defined(__linux__)
+}
+
+[[nodiscard]] std::int32_t GetNumaMaxNumNodes() {
+#if defined(__linux__)
+  auto p_possible = fs::path{"/sys/devices/system/node/possible"};
+
+  std::int32_t max_n_nodes = kMaskBits;
+
+  if (fs::exists(p_possible)) {
+    std::vector<std::int32_t> cpus;
+    ReadCpuList(p_possible, &cpus);
+    auto it = std::max_element(cpus.cbegin(), cpus.cend());
+    // +1 since node/CPU uses 0-based indexing.
+    if (it != cpus.cend() && (*it + 1) > max_n_nodes) {
+      max_n_nodes = (*it + 1);
+    }
+  }
+
+  // Upper bound, in case the syscall keeps returning errors.
+  constexpr decltype(max_n_nodes) kStop = 16384;
+  // Estimate the size of the CPU set based on the error returned from get mempolicy.
+  // Strategy used by hwloc and libnuma.
+  while (true) {
+    std::vector<MaskT> mask(max_n_nodes / kMaskBits, 0);
+
+    std::int32_t mode = -1;
+    auto err = GetMemPolicy(&mode, mask.data(), max_n_nodes);
+    if (!err || errno != EINVAL) {
+      return max_n_nodes;  // Got it.
+    }
+    max_n_nodes *= 2;
+
+    if (max_n_nodes > kStop) {
+      break;
+    }
+  }
+#endif  // defined(__linux__)
+  return -1;
+}
+
+[[nodiscard]] bool GetNumaMemBind() {
+#if defined(__linux__)
+  std::int32_t mode = -1;
+  auto max_n_nodes = GetNumaMaxNumNodes();
+  if (max_n_nodes <= 0) {
+    return false;  // Something went wrong; assume there's no membind.
+  }
+  CHECK_GE(max_n_nodes, kMaskBits);
+  std::vector<MaskT> mask(max_n_nodes / kMaskBits);
+  auto status = GetMemPolicy(&mode, mask.data(), max_n_nodes);
+  if (status < 0) {
+    auto msg = error::SystemError().message();
+    LOG(WARNING) << msg;
+    return false;
+  }
+  return mode == MPOL_BIND;
+#else
+  return false;
+#endif  // defined(__linux__)
+}
+
+[[nodiscard]] std::int32_t GetNumaNumNodes() {
+#if defined(__linux__)
+  fs::path p_node{"/sys/devices/system/node"};
+  if (!fs::exists(p_node)) {
+    return -1;
+  }
+  try {
+    std::int32_t n_nodes{0};
+    for (auto const &entry : fs::directory_iterator{p_node}) {
+      auto name = entry.path().filename().string();
+      if (name.find("node") == 0) {  // starts with `node`
+        n_nodes += 1;
+      }
+    }
+    if (n_nodes == 0) {
+      // Something went wrong, we should have at least 1 node.
+      LOG(WARNING) << "Failed to list NUMA nodes.";
+      return -1;
+    }
+    return n_nodes;
+  } catch (std::exception const &e) {
+    LOG(WARNING) << "Failed to list NUMA nodes: " << e.what();
+  }
+#endif  // defined(__linux__)
+  return -1;
+}
+
+void GetNumaHasNormalMemoryNodes(std::vector<std::int32_t> *p_nodes) {
+#if defined(__linux__)
+  fs::path has_nm{"/sys/devices/system/node/has_normal_memory"};
+  p_nodes->clear();
+  if (!fs::exists(has_nm)) {
+    return;
+  }
+  ReadCpuList(has_nm, p_nodes);
+#endif  // defined(__linux__)
+}
+
+void GetNumaHasCpuNodes(std::vector<std::int32_t> *p_nodes) {
+#if defined(__linux__)
+  fs::path has_cpu{"/sys/devices/system/node/has_cpu"};
+  p_nodes->clear();
+  if (!fs::exists(has_cpu)) {
+    return;
+  }
+  ReadCpuList(has_cpu, p_nodes);
+#endif  // defined(__linux__)
+}
+
+[[nodiscard]] bool GetCpuNuma(unsigned int* cpu, unsigned int* numa) {
+#ifdef SYS_getcpu
+  return syscall(SYS_getcpu, cpu, numa, NULL) == 0;
+#else
+  return false;
+#endif
+}
+}  // namespace xgboost::common
diff --git a/src/common/numa_topo.h b/src/common/numa_topo.h
new file mode 100644
index 000000000000..3aedbe42e57a
--- /dev/null
+++ b/src/common/numa_topo.h
@@ -0,0 +1,81 @@
+/**
+ * Copyright 2025, XGBoost Contributors
+ */
+#pragma once
+#include <cstdint>     // for int32_t
+#include <filesystem>  // for path
+#include <vector>      // for vector
+
+namespace xgboost::common {
+/**
+ * @brief Read a file with the `cpulist` format.
+ *
+ * Linux-Only.
+ *
+ */
+void ReadCpuList(std::filesystem::path const &path, std::vector<std::int32_t> *p_cpus);
+
+/**
+ * @brief Get the list of CPU cores grouped under the NUMA node.
+ *
+ * Linux-Only.
+ *
+ */
+void GetNumaNodeCpus(std::int32_t node_id, std::vector<std::int32_t> *p_cpus);
+
+/**
+ * @brief Find the maximum number of NUMA nodes.
+ *
+ * Linux-Only.
+ *
+ * @return -1 if it fails to get the number of nodes. Otherwise, the maximum number of
+ *         nodes for allocating the node mask.
+ */
+[[nodiscard]] std::int32_t GetNumaMaxNumNodes();
+
+/**
+ * @brief Check whether the memory policy is set to bind.
+ *
+ * Linux-Only.
+ *
+ */
+[[nodiscard]] bool GetNumaMemBind();
+
+/**
+ * @brief Get the number of configured NUMA nodes. This does not represent the highest
+ *        node ID, as NUMA node IDs don't have to be contiguous.
+ *
+ * Linux-Only.
+ *
+ * @return -1 if there's no NUMA node. Otherwise, returns the number of NUMA nodes.
+ */
+[[nodiscard]] std::int32_t GetNumaNumNodes();
+
+/**
+ * @brief Read the `has_normal_memory` system file.
+ */
+void GetNumaHasNormalMemoryNodes(std::vector<std::int32_t> *p_nodes);
+
+/**
+ * @brief Read the `has_cpu` system file.
+ */
+void GetNumaHasCpuNodes(std::vector<std::int32_t> *p_nodes);
+
+/**
+ * @brief Get the NUMA node on Linux. Other platforms are not supported. Returns false if the
+ * call fails.
+ */ +[[nodiscard]] bool GetCpuNuma(unsigned int* cpu, unsigned int* numa); + +/** + * @brief Is it physically possible to access the wrong memory? + */ +[[nodiscard]] inline bool NumaMemCanCross() { + std::vector nodes; + GetNumaHasCpuNodes(&nodes); + bool result = nodes.size() > 1; + GetNumaHasNormalMemoryNodes(&nodes); + result &= nodes.size() > 1; + return result; +} +} // namespace xgboost::common diff --git a/src/common/nvtx_utils.h b/src/common/nvtx_utils.h new file mode 100644 index 000000000000..14ad2637b25e --- /dev/null +++ b/src/common/nvtx_utils.h @@ -0,0 +1,64 @@ +/** + * Copyright 2024-2025, XGBoost contributors + */ +#pragma once + +#if defined(XGBOOST_USE_NVTX) +#include +#endif // defined(XGBOOST_USE_NVTX) + +#include "xgboost/string_view.h" // for StringView + +namespace xgboost::nvtx { +struct Domain { + static constexpr char const* name{"libxgboost"}; // NOLINT +}; + +#if defined(XGBOOST_USE_NVTX) +using ScopedRange = ::nvtx3::scoped_range_in; +using EventAttr = ::nvtx3::event_attributes; +using Rgb = ::nvtx3::rgb; + +inline auto MakeScopedRange(StringView name, Rgb color) { + ::nvtx3::v1::registered_string_in const scope_name{name.c_str()}; + ::nvtx3::v1::event_attributes const scope_attr{scope_name, color}; + return ::nvtx3::v1::scoped_range_in{scope_attr}; +} + +#else +class ScopedRange { + public: + template + explicit ScopedRange(Args&&...) {} +}; +class EventAttr { + public: + template + explicit EventAttr(Args&&...) {} +}; +class Rgb { + public: + template + explicit Rgb(Args&&...) {} +}; + +inline auto MakeScopedRange(StringView, Rgb) { return ScopedRange{}; } +#endif // defined(XGBOOST_USE_NVTX) +} // namespace xgboost::nvtx + +#if defined(XGBOOST_USE_NVTX) + +// Macro for making NVTX function range. +#define xgboost_NVTX_FN_RANGE() NVTX3_FUNC_RANGE_IN(::xgboost::nvtx::Domain) + +// Macro for making colored NVTX function range. 
+#define xgboost_NVTX_FN_RANGE_C(r, g, b) \ + auto __nvtx_scoped__ = ::xgboost::nvtx::MakeScopedRange(__func__, (nvtx::Rgb((r), (g), (b)))) + +#else + +#define xgboost_NVTX_FN_RANGE() + +#define xgboost_NVTX_FN_RANGE_C(r, g, b) + +#endif // defined(XGBOOST_USE_NVTX) diff --git a/src/common/optional_weight.cc b/src/common/optional_weight.cc new file mode 100644 index 000000000000..a22de40c1a88 --- /dev/null +++ b/src/common/optional_weight.cc @@ -0,0 +1,48 @@ +/** + * Copyright 2025, XGBoost Contributors + */ +#include "optional_weight.h" + +#include // for accumulate + +#include "xgboost/base.h" // for bst_idx_t +#include "xgboost/context.h" // for Context + +#include "common.h" // for AssertGPUSupport + +namespace xgboost::common { +#if defined(XGBOOST_USE_CUDA) +namespace cuda_impl { +double SumOptionalWeights(Context const* ctx, OptionalWeights const& weights); +} +#endif + +#if defined(XGBOOST_USE_SYCL) +namespace sycl_impl { +double SumOptionalWeights(Context const* ctx, OptionalWeights const& weights); +} +#endif + +[[nodiscard]] double SumOptionalWeights(Context const* ctx, OptionalWeights const& weights, + bst_idx_t n_samples) { + if (weights.Empty()) { + return n_samples * weights.dft; + } + if (ctx->IsCUDA()) { +#if defined(XGBOOST_USE_CUDA) + return cuda_impl::SumOptionalWeights(ctx, weights); +#else + common::AssertGPUSupport(); +#endif + } + if (ctx->IsSycl()) { +#if defined(XGBOOST_USE_SYCL) + return sycl_impl::SumOptionalWeights(ctx, weights); +#else + common::AssertSYCLSupport(); +#endif + } + auto sum_weight = std::accumulate(weights.Data(), weights.Data() + weights.Size(), 0.0); + return sum_weight; +} +} // namespace xgboost::common diff --git a/src/common/optional_weight.cu b/src/common/optional_weight.cu new file mode 100644 index 000000000000..7801eadea86e --- /dev/null +++ b/src/common/optional_weight.cu @@ -0,0 +1,18 @@ +/** + * Copyright 2025, XGBoost Contributors + */ +#include // for plus + +#include // for size_t + +#include "cuda_context.cuh" +#include "device_helpers.cuh" +#include "optional_weight.h" +#include "xgboost/context.h" // for Context + +namespace xgboost::common::cuda_impl { +double SumOptionalWeights(Context const* ctx, OptionalWeights const& weights) { + auto w_it = dh::MakeIndexTransformIter([=] XGBOOST_DEVICE(std::size_t i) { return weights[i]; }); + return dh::Reduce(ctx->CUDACtx()->CTP(), w_it, w_it + weights.Size(), 0.0, cuda::std::plus{}); +} +} // namespace xgboost::common::cuda_impl diff --git a/src/common/optional_weight.h b/src/common/optional_weight.h index bbfd365c8ff6..6a4eb7d1df7e 100644 --- a/src/common/optional_weight.h +++ b/src/common/optional_weight.h @@ -1,8 +1,11 @@ /** - * Copyright 2022-2023 by XGBoost Contributors + * Copyright 2022-2025, XGBoost Contributors */ #ifndef XGBOOST_COMMON_OPTIONAL_WEIGHT_H_ #define XGBOOST_COMMON_OPTIONAL_WEIGHT_H_ + +#include // for size_t + #include "xgboost/base.h" // XGBOOST_DEVICE #include "xgboost/context.h" // Context #include "xgboost/host_device_vector.h" // HostDeviceVector @@ -16,17 +19,23 @@ struct OptionalWeights { explicit OptionalWeights(Span w) : weights{w} {} explicit OptionalWeights(float w) : dft{w} {} - XGBOOST_DEVICE float operator[](size_t i) const { return weights.empty() ? dft : weights[i]; } + XGBOOST_DEVICE float operator[](std::size_t i) const { + return weights.empty() ? 
dft : weights[i]; + } [[nodiscard]] auto Empty() const { return weights.empty(); } [[nodiscard]] auto Size() const { return weights.size(); } + [[nodiscard]] auto Data() const { return weights.data(); } }; -inline OptionalWeights MakeOptionalWeights(Context const* ctx, +inline OptionalWeights MakeOptionalWeights(DeviceOrd device, HostDeviceVector const& weights) { - if (ctx->IsCUDA()) { - weights.SetDevice(ctx->Device()); + if (!device.IsCPU()) { + weights.SetDevice(device); } - return OptionalWeights{ctx->IsCUDA() ? weights.ConstDeviceSpan() : weights.ConstHostSpan()}; + return OptionalWeights{device.IsCPU() ? weights.ConstHostSpan() : weights.ConstDeviceSpan()}; } + +[[nodiscard]] double SumOptionalWeights(Context const* ctx, OptionalWeights const& weights, + bst_idx_t n_samples); } // namespace xgboost::common #endif // XGBOOST_COMMON_OPTIONAL_WEIGHT_H_ diff --git a/src/common/param_array.cc b/src/common/param_array.cc new file mode 100644 index 000000000000..45fdc662a086 --- /dev/null +++ b/src/common/param_array.cc @@ -0,0 +1,95 @@ +/** + * Copyright 2023-2025, XGBoost contributors + */ +#include "param_array.h" + +#include // for isspace +#include // for size_t +#include // for istream +#include // for ostream +#include // for string +#include // for vector + +#include "../common/json_utils.h" // for TypeCheck +#include "xgboost/json.h" // for F32Array, get, Number +#include "xgboost/json_io.h" // for JsonWriter +#include "xgboost/string_view.h" // for StringView + +namespace xgboost::common { + +namespace { +std::ostream& WriteStream(std::ostream& os, + const ParamArray& array) { // NOLINT + auto const& t = array.Get(); + F32Array arr{t.size()}; + for (std::size_t i = 0; i < t.size(); ++i) { + arr.Set(i, t[i]); + } + std::vector stream; + JsonWriter writer{&stream}; + arr.Save(&writer); + for (auto c : stream) { + os << c; + } + return os; +} +} // namespace + +std::ostream& operator<<(std::ostream& os, const ParamArray& array) { // NOLINT + return WriteStream(os, array); +} + +namespace { +std::istream& ReadStream(std::istream& is, ParamArray& array) { // NOLINT + auto& t = array.Get(); + t.clear(); + std::string str; + while (!is.eof()) { + std::string tmp; + is >> tmp; + str += tmp; + } + std::size_t head{0}; + // unify notation for parsing. 
+ while (std::isspace(str[head])) { + ++head; + } + if (str[head] == '(') { + str[head] = '['; + } + auto tail = str.size() - 1; + while (std::isspace(str[tail])) { + --tail; + } + if (str[tail] == ')') { + str[tail] = ']'; + } + + auto jarr = Json::Load(StringView{str}); + // return if there's only one element + if (IsA(jarr)) { + t.emplace_back(get(jarr)); + return is; + } + if (IsA(jarr)) { + t.emplace_back(get(jarr)); + return is; + } + + auto const& jvec = get(jarr); + for (auto v : jvec) { + TypeCheck(v, array.Name()); + if (IsA(v)) { + t.emplace_back(get(v)); + } else { + t.emplace_back(get(v)); + } + } + return is; +} +} // namespace + +std::istream& operator>>(std::istream& is, ParamArray& array) { // NOLINT + return ReadStream(is, array); +} +} // namespace xgboost::common diff --git a/src/common/param_array.h b/src/common/param_array.h new file mode 100644 index 000000000000..b20321dbc4f6 --- /dev/null +++ b/src/common/param_array.h @@ -0,0 +1,66 @@ +/** + * Copyright 2023-2025, XGBoost contributors + */ +#pragma once + +#include // for istream +#include // for ostream +#include // for string +#include // for forward +#include // for vector + +#include "xgboost/string_view.h" // for StringView + +namespace xgboost::common { +/** + * @brief A shim to enable ADL for parameter parsing. Alternatively, we can put the stream + * operators in std namespace, which seems to be less ideal. + */ +template +class ParamArray { + std::string name_; + std::vector values_; + + public: + using size_type = typename decltype(values_)::size_type; // NOLINT + using const_reference = typename decltype(values_)::const_reference; // NOLINT + using reference = typename decltype(values_)::reference; // NOLINT + + public: + ParamArray() = default; + + ParamArray(ParamArray const& that) = default; + ParamArray& operator=(ParamArray const& that) = default; + + ParamArray(ParamArray&& that) = default; + ParamArray& operator=(ParamArray&& that) = default; + + template + explicit ParamArray(StringView name, Args&&... args) + : name_{name}, values_{std::forward(args)...} {} + + [[nodiscard]] std::vector& Get() { return values_; } + [[nodiscard]] std::vector const& Get() const { return values_; } + const_reference operator[](size_type i) const { return values_[i]; } + reference operator[](size_type i) { return values_[i]; } + [[nodiscard]] bool empty() const { return values_.empty(); } // NOLINT + [[nodiscard]] std::size_t size() const { return values_.size(); } // NOLINT + [[nodiscard]] auto data() const { return values_.data(); } // NOLINT + ParamArray& operator=(std::vector const& that) { + this->values_ = that; + return *this; + } + [[nodiscard]] StringView Name() const { return this->name_; } + [[nodiscard]] auto cbegin() const { return this->values_.cbegin(); } // NOLINT + [[nodiscard]] auto cend() const { return this->values_.cend(); } // NOLINT + [[nodiscard]] auto begin() { return this->values_.begin(); } // NOLINT + [[nodiscard]] auto end() { return this->values_.end(); } // NOLINT + + void Resize(size_type n, T const& init) { this->values_.resize(n, init); } // NOLINT +}; + +// For parsing array-based parameters inside DMLC parameter. Input can be a string to a +// single float or a list of floats. 
+std::ostream& operator<<(std::ostream& os, const ParamArray& t); +std::istream& operator>>(std::istream& is, ParamArray& t); +} // namespace xgboost::common diff --git a/src/common/partition_builder.h b/src/common/partition_builder.h index 09a5ecb680fb..a996f7b89ed1 100644 --- a/src/common/partition_builder.h +++ b/src/common/partition_builder.h @@ -108,18 +108,19 @@ class PartitionBuilder { return {nleft_elems, nright_elems}; } - template + template void Partition(const size_t node_in_set, std::vector const& nodes, const common::Range1d range, const bst_bin_t split_cond, GHistIndexMatrix const& gmat, const common::ColumnMatrix& column_matrix, - const RegTree& tree, bst_idx_t const* rid) { + TreeView const& tree, bst_idx_t const* rid) { common::Span rid_span{rid + range.begin(), rid + range.end()}; common::Span left = GetLeftBuffer(node_in_set, range.begin(), range.end()); common::Span right = GetRightBuffer(node_in_set, range.begin(), range.end()); std::size_t nid = nodes[node_in_set].nid; bst_feature_t fid = tree.SplitIndex(nid); bool default_left = tree.DefaultLeft(nid); - bool is_cat = tree.GetSplitTypes()[nid] == FeatureType::kCategorical; + bool is_cat = tree.SplitType(nid) == FeatureType::kCategorical; auto node_cats = tree.NodeCats(nid); auto const& cut_values = gmat.cut.Values(); @@ -203,15 +204,16 @@ class PartitionBuilder { * worker, so we go through all the rows and mark the bit vectors on whether the decision is made * to go right, or if the feature value used for the split is missing. */ - template + template void MaskRows(const size_t node_in_set, std::vector const& nodes, const common::Range1d range, bst_bin_t split_cond, GHistIndexMatrix const& gmat, - const common::ColumnMatrix& column_matrix, const RegTree& tree, + const common::ColumnMatrix& column_matrix, TreeView const& tree, bst_idx_t const* rid, BitVector* decision_bits, BitVector* missing_bits) { common::Span rid_span{rid + range.begin(), rid + range.end()}; std::size_t nid = nodes[node_in_set].nid; bst_feature_t fid = tree.SplitIndex(nid); - bool is_cat = tree.GetSplitTypes()[nid] == FeatureType::kCategorical; + bool is_cat = tree.SplitType(nid) == FeatureType::kCategorical; auto node_cats = tree.NodeCats(nid); auto const& cut_values = gmat.cut.Values(); @@ -261,10 +263,10 @@ class PartitionBuilder { * @brief Once we've aggregated the decision and missing bits from all the workers, we can then * use them to partition the rows accordingly. */ - template + template void PartitionByMask(const size_t node_in_set, std::vector const& nodes, const common::Range1d range, GHistIndexMatrix const& gmat, - const RegTree& tree, bst_idx_t const* rid, BitVector const& decision_bits, + TreeView const& tree, bst_idx_t const* rid, BitVector const& decision_bits, BitVector const& missing_bits) { common::Span rid_span(rid + range.begin(), rid + range.end()); common::Span left = GetLeftBuffer(node_in_set, range.begin(), range.end()); @@ -365,8 +367,8 @@ class PartitionBuilder { } // Copy row partitions into global cache for reuse in objective - template - void LeafPartition(Context const* ctx, RegTree const& tree, RowSetCollection const& row_set, + template + void LeafPartition(Context const* ctx, TreeView const& tree, RowSetCollection const& row_set, Span position, Invalidp invalidp) const { auto p_begin = row_set.Data()->data(); // For each node, walk through all the samples that fall in this node. 
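The `PartitionBuilder` hunks above swap the concrete `RegTree const&` for a `TreeView` template parameter, so any type that provides the handful of members these methods actually call can drive the partitioner. A hypothetical minimal stand-in, with the member names inferred from the calls in `Partition`, `MaskRows`, and `PartitionByMask` (the real `TreeView` type in this patch may expose more):

```cpp
#include <cstdint>
#include <vector>

// Minimal stand-in so the sketch is self-contained; xgboost defines its own FeatureType.
enum class FeatureType : std::uint8_t { kNumerical = 0, kCategorical = 1 };

// Duck-typed "tree view": only the members that the templated partitioning
// code above reads for a node id `nid`.
struct MiniTreeView {
  // Feature used by the split of node `nid`.
  std::int32_t SplitIndex(std::size_t nid) const { return 0; }
  // Whether rows with a missing value go left at node `nid`.
  bool DefaultLeft(std::size_t nid) const { return true; }
  // Numerical or categorical split.
  FeatureType SplitType(std::size_t nid) const { return FeatureType::kNumerical; }
  // Category bitset for categorical splits (empty for numerical ones).
  std::vector<std::uint32_t> NodeCats(std::size_t nid) const { return {}; }
};
```

Since the full `RegTree` already has this surface, the same partitioning code now runs against both the full tree and lighter-weight views without a virtual interface.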
diff --git a/src/common/pseudo_huber.cc b/src/common/pseudo_huber.cc index 5f58a18b3ccb..c2799eb5096e 100644 --- a/src/common/pseudo_huber.cc +++ b/src/common/pseudo_huber.cc @@ -3,5 +3,5 @@ */ #include "pseudo_huber.h" namespace xgboost { -DMLC_REGISTER_PARAMETER(PesudoHuberParam); +DMLC_REGISTER_PARAMETER(PseudoHuberParam); } diff --git a/src/common/pseudo_huber.h b/src/common/pseudo_huber.h index 9cf604534806..0c0863385eb4 100644 --- a/src/common/pseudo_huber.h +++ b/src/common/pseudo_huber.h @@ -6,10 +6,10 @@ #include "xgboost/parameter.h" namespace xgboost { -struct PesudoHuberParam : public XGBoostParameter { +struct PseudoHuberParam : public XGBoostParameter { float huber_slope{1.0}; - DMLC_DECLARE_PARAMETER(PesudoHuberParam) { + DMLC_DECLARE_PARAMETER(PseudoHuberParam) { DMLC_DECLARE_FIELD(huber_slope) .set_default(1.0f) .describe("The delta term in Pseudo-Huber loss."); diff --git a/src/common/quantile.cc b/src/common/quantile.cc index 61e12100b17b..49df99d6261b 100644 --- a/src/common/quantile.cc +++ b/src/common/quantile.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020-2024, XGBoost Contributors + * Copyright 2020-2025, XGBoost Contributors */ #include "quantile.h" @@ -110,10 +110,14 @@ void HostSketchContainer::PushAdapterBatch(Batch const &batch, size_t base_rowid data::_type const &batch, size_t base_rowid, MetaInfo const &info, float missing); INSTANTIATE(ArrayAdapterBatch) +INSTANTIATE(DenseAdapterBatch) INSTANTIATE(CSRArrayAdapterBatch) -INSTANTIATE(CSCAdapterBatch) +INSTANTIATE(CSCArrayAdapterBatch) INSTANTIATE(SparsePageAdapterBatch) INSTANTIATE(ColumnarAdapterBatch) +INSTANTIATE(EncColumnarAdapterBatch) + +#undef INSTANTIATE namespace { /** diff --git a/src/common/quantile.cu b/src/common/quantile.cu index 222d7b309d89..535a2a700e89 100644 --- a/src/common/quantile.cu +++ b/src/common/quantile.cu @@ -464,7 +464,16 @@ void SketchContainer::Merge(Context const *ctx, Span d_that_colum return; } - this->Other().resize(this->Current().size() + that.size()); + std::size_t new_size = this->Current().size() + that.size(); + try { + this->Other().resize(new_size); + } catch (dmlc::Error const &) { + // Retry + this->Other().clear(); + this->Other().shrink_to_fit(); + this->Other().resize(new_size); + } + CHECK_EQ(d_that_columns_ptr.size(), this->columns_ptr_.Size()); MergeImpl(ctx, this->Data(), this->ColumnsPtr(), that, d_that_columns_ptr, diff --git a/src/common/quantile.cuh b/src/common/quantile.cuh index 1b60670d0d68..24c7219f5c87 100644 --- a/src/common/quantile.cuh +++ b/src/common/quantile.cuh @@ -1,12 +1,16 @@ /** - * Copyright 2020-2024, XGBoost Contributors + * Copyright 2020-2025, XGBoost Contributors */ #ifndef XGBOOST_COMMON_QUANTILE_CUH_ #define XGBOOST_COMMON_QUANTILE_CUH_ #include // for any_of +#include // for size_t +#include // for equal_to + #include "categorical.h" +#include "common.h" // for HumanMemUnit #include "cuda_context.cuh" // for CUDAContext #include "cuda_rt_utils.h" // for SetDevice #include "device_helpers.cuh" @@ -181,7 +185,7 @@ class SketchContainer { this->Current().shrink_to_fit(); this->Other().clear(); this->Other().shrink_to_fit(); - LOG(DEBUG) << "Quantile memory cost:" << this->MemCapacityBytes(); + LOG(DEBUG) << "Quantile memory cost:" << common::HumanMemUnit(this->MemCapacityBytes()); } /* \brief Merge quantiles from other GPU workers. */ @@ -203,8 +207,8 @@ class SketchContainer { SketchContainer& operator=(const SketchContainer&) = delete; /* \brief Removes all the duplicated elements in quantile structure. 
*/ - template > - size_t Unique(Context const* ctx, KeyComp key_comp = thrust::equal_to{}) { + template > + std::size_t Unique(Context const* ctx, KeyComp key_comp = std::equal_to{}) { timer_.Start(__func__); curt::SetDevice(ctx->Ordinal()); this->columns_ptr_.SetDevice(ctx->Device()); diff --git a/src/common/quantile_loss_utils.cc b/src/common/quantile_loss_utils.cc index df2fa6edd868..b4a33580e5d0 100644 --- a/src/common/quantile_loss_utils.cc +++ b/src/common/quantile_loss_utils.cc @@ -1,73 +1,8 @@ /** - * Copyright 2023, XGBoost contributors + * Copyright 2023-2025, XGBoost contributors */ #include "quantile_loss_utils.h" -#include // for isspace -#include // for istream -#include // for ostream -#include // for string -#include // for vector - -#include "../common/json_utils.h" // for TypeCheck -#include "xgboost/json.h" // for F32Array, get, Number -#include "xgboost/json_io.h" // for JsonWriter - namespace xgboost::common { -std::ostream& operator<<(std::ostream& os, const ParamFloatArray& array) { - auto const& t = array.Get(); - xgboost::F32Array arr{t.size()}; - for (std::size_t i = 0; i < t.size(); ++i) { - arr.Set(i, t[i]); - } - std::vector stream; - xgboost::JsonWriter writer{&stream}; - arr.Save(&writer); - for (auto c : stream) { - os << c; - } - return os; -} - -std::istream& operator>>(std::istream& is, ParamFloatArray& array) { - auto& t = array.Get(); - t.clear(); - std::string str; - while (!is.eof()) { - std::string tmp; - is >> tmp; - str += tmp; - } - std::size_t head{0}; - // unify notation for parsing. - while (std::isspace(str[head])) { - ++head; - } - if (str[head] == '(') { - str[head] = '['; - } - auto tail = str.size() - 1; - while (std::isspace(str[tail])) { - --tail; - } - if (str[tail] == ')') { - str[tail] = ']'; - } - - auto jarr = xgboost::Json::Load(xgboost::StringView{str}); - // return if there's only one element - if (xgboost::IsA(jarr)) { - t.emplace_back(xgboost::get(jarr)); - return is; - } - - auto jvec = xgboost::get(jarr); - for (auto v : jvec) { - xgboost::TypeCheck(v, "alpha"); - t.emplace_back(get(v)); - } - return is; -} - DMLC_REGISTER_PARAMETER(QuantileLossParam); } // namespace xgboost::common diff --git a/src/common/quantile_loss_utils.h b/src/common/quantile_loss_utils.h index bc781de259f1..34e794d94ca4 100644 --- a/src/common/quantile_loss_utils.h +++ b/src/common/quantile_loss_utils.h @@ -1,41 +1,22 @@ /** - * Copyright 2023 by XGBoost contributors + * Copyright 2023-2025, XGBoost contributors */ #ifndef XGBOOST_COMMON_QUANTILE_LOSS_UTILS_H_ #define XGBOOST_COMMON_QUANTILE_LOSS_UTILS_H_ -#include // std::all_of -#include // std::istream -#include // std::ostream -#include // std::vector +#include // for all_of +#include "param_array.h" // for ParamArray #include "xgboost/logging.h" // CHECK #include "xgboost/parameter.h" // XGBoostParameter -namespace xgboost { -namespace common { -// A shim to enable ADL for parameter parsing. Alternatively, we can put the stream -// operators in std namespace, which seems to be less ideal. -class ParamFloatArray { - std::vector values_; - - public: - std::vector& Get() { return values_; } - std::vector const& Get() const { return values_; } - decltype(values_)::const_reference operator[](decltype(values_)::size_type i) const { - return values_[i]; - } -}; - -// For parsing quantile parameters. Input can be a string to a single float or a list of -// floats. 
-std::ostream& operator<<(std::ostream& os, const ParamFloatArray& t); -std::istream& operator>>(std::istream& is, ParamFloatArray& t); - +namespace xgboost::common { struct QuantileLossParam : public XGBoostParameter { - ParamFloatArray quantile_alpha; + ParamArray quantile_alpha{"quantile_alpha"}; DMLC_DECLARE_PARAMETER(QuantileLossParam) { - DMLC_DECLARE_FIELD(quantile_alpha).describe("List of quantiles for quantile loss."); + DMLC_DECLARE_FIELD(quantile_alpha) + .describe("List of quantiles for quantile loss.") + .set_default(ParamArray{"quantile_alpha"}); } void Validate() const { CHECK(GetInitialised()); @@ -46,6 +27,5 @@ struct QuantileLossParam : public XGBoostParameter { CHECK(valid) << "quantile alpha must be in the range [0.0, 1.0]."; } }; -} // namespace common -} // namespace xgboost +} // namespace xgboost::common #endif // XGBOOST_COMMON_QUANTILE_LOSS_UTILS_H_ diff --git a/src/common/ranking_utils.cc b/src/common/ranking_utils.cc index 65793a13a10e..d477225a4efe 100644 --- a/src/common/ranking_utils.cc +++ b/src/common/ranking_utils.cc @@ -36,7 +36,8 @@ void RankingCache::InitOnCPU(Context const* ctx, MetaInfo const& info) { double sum_weights = 0; auto n_groups = Groups(); - auto weight = common::MakeOptionalWeights(ctx, info.weights_); + auto device = ctx->Device().IsSycl() ? DeviceOrd::CPU() : ctx->Device(); + auto weight = common::MakeOptionalWeights(device, info.weights_); for (bst_omp_uint k = 0; k < n_groups; ++k) { sum_weights += weight[k]; } diff --git a/src/common/ranking_utils.cu b/src/common/ranking_utils.cu index c67af5571be1..590dd93a321b 100644 --- a/src/common/ranking_utils.cu +++ b/src/common/ranking_utils.cu @@ -61,13 +61,13 @@ void CalcQueriesDCG(Context const* ctx, linalg::VectorView d_labels CHECK(out_dcg.Contiguous()); std::size_t bytes; - cub::DeviceSegmentedReduce::Sum(nullptr, bytes, value_it, out_dcg.Values().data(), - d_group_ptr.size() - 1, d_group_ptr.data(), - d_group_ptr.data() + 1, ctx->CUDACtx()->Stream()); + dh::safe_cuda(cub::DeviceSegmentedReduce::Sum(nullptr, bytes, value_it, out_dcg.Values().data(), + d_group_ptr.size() - 1, d_group_ptr.data(), + d_group_ptr.data() + 1, ctx->CUDACtx()->Stream())); dh::TemporaryArray temp(bytes); - cub::DeviceSegmentedReduce::Sum(temp.data().get(), bytes, value_it, out_dcg.Values().data(), - d_group_ptr.size() - 1, d_group_ptr.data(), - d_group_ptr.data() + 1, ctx->CUDACtx()->Stream()); + dh::safe_cuda(cub::DeviceSegmentedReduce::Sum( + temp.data().get(), bytes, value_it, out_dcg.Values().data(), d_group_ptr.size() - 1, + d_group_ptr.data(), d_group_ptr.data() + 1, ctx->CUDACtx()->Stream())); } void CalcQueriesInvIDCG(Context const* ctx, linalg::VectorView d_labels, @@ -171,7 +171,7 @@ void RankingCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) { sorted_idx_cache_.SetDevice(ctx->Device()); sorted_idx_cache_.Resize(info.labels.Size(), 0); - auto weight = common::MakeOptionalWeights(ctx, info.weights_); + auto weight = common::MakeOptionalWeights(ctx->Device(), info.weights_); auto w_it = dh::MakeTransformIterator(thrust::make_counting_iterator(0ul), WeightOp{weight}); weight_norm_ = static_cast(n_groups) / thrust::reduce(w_it, w_it + n_groups); diff --git a/src/common/ranking_utils.cuh b/src/common/ranking_utils.cuh index 297f5157ecfb..9025dfdbc533 100644 --- a/src/common/ranking_utils.cuh +++ b/src/common/ranking_utils.cuh @@ -30,6 +30,8 @@ XGBOOST_DEVICE __forceinline__ std::size_t ThreadsForMean(std::size_t group_size std::size_t n_pairs) { return group_size * n_pairs; } +// Number of 
threads in a group divided by the number of samples in this group, returns +// the number of pairs for pair-wise ltr with sampling. XGBOOST_DEVICE __forceinline__ std::size_t PairsForGroup(std::size_t n_threads, std::size_t group_size) { return n_threads / group_size; diff --git a/src/common/ranking_utils.h b/src/common/ranking_utils.h index 8d98dfb913d7..16a264fdc967 100644 --- a/src/common/ranking_utils.h +++ b/src/common/ranking_utils.h @@ -115,6 +115,7 @@ struct LambdaRankParam : public XGBoostParameter { } [[nodiscard]] bool HasTruncation() const { return lambdarank_pair_method == PairMethod::kTopK; } + [[nodiscard]] bool IsMean() const { return lambdarank_pair_method == PairMethod::kMean; } // Used for evaluation metric and cache initialization, iterate through top-k or the whole list [[nodiscard]] auto TopK() const { @@ -180,7 +181,8 @@ class RankingCache { HostDeviceVector y_sorted_idx_cache_; // Cached labels sorted by the model HostDeviceVector y_ranked_by_model_; - // store rounding factor for objective for each group + // Rounding factor for CUDA deterministic floating point summation. One rounding factor + // for each ranking group. linalg::Vector roundings_; // rounding factor for cost HostDeviceVector cost_rounding_; @@ -215,6 +217,9 @@ class RankingCache { if (!info.weights_.Empty()) { CHECK_EQ(Groups(), info.weights_.Size()) << error::GroupWeight(); } + if (param_.HasTruncation()) { + CHECK_GE(param_.NumPair(), 1); + } } [[nodiscard]] std::size_t MaxPositionSize() const { // Use truncation level as bound. @@ -267,21 +272,21 @@ class RankingCache { } // CUDA cache getters, the cache is shared between metric and objective, some of these - // fields are lazy initialized to avoid unnecessary allocation. + // fields are initialized lazily to avoid unnecessary allocation. [[nodiscard]] common::Span CUDAThreadsGroupPtr() const { CHECK(!threads_group_ptr_.Empty()); return threads_group_ptr_.ConstDeviceSpan(); } [[nodiscard]] std::size_t CUDAThreads() const { return n_cuda_threads_; } - linalg::VectorView CUDARounding(Context const* ctx) { + [[nodiscard]] linalg::VectorView CUDARounding(Context const* ctx) { if (roundings_.Size() == 0) { roundings_.SetDevice(ctx->Device()); roundings_.Reshape(Groups()); } return roundings_.View(ctx->Device()); } - common::Span CUDACostRounding(Context const* ctx) { + [[nodiscard]] common::Span CUDACostRounding(Context const* ctx) { if (cost_rounding_.Size() == 0) { cost_rounding_.SetDevice(ctx->Device()); cost_rounding_.Resize(1); diff --git a/src/common/ref_resource_view.cuh b/src/common/ref_resource_view.cuh index 21d49333b579..bc8b7d7c010a 100644 --- a/src/common/ref_resource_view.cuh +++ b/src/common/ref_resource_view.cuh @@ -1,5 +1,5 @@ /** - * Copyright 2024, XGBoost Contributors + * Copyright 2024-2025, XGBoost Contributors */ #pragma once @@ -7,6 +7,7 @@ #include // for make_shared #include "cuda_context.cuh" // for CUDAContext +#include "cuda_stream.h" // for StreamRef #include "ref_resource_view.h" // for RefResourceView #include "resource.cuh" // for CudaAllocResource #include "xgboost/context.h" // for Context @@ -29,6 +30,9 @@ template return ref; } +/** + * @brief Initialize the data in addition to allocation. + */ template [[nodiscard]] RefResourceView MakeFixedVecWithCudaMalloc(Context const* ctx, std::size_t n_elements, T const& init) { @@ -43,4 +47,17 @@ template auto ref = RefResourceView{resource->DataAs(), n_elements, resource}; return ref; } + +/** + * @brief Create a fixed size resource view from a shared pinned memory pool. 
+ */ +template +[[nodiscard]] RefResourceView MakeFixedVecWithPinnedMemPool( + std::shared_ptr pool, std::size_t n_elements, + curt::StreamRef stream) { + auto resource = std::make_shared( + std::move(pool), n_elements * sizeof(T), stream); + auto ref = RefResourceView{resource->DataAs(), n_elements, resource}; + return ref; +} } // namespace xgboost::common diff --git a/src/common/ref_resource_view.h b/src/common/ref_resource_view.h index 3c33a839ab77..7fae75c6fcf3 100644 --- a/src/common/ref_resource_view.h +++ b/src/common/ref_resource_view.h @@ -68,7 +68,7 @@ class RefResourceView { [[nodiscard]] size_type size() const { return size_; } // NOLINT [[nodiscard]] size_type size_bytes() const { // NOLINT - return Span{data(), static_cast(size())}.size_bytes(); + return Span{data(), static_cast(size())}.size_bytes(); } [[nodiscard]] value_type* data() { return ptr_; }; // NOLINT [[nodiscard]] value_type const* data() const { return ptr_; }; // NOLINT @@ -96,6 +96,8 @@ class RefResourceView { SPAN_LT(i, this->size_); return ptr_[i]; } + [[nodiscard]] Span> ToSpan() const { return {this->data(), this->size()}; } + [[nodiscard]] Span ToSpan() { return {this->data(), this->size()}; } /** * @brief Get the underlying resource. diff --git a/src/common/resource.cu b/src/common/resource.cu index ef662e3bd6e0..f317c70de1f1 100644 --- a/src/common/resource.cu +++ b/src/common/resource.cu @@ -1,6 +1,7 @@ /** - * Copyright 2024, XGBoost Contributors + * Copyright 2024-2025, XGBoost Contributors */ +#include "cuda_stream.h" // for DefaultStream #include "device_helpers.cuh" // for CurrentDevice #include "resource.cuh" #include "xgboost/string_view.h" // for StringView @@ -12,20 +13,28 @@ CudaMmapResource::CudaMmapResource(StringView path, std::size_t offset, std::siz [](MMAPFile* handle) { // Don't close the mmap while CUDA kernel is running. 
if (handle) { - dh::DefaultStream().Sync(); + curt::DefaultStream().Sync(); } detail::CloseMmap(handle); }}, n_{length} { auto device = dh::CurrentDevice(); - dh::safe_cuda( - cudaMemAdvise(handle_->base_ptr, handle_->base_size, cudaMemAdviseSetReadMostly, device)); - dh::safe_cuda(cudaMemAdvise(handle_->base_ptr, handle_->base_size, - cudaMemAdviseSetPreferredLocation, device)); - dh::safe_cuda( - cudaMemAdvise(handle_->base_ptr, handle_->base_size, cudaMemAdviseSetAccessedBy, device)); - dh::safe_cuda( - cudaMemPrefetchAsync(handle_->base_ptr, handle_->base_size, device, dh::DefaultStream())); + auto ptr = handle_->BasePtr(); +#if (CUDA_VERSION / 1000) >= 13 + cudaMemLocation loc; + loc.type = cudaMemLocationTypeDevice; + loc.id = device; +#else + auto loc = device; +#endif // (CUDA_VERSION / 1000) >= 13 + dh::safe_cuda(cudaMemAdvise(ptr.data(), ptr.size(), cudaMemAdviseSetReadMostly, loc)); + dh::safe_cuda(cudaMemAdvise(ptr.data(), ptr.size(), cudaMemAdviseSetPreferredLocation, loc)); + dh::safe_cuda(cudaMemAdvise(ptr.data(), ptr.size(), cudaMemAdviseSetAccessedBy, loc)); +#if (CUDA_VERSION / 1000) >= 13 + dh::safe_cuda(cudaMemPrefetchAsync(ptr.data(), ptr.size(), loc, 0, curt::DefaultStream())); +#else + dh::safe_cuda(cudaMemPrefetchAsync(ptr.data(), ptr.size(), device, curt::DefaultStream())); +#endif // (CUDA_VERSION / 1000) >= 13 } [[nodiscard]] void* CudaMmapResource::Data() { diff --git a/src/common/resource.cuh b/src/common/resource.cuh index 4936cb798a07..0760ec4fa81f 100644 --- a/src/common/resource.cuh +++ b/src/common/resource.cuh @@ -1,11 +1,13 @@ /** - * Copyright 2024, XGBoost Contributors + * Copyright 2024-2025, XGBoost Contributors */ #pragma once #include // for size_t #include // for function +#include // for move -#include "cuda_pinned_allocator.h" // for SamAllocator +#include "cuda_pinned_allocator.h" // for SamAllocator, HostPinnedMemPool +#include "cuda_stream.h" // for StreamRef #include "device_vector.cuh" // for DeviceUVector, GrowOnlyVirtualMemVec #include "io.h" // for ResourceHandler, MMAPFile #include "xgboost/string_view.h" // for StringView @@ -75,6 +77,30 @@ class CudaPinnedResource : public ResourceHandler { void Resize(std::size_t n_bytes) { this->storage_.resize(n_bytes); } }; +/** + * @brief Resource for fixed-size memory allocated by @ref HostPinnedMemPool. + * + * This container shares the pool but owns the memory. 
+ */ +class HostPinnedMemPoolResource : public ResourceHandler { + std::shared_ptr pool_; + std::size_t n_bytes_; + curt::StreamRef stream_; + void* ptr_; + + public: + explicit HostPinnedMemPoolResource(std::shared_ptr pool, + std::size_t n_bytes, curt::StreamRef stream) + : ResourceHandler{kCudaPinnedMemPool}, + pool_{std::move(pool)}, + n_bytes_{n_bytes}, + stream_{stream}, + ptr_{this->pool_->AllocateAsync(n_bytes, stream)} {} + ~HostPinnedMemPoolResource() override { this->pool_->DeallocateAsync(this->ptr_, this->stream_); } + [[nodiscard]] std::size_t Size() const override { return this->n_bytes_; } + [[nodiscard]] void* Data() override { return this->ptr_; } +}; + class CudaMmapResource : public ResourceHandler { std::unique_ptr> handle_; std::size_t n_; diff --git a/src/common/stats.cu b/src/common/stats.cu index 8f8faf1e0fd4..3bfab8c25db9 100644 --- a/src/common/stats.cu +++ b/src/common/stats.cu @@ -1,10 +1,11 @@ /** - * Copyright 2022-2024, XGBoost Contributors + * Copyright 2022-2025, XGBoost Contributors */ #include // thrust::make_counting_iterator #include // size_t +#include // for apply #include "../collective/aggregator.h" // for GlobalSum #include "cuda_context.cuh" // CUDAContext @@ -26,9 +27,8 @@ void Median(Context const* ctx, linalg::TensorView t, dh::LaunchN(d_segments.size(), ctx->CUDACtx()->Stream(), [=] XGBOOST_DEVICE(std::size_t i) { d_segments[i] = t.Shape(0) * i; }); auto val_it = dh::MakeTransformIterator( - thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(size_t i) { - return linalg::detail::Apply(t, linalg::UnravelIndex(i, t.Shape())); - }); + thrust::make_counting_iterator(0ul), + [=] XGBOOST_DEVICE(size_t i) { return std::apply(t, linalg::UnravelIndex(i, t.Shape())); }); out->SetDevice(ctx->Device()); out->Reshape(t.Shape(1)); @@ -55,9 +55,10 @@ void Mean(Context const* ctx, linalg::VectorView v, linalg::VectorV std::size_t bytes; CHECK_EQ(out.Size(), 1); auto s = ctx->CUDACtx()->Stream(); - cub::DeviceReduce::Sum(nullptr, bytes, it, out.Values().data(), v.Size(), s); + dh::safe_cuda(cub::DeviceReduce::Sum(nullptr, bytes, it, out.Values().data(), v.Size(), s)); dh::TemporaryArray temp{bytes}; - cub::DeviceReduce::Sum(temp.data().get(), bytes, it, out.Values().data(), v.Size(), s); + dh::safe_cuda( + cub::DeviceReduce::Sum(temp.data().get(), bytes, it, out.Values().data(), v.Size(), s)); } void SampleMean(Context const* ctx, bool is_column_split, linalg::MatrixView d_v, diff --git a/src/common/threading_utils.cc b/src/common/threading_utils.cc index 52e5d0c6871b..16afe72a0898 100644 --- a/src/common/threading_utils.cc +++ b/src/common/threading_utils.cc @@ -121,16 +121,8 @@ std::int32_t OmpGetNumThreads(std::int32_t n_threads) noexcept(true) { return n_threads; } -[[nodiscard]] bool GetCpuNuma(unsigned int* cpu, unsigned int* numa) { -#ifdef SYS_getcpu - return syscall(SYS_getcpu, cpu, numa, NULL) == 0; -#else - return false; -#endif -} - void NameThread(std::thread* t, StringView name) { -#if defined(__linux__) +#if defined(__linux__) && (!defined(__ANDROID__) || __ANDROID_API__ >= 26) auto handle = t->native_handle(); char old[16]; auto ret = pthread_getname_np(handle, old, 16); diff --git a/src/common/threading_utils.h b/src/common/threading_utils.h index f8d57a22207e..b20b1bc08069 100644 --- a/src/common/threading_utils.h +++ b/src/common/threading_utils.h @@ -17,6 +17,7 @@ #include // for forward #include // for vector +#include "common.h" // for DivRoundUp #include "xgboost/logging.h" #include "xgboost/string_view.h" // for StringView @@ 
-40,24 +41,21 @@ namespace xgboost::common { // Inspired by tbb::blocked_range class Range1d { public: - Range1d(size_t begin, size_t end): begin_(begin), end_(end) { - CHECK_LT(begin, end); - } + Range1d(std::size_t begin, std::size_t end) : begin_{begin}, end_{end} { CHECK_LT(begin, end); } - size_t begin() const { // NOLINT + [[nodiscard]] std::size_t begin() const { // NOLINT return begin_; } - - size_t end() const { // NOLINT + [[nodiscard]] std::size_t end() const { // NOLINT return end_; } + [[nodiscard]] std::size_t Size() const { return this->end() - this->begin(); } private: - size_t begin_; - size_t end_; + std::size_t begin_; + std::size_t end_; }; - // Split 2d space to balanced blocks // Implementation of the class is inspired by tbb::blocked_range2d // However, TBB provides only (n x m) 2d range (matrix) separated by blocks. Example: @@ -141,7 +139,7 @@ class BlockedSpace2d { // Wrapper to implement nested parallelism with simple omp parallel for template -void ParallelFor2d(const BlockedSpace2d& space, int n_threads, Func&& func) { +void ParallelFor2d(const BlockedSpace2d& space, std::int32_t n_threads, Func&& func) { static_assert(std::is_void_v>); std::size_t n_blocks_in_space = space.Size(); CHECK_GE(n_threads, 1); @@ -253,6 +251,28 @@ void ParallelFor(Index size, std::int32_t n_threads, Func&& fn) { ParallelFor(size, n_threads, Sched::Static(), std::forward(fn)); } +/** + * @brief 1-d block-based parallel for loop. + * + * @tparam kBlockOfRowsSize The size of the block. + * @tparam Index The type of the index. + * @tparam Func The type of the function. + * + * @param size The size of the range. + * @param n_threads The number of threads. + * @param fn The function to execute. The function should take a Range1d as an argument. + */ +template +void ParallelFor1d(Index size, std::int32_t n_threads, Func&& fn) { + static_assert(std::is_void_v>); + auto const n_blocks = DivRoundUp(size, kBlockOfRowsSize); + common::ParallelFor(n_blocks, n_threads, [&](auto block_id) { + std::size_t const block_beg = block_id * kBlockOfRowsSize; + auto const block_size = std::min(static_cast(size - block_beg), kBlockOfRowsSize); + fn(common::Range1d{block_beg, block_beg + block_size}); + }); +} + inline std::int32_t OmpGetThreadLimit() { std::int32_t limit = omp_get_thread_limit(); CHECK_GE(limit, 1) << "Invalid thread limit for OpenMP."; @@ -319,12 +339,6 @@ class MemStackAllocator { */ std::int32_t constexpr DefaultMaxThreads() { return 128; } -/** - * @brief Get numa node on Linux. Other platforms are not supported. Returns false if the - * call fails. - */ -[[nodiscard]] bool GetCpuNuma(unsigned int* cpu, unsigned int* numa); - /** * @brief Give the thread a name. Supports only pthread on linux. 
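The new `ParallelFor1d` helper above carves a 1-d index range into `kBlockOfRowsSize`-sized blocks and invokes the callback once per block with a `Range1d`. A usage sketch; the block size, thread count, and include path are illustrative only:

```cpp
#include <cstddef>  // for size_t
#include <vector>   // for vector

#include "common/threading_utils.h"  // path assumed

void DoubleAll(std::vector<float>* p_values) {
  auto& values = *p_values;
  // 512 elements per block; each callback invocation receives one Range1d.
  xgboost::common::ParallelFor1d<512>(values.size(), /*n_threads=*/8,
                                      [&](xgboost::common::Range1d r) {
    for (std::size_t i = r.begin(); i < r.end(); ++i) {
      values[i] *= 2.0f;
    }
  });
}
```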
*/ diff --git a/src/common/timer.cc b/src/common/timer.cc index a105f7a4a4e4..35c779f12c82 100644 --- a/src/common/timer.cc +++ b/src/common/timer.cc @@ -1,12 +1,12 @@ /** - * Copyright 2019-2024, XGBoost Contributors + * Copyright 2019-2025, XGBoost Contributors */ #include "timer.h" #include #include "../collective/communicator-inl.h" -#include "cuda_rt_utils.h" +#include "nvtx_utils.h" // for Domain #if defined(XGBOOST_USE_NVTX) #include @@ -18,7 +18,7 @@ void Monitor::Start(std::string const &name) { auto &stats = statistics_map_[name]; stats.timer.Start(); #if defined(XGBOOST_USE_NVTX) - auto range_handle = nvtx3::start_range_in(label_ + "::" + name); + auto range_handle = nvtx3::start_range_in(label_ + "::" + name); stats.nvtx_id = range_handle.get_value(); #endif // defined(XGBOOST_USE_NVTX) } @@ -30,7 +30,7 @@ void Monitor::Stop(const std::string &name) { stats.timer.Stop(); stats.count++; #if defined(XGBOOST_USE_NVTX) - nvtx3::end_range_in(nvtx3::range_handle{stats.nvtx_id}); + nvtx3::end_range_in(nvtx3::range_handle{stats.nvtx_id}); #endif // defined(XGBOOST_USE_NVTX) } } diff --git a/src/common/transform_iterator.h b/src/common/transform_iterator.h index 15e2279fa159..6f20697c4386 100644 --- a/src/common/transform_iterator.h +++ b/src/common/transform_iterator.h @@ -52,6 +52,10 @@ class IndexTransformIter { auto operator-(IndexTransformIter const &that) const { return iter_ - that.iter_; } bool operator==(IndexTransformIter const &that) const { return iter_ == that.iter_; } bool operator!=(IndexTransformIter const &that) const { return !(*this == that); } + bool operator<(IndexTransformIter const &that) const { return iter_ < that.iter_; } + bool operator>(IndexTransformIter const &that) const { return that < *this; } + bool operator<=(IndexTransformIter const &that) const { return !(that < *this); } + bool operator>=(IndexTransformIter const &that) const { return !(*this < that); } IndexTransformIter &operator++() { iter_++; diff --git a/src/common/type.h b/src/common/type.h index 661a52ec1b25..59c824b687b2 100644 --- a/src/common/type.h +++ b/src/common/type.h @@ -1,5 +1,5 @@ /** - * Copyright 2023, XGBoost Contributors + * Copyright 2023-2025, XGBoost Contributors */ #pragma once #include // for int8_t @@ -21,4 +21,7 @@ common::Span RestoreType(common::Span data) { auto restored = common::Span{reinterpret_cast(data.data()), n_total_bytes / sizeof(T)}; return restored; } + +template +using GetValueT = std::remove_cv_t>; } // namespace xgboost::common diff --git a/src/common/cleanup.h b/src/common/utils.h similarity index 50% rename from src/common/cleanup.h rename to src/common/utils.h index 13a637bd372f..b77315d10bf7 100644 --- a/src/common/cleanup.h +++ b/src/common/utils.h @@ -1,13 +1,14 @@ /** - * Copyright 2024, XGBoost Contributors - * - * @brief RAII guard, simplified version of absl::Cleanup + * Copyright 2024-2025, XGBoost Contributors */ #pragma once #include // for function #include // for forward +#include "xgboost/base.h" + namespace xgboost::common { +/** @brief RAII guard, simplified version of absl::Cleanup . */ class Cleanup { std::function cb_; @@ -22,4 +23,22 @@ template auto MakeCleanup(Callback&& cb) { return Cleanup{std::forward(cb)}; } + +template +struct NoOp { + R val; + + explicit NoOp(R&& v) : val{std::forward(v)} {} + + template + XGBOOST_DEVICE R operator()(Args&&...) const { + return val; + } +}; + +template <> +struct NoOp { + template + XGBOOST_DEVICE void operator()(Args&&...) 
const {} +}; } // namespace xgboost::common diff --git a/src/context.cc b/src/context.cc index 7cd1e7c0013b..ae1874b02e4a 100644 --- a/src/context.cc +++ b/src/context.cc @@ -22,6 +22,10 @@ #endif // !defined(XGBOOST_USE_CUDA) +#if defined(XGBOOST_USE_SYCL) +#include "../plugin/sycl/context_helper.h" +#endif // defined (XGBOOST_USE_SYCL) + namespace xgboost { DMLC_REGISTER_PARAMETER(Context); @@ -200,7 +204,8 @@ DeviceOrd CUDAOrdinal(DeviceOrd device, bool) { device = CUDAOrdinal(device, fail_on_invalid_gpu_id); if (!device.IsCUDA()) { // We allow loading a GPU-based pickle on a CPU-only machine. - LOG(WARNING) << "XGBoost is not compiled with CUDA support."; + LOG(WARNING) << "Device is changed from GPU to CPU as we couldn't find any available GPU on " + "the system."; } } return device; @@ -229,36 +234,17 @@ void Context::Init(Args const& kwargs) { } } -void Context::ConfigureGpuId(bool require_gpu) { - if (this->IsCPU() && require_gpu) { - this->UpdateAllowUnknown(Args{{kDevice, DeviceSym::CUDA()}}); - } -} - void Context::SetDeviceOrdinal(Args const& kwargs) { auto gpu_id_it = std::find_if(kwargs.cbegin(), kwargs.cend(), [](auto const& p) { return p.first == "gpu_id"; }); auto has_gpu_id = gpu_id_it != kwargs.cend(); - auto device_it = std::find_if(kwargs.cbegin(), kwargs.cend(), - [](auto const& p) { return p.first == kDevice; }); - auto has_device = device_it != kwargs.cend(); - if (has_device && has_gpu_id) { - LOG(FATAL) << "Both `device` and `gpu_id` are specified. Use `device` instead."; - } - if (has_gpu_id) { - // Compatible with XGBoost < 2.0.0 - error::WarnDeprecatedGPUId(); - auto opt_id = ParseInt(StringView{gpu_id_it->second}); - CHECK(opt_id.has_value()) << "Invalid value for `gpu_id`. Got:" << gpu_id_it->second; - if (opt_id.value() > DeviceOrd::CPUOrdinal()) { - this->UpdateAllowUnknown(Args{{kDevice, DeviceOrd::CUDA(opt_id.value()).Name()}}); - } else { - this->UpdateAllowUnknown(Args{{kDevice, DeviceOrd::CPU().Name()}}); - } - return; + LOG(FATAL) << "`gpu_id` has been removed since 3.1. 
Use `device` instead."; } + auto device_it = std::find_if(kwargs.cbegin(), kwargs.cend(), + [](auto const& p) { return p.first == kDevice; }); + auto has_device = device_it != kwargs.cend(); auto new_d = MakeDeviceOrd(this->device, this->fail_on_invalid_gpu_id); if (!has_device) { @@ -281,6 +267,14 @@ std::int32_t Context::Threads() const { return n_threads; } +DeviceOrd Context::DeviceFP64() const { + #if defined(XGBOOST_USE_SYCL) + return sycl::DeviceFP64(device_); + #else + return device_; + #endif // defined(XGBOOST_USE_SYCL) +} + #if !defined(XGBOOST_USE_CUDA) CUDAContext const* Context::CUDACtx() const { common::AssertGPUSupport(); diff --git a/src/data/adapter.cc b/src/data/adapter.cc index 2df16b91b606..58d7e84e7e67 100644 --- a/src/data/adapter.cc +++ b/src/data/adapter.cc @@ -3,18 +3,41 @@ */ #include "adapter.h" -#include // for move +#include // for all_of +#include // for int32_t +#include // for partial_sum +#include // for move +#include // for vector #include "../c_api/c_api_error.h" // for API_BEGIN, API_END +#include "../encoder/ordinal.h" // for HostCatIndexView #include "array_interface.h" // for ArrayInterface -#include "xgboost/c_api.h" +#include "columnar.h" // for GetRefCats, GetArrowDictionary +#include "xgboost/c_api.h" // for DataIterHandle +#include "xgboost/json.h" // for Json, Object, Array #include "xgboost/logging.h" namespace xgboost::data { +namespace { +auto GetRefCats(Json handle) { + auto cats = reinterpret_cast(get(handle)); + CHECK(cats); + auto h_cats = cats->HostView(); + return h_cats; +} +} // anonymous namespace + ColumnarAdapter::ColumnarAdapter(StringView columns) { - auto jarray = Json::Load(columns); - CHECK(IsA(jarray)); - auto const& array = get(jarray); + auto jdf = Json::Load(columns); + + if (IsA(jdf)) { + // Has reference categories. + this->ref_cats_ = GetRefCats(jdf["ref_categories"]); + jdf = jdf["columns"]; + } + + CHECK(IsA(jdf)); + auto const& array = get(jdf); bst_idx_t n_samples{0}; std::vector cat_segments{0}; for (auto const& jcol : array) { @@ -51,7 +74,12 @@ ColumnarAdapter::ColumnarAdapter(StringView columns) { }); this->cat_segments_ = std::move(cat_segments); CHECK(consistent) << "Size of columns should be the same."; - batch_ = ColumnarAdapterBatch{columns_}; + batch_ = ColumnarAdapterBatch{columns_, NoOpAccessor{}}; + + if (!this->ref_cats_.Empty()) { + CHECK_EQ(this->ref_cats_.Size(), this->columns_.size()) + << "Invalid reference categories, different number of columns"; + } } template diff --git a/src/data/adapter.h b/src/data/adapter.h index a9e97b3feb1b..88b7650c75bc 100644 --- a/src/data/adapter.h +++ b/src/data/adapter.h @@ -7,7 +7,6 @@ #include #include // for transform, all_of -#include // for isfinite #include // for size_t #include // for uint8_t #include // for numeric_limits @@ -16,10 +15,9 @@ #include // for variant #include // for vector -#include "../common/math.h" -#include "../encoder/ordinal.h" // for CatStrArrayView -#include "../encoder/types.h" // for TupToVarT -#include "array_interface.h" // for CategoricalIndexArgTypes +#include "../data/cat_container.h" // for CatAccessor +#include "array_interface.h" // for ArrayInterface +#include "entry.h" // for COOTuple #include "xgboost/base.h" #include "xgboost/data.h" #include "xgboost/logging.h" @@ -72,34 +70,6 @@ namespace xgboost::data { * passing over the data. 
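Since the context.cc hunk above turns `gpu_id` into a hard error rather than a deprecation warning, callers migrate to the `device` parameter. A minimal sketch of the C++ parameter interface, assuming `Context::Init` stays public as shown in the hunk:

```cpp
#include "xgboost/context.h"  // for Context; Args comes in via xgboost/base.h

int main() {
  xgboost::Context ctx;
  // Previously: ctx.Init({{"gpu_id", "0"}});  // now raises LOG(FATAL)
  ctx.Init(xgboost::Args{{"device", "cuda:0"}});
  return 0;
}
```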
*/ constexpr size_t kAdapterUnknownSize = std::numeric_limits::max(); -struct COOTuple { - COOTuple() = default; - XGBOOST_DEVICE COOTuple(bst_idx_t row_idx, bst_idx_t column_idx, float value) - : row_idx(row_idx), column_idx(column_idx), value(value) {} - - bst_idx_t row_idx{0}; - bst_idx_t column_idx{0}; - float value{0}; -}; - -struct IsValidFunctor { - float missing; - - XGBOOST_DEVICE explicit IsValidFunctor(float missing) : missing(missing) {} - - XGBOOST_DEVICE bool operator()(float value) const { - return !(common::CheckNAN(value) || value == missing); - } - - XGBOOST_DEVICE bool operator()(const data::COOTuple& e) const { - return !(common::CheckNAN(e.value) || e.value == missing); - } - - XGBOOST_DEVICE bool operator()(const Entry& e) const { - return !(common::CheckNAN(e.fvalue) || e.fvalue == missing); - } -}; - namespace detail { /** @@ -130,66 +100,8 @@ class NoMetaInfo { const uint64_t* Qid() const { return nullptr; } const float* BaseMargin() const { return nullptr; } }; - }; // namespace detail -class CSRAdapterBatch : public detail::NoMetaInfo { - public: - class Line { - public: - Line(bst_idx_t row_idx, bst_idx_t size, const unsigned* feature_idx, const float* values) - : row_idx_(row_idx), size_(size), feature_idx_(feature_idx), values_(values) {} - - size_t Size() const { return size_; } - COOTuple GetElement(size_t idx) const { - return COOTuple{row_idx_, feature_idx_[idx], values_[idx]}; - } - - private: - bst_idx_t row_idx_; - bst_idx_t size_; - const unsigned* feature_idx_; - const float* values_; - }; - CSRAdapterBatch(const size_t* row_ptr, const unsigned* feature_idx, - const float* values, size_t num_rows, size_t, size_t) - : row_ptr_(row_ptr), - feature_idx_(feature_idx), - values_(values), - num_rows_(num_rows) {} - const Line GetLine(size_t idx) const { - size_t begin_offset = row_ptr_[idx]; - size_t end_offset = row_ptr_[idx + 1]; - return Line(idx, end_offset - begin_offset, &feature_idx_[begin_offset], - &values_[begin_offset]); - } - size_t Size() const { return num_rows_; } - static constexpr bool kIsRowMajor = true; - - private: - const size_t* row_ptr_; - const unsigned* feature_idx_; - const float* values_; - size_t num_rows_; -}; - -class CSRAdapter : public detail::SingleBatchDataIter { - public: - CSRAdapter(const size_t* row_ptr, const unsigned* feature_idx, const float* values, - bst_idx_t num_rows, bst_idx_t num_elements, size_t num_features) - : batch_(row_ptr, feature_idx, values, num_rows, num_elements, num_features), - num_rows_(num_rows), - num_columns_(num_features) {} - const CSRAdapterBatch& Value() const override { return batch_; } - bst_idx_t NumRows() const { return num_rows_; } - bst_idx_t NumColumns() const { return num_columns_; } - - private: - CSRAdapterBatch batch_; - bst_idx_t num_rows_; - bst_idx_t num_columns_; -}; - class DenseAdapterBatch : public detail::NoMetaInfo { public: DenseAdapterBatch(const float* values, bst_idx_t num_rows, bst_idx_t num_features) @@ -217,6 +129,8 @@ class DenseAdapterBatch : public detail::NoMetaInfo { const Line GetLine(size_t idx) const { return Line(values_ + idx * num_features_, num_features_, idx); } + [[nodiscard]] std::size_t NumRows() const { return num_rows_; } + [[nodiscard]] std::size_t NumCols() const { return num_features_; } static constexpr bool kIsRowMajor = true; private: @@ -233,8 +147,8 @@ class DenseAdapter : public detail::SingleBatchDataIter { num_columns_(num_features) {} const DenseAdapterBatch& Value() const override { return batch_; } - size_t NumRows() const { return 
num_rows_; } - size_t NumColumns() const { return num_columns_; } + [[nodiscard]] std::size_t NumRows() const { return num_rows_; } + [[nodiscard]] std::size_t NumColumns() const { return num_columns_; } private: DenseAdapterBatch batch_; @@ -339,15 +253,15 @@ class CSRArrayAdapterBatch : public detail::NoMetaInfo { n_features_{n_features} { } - size_t NumRows() const { + [[nodiscard]] std::size_t NumRows() const { size_t size = indptr_.Shape<0>(); size = size == 0 ? 0 : size - 1; return size; } - size_t NumCols() const { return n_features_; } - size_t Size() const { return this->NumRows(); } + [[nodiscard]] std::size_t NumCols() const { return n_features_; } + [[nodiscard]] std::size_t Size() const { return this->NumRows(); } - Line const GetLine(size_t idx) const { + [[nodiscard]] Line const GetLine(size_t idx) const { auto begin_no_stride = TypedIndex{indptr_}(idx); auto end_no_stride = TypedIndex{indptr_}(idx + 1); @@ -365,9 +279,7 @@ class CSRArrayAdapterBatch : public detail::NoMetaInfo { }; /** - * Adapter for CSR array on host, in Python that's `scipy.sparse.csr_matrix`. This is - * similar to `CSRAdapter`, but supports __array_interface__ instead of raw pointers. An - * advantage is this can handle various data type without making a copy. + * @brief Adapter for CSR array on host, in Python that's `scipy.sparse.csr_matrix`. */ class CSRArrayAdapter : public detail::SingleBatchDataIter { public: @@ -378,15 +290,13 @@ class CSRArrayAdapter : public detail::SingleBatchDataIter static_cast(num_cols_)}; } - CSRArrayAdapterBatch const& Value() const override { - return batch_; - } - size_t NumRows() const { + [[nodiscard]] CSRArrayAdapterBatch const& Value() const override { return batch_; } + [[nodiscard]] std::size_t NumRows() const { size_t size = indptr_.Shape<0>(); size = size == 0 ? 
0 : size - 1; return size; } - size_t NumColumns() const { return num_cols_; } + [[nodiscard]] std::size_t NumColumns() const { return num_cols_; } private: CSRArrayAdapterBatch batch_; @@ -396,72 +306,6 @@ class CSRArrayAdapter : public detail::SingleBatchDataIter size_t num_cols_; }; -class CSCAdapterBatch : public detail::NoMetaInfo { - public: - CSCAdapterBatch(const size_t* col_ptr, const unsigned* row_idx, - const float* values, size_t num_features) - : col_ptr_(col_ptr), - row_idx_(row_idx), - values_(values), - num_features_(num_features) {} - - private: - class Line { - public: - Line(size_t col_idx, size_t size, const unsigned* row_idx, - const float* values) - : col_idx_(col_idx), size_(size), row_idx_(row_idx), values_(values) {} - - size_t Size() const { return size_; } - COOTuple GetElement(size_t idx) const { - return COOTuple{row_idx_[idx], col_idx_, values_[idx]}; - } - - private: - size_t col_idx_; - size_t size_; - const unsigned* row_idx_; - const float* values_; - }; - - public: - size_t Size() const { return num_features_; } - const Line GetLine(size_t idx) const { - size_t begin_offset = col_ptr_[idx]; - size_t end_offset = col_ptr_[idx + 1]; - return Line(idx, end_offset - begin_offset, &row_idx_[begin_offset], - &values_[begin_offset]); - } - static constexpr bool kIsRowMajor = false; - - private: - const size_t* col_ptr_; - const unsigned* row_idx_; - const float* values_; - size_t num_features_; -}; - -class CSCAdapter : public detail::SingleBatchDataIter { - public: - CSCAdapter(const size_t* col_ptr, const unsigned* row_idx, - const float* values, size_t num_features, size_t num_rows) - : batch_(col_ptr, row_idx, values, num_features), - num_rows_(num_rows), - num_columns_(num_features) {} - const CSCAdapterBatch& Value() const override { return batch_; } - - // JVM package sends 0 as unknown - size_t NumRows() const { - return num_rows_ == 0 ? kAdapterUnknownSize : num_rows_; - } - size_t NumColumns() const { return num_columns_; } - - private: - CSCAdapterBatch batch_; - size_t num_rows_; - size_t num_columns_; -}; - class CSCArrayAdapterBatch : public detail::NoMetaInfo { ArrayInterface<1> indptr_; ArrayInterface<1> indices_; @@ -480,8 +324,8 @@ class CSCArrayAdapterBatch : public detail::NoMetaInfo { values_{std::move(values)}, offset_{offset} {} - std::size_t Size() const { return values_.Shape<0>(); } - COOTuple GetElement(std::size_t idx) const { + [[nodiscard]] std::size_t Size() const { return values_.Shape<0>(); } + [[nodiscard]] COOTuple GetElement(std::size_t idx) const { return {TypedIndex{row_idx_}(offset_ + idx), column_idx_, values_(offset_ + idx)}; } @@ -494,8 +338,11 @@ class CSCArrayAdapterBatch : public detail::NoMetaInfo { ArrayInterface<1> values) : indptr_{std::move(indptr)}, indices_{std::move(indices)}, values_{std::move(values)} {} - std::size_t Size() const { return indptr_.n - 1; } - Line GetLine(std::size_t idx) const { + [[nodiscard]] std::size_t Size() const noexcept(true) { + auto n = indptr_.n; + return (n == 0) ? n : (n - 1); + } + [[nodiscard]] Line GetLine(std::size_t idx) const { auto begin_no_stride = TypedIndex{indptr_}(idx); auto end_no_stride = TypedIndex{indptr_}(idx + 1); @@ -512,7 +359,7 @@ class CSCArrayAdapterBatch : public detail::NoMetaInfo { }; /** - * \brief CSC adapter with support for array interface. + * @brief CSC adapter with support for array interface. 
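The guarded `Size()` above fixes an unsigned-underflow hazard: `indptr_.n` is unsigned, so `n - 1` on an empty matrix wraps around instead of yielding zero rows. A two-line illustration:

```cpp
#include <cstddef>   // for size_t
#include <iostream>  // for cout

int main() {
  std::size_t n = 0;
  std::cout << (n - 1) << "\n";                   // wraps: 18446744073709551615 on LP64
  std::cout << ((n == 0) ? n : (n - 1)) << "\n";  // guarded, as in Size(): 0
}
```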
*/ class CSCArrayAdapter : public detail::SingleBatchDataIter { ArrayInterface<1> indptr_; @@ -537,16 +384,21 @@ class CSCArrayAdapter : public detail::SingleBatchDataIter [[nodiscard]] const CSCArrayAdapterBatch& Value() const override { return batch_; } }; -class ColumnarAdapterBatch : public detail::NoMetaInfo { - common::Span> columns_; +template +class EncColumnarAdapterBatchImpl : public detail::NoMetaInfo { + using ArrayInf = std::add_const_t>; + + common::Span columns_; + EncAccessor acc_; class Line { - common::Span> const& columns_; + common::Span const& columns_; std::size_t const ridx_; + EncAccessor const& acc_; public: - explicit Line(common::Span> const& columns, std::size_t ridx) - : columns_{columns}, ridx_{ridx} {} + explicit Line(common::Span const& columns, EncAccessor const& acc, std::size_t ridx) + : columns_{columns}, ridx_{ridx}, acc_{acc} {} [[nodiscard]] std::size_t Size() const { return columns_.empty() ? 0 : columns_.size(); } [[nodiscard]] COOTuple GetElement(std::size_t fidx) const { @@ -554,16 +406,17 @@ class ColumnarAdapterBatch : public detail::NoMetaInfo { float value = column.valid.Data() == nullptr || column.valid.Check(ridx_) ? column(ridx_) : std::numeric_limits::quiet_NaN(); - return {ridx_, fidx, value}; + return {ridx_, fidx, acc_(value, fidx)}; } }; public: - ColumnarAdapterBatch() = default; - explicit ColumnarAdapterBatch(common::Span> columns) : columns_{columns} {} - [[nodiscard]] Line GetLine(std::size_t ridx) const { return Line{columns_, ridx}; } + EncColumnarAdapterBatchImpl() = default; + explicit EncColumnarAdapterBatchImpl(common::Span columns, EncAccessor acc) + : columns_{columns}, acc_{std::move(acc)} {} + [[nodiscard]] Line GetLine(std::size_t ridx) const { return Line{columns_, this->acc_, ridx}; } [[nodiscard]] std::size_t Size() const { - return columns_.empty() ? 0 : columns_.front().Shape<0>(); + return columns_.empty() ? 0 : columns_.front().template Shape<0>(); } [[nodiscard]] std::size_t NumCols() const { return columns_.empty() ? 0 : columns_.size(); } [[nodiscard]] std::size_t NumRows() const { return this->Size(); } @@ -571,90 +424,30 @@ class ColumnarAdapterBatch : public detail::NoMetaInfo { static constexpr bool kIsRowMajor = true; }; -/** - * @brief Get string names and codes for categorical features. - * - * @return The number of categories for the current column. - */ -template -[[nodiscard]] std::size_t GetArrowDictionary(Json jcol, - std::vector* p_cat_columns, - std::vector>* p_columns, - std::size_t* p_n_bytes, bst_idx_t* p_n_samples) { - auto& cat_columns = *p_cat_columns; - // arrow StringArray for name of categories - auto const& jnames = get(jcol[0]); - // There are 3 buffers for a StringArray, validity mask, offset, and data. Mask - // and data are represented by a single masked array. 
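As the comment above notes, an arrow StringArray is an offsets buffer plus a flat character buffer; string `i` occupies `[offsets[i], offsets[i + 1])`, so the final offset gives the total buffer size. That is the invariant the relocated `GetArrowNames` (in the new `columnar.h` later in this diff) relies on when it reads the last offset to recover `strbuf.n`. A small sketch, assuming `int32_t` offsets:

```cpp
#include <cstddef>  // for size_t
#include <cstdint>  // for int32_t
#include <vector>   // for vector

// n strings are described by n + 1 offsets; the last offset is the total
// number of bytes in the character buffer.
std::size_t StringBufferBytes(std::vector<std::int32_t> const& offsets) {
  return offsets.empty() ? 0 : static_cast<std::size_t>(offsets.back());
}
```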
- auto const& joffset = get(jnames.at("offsets")); - auto offset = ArrayInterface<1>{joffset}; - auto const& jstr = get(jnames.at("values")); - auto strbuf = ArrayInterface<1>(jstr); - CHECK_EQ(strbuf.type, ArrayInterfaceHandler::kI1); - - auto names = enc::CatStrArrayView{ - common::Span{static_cast(offset.data), offset.Shape<0>()}, - common::Span{reinterpret_cast(strbuf.data), strbuf.n}}; - cat_columns.emplace_back(names); - - // arrow Integer array for encoded categories - auto const& jcodes = get(jcol[1]); - auto codes = ArrayInterface<1>{jcodes}; - p_columns->push_back(codes); - - auto& n_bytes = *p_n_bytes; - n_bytes += codes.ElementSize() * codes.Shape<0>(); - n_bytes += names.SizeBytes(); - - *p_n_samples = std::max(*p_n_samples, static_cast(codes.Shape<0>())); - return names.size(); -} - -/** - * @brief Get numeric names and codes for categorical features. - * - * @return The number of categories for the current column. - */ -template -[[nodiscard]] std::size_t GetArrowNumericIndex( - DeviceOrd device, Json jcol, std::vector* p_cat_columns, - std::vector>* p_columns, std::size_t* p_n_bytes, - bst_idx_t* p_n_samples) { - auto const& first = get(jcol[0]); - auto names = ArrayInterface<1>{first}; - auto& n_bytes = *p_n_bytes; - DispatchDType(names, device, [&](auto t) { - using T = typename decltype(t)::value_type; - constexpr bool kKnownType = enc::MemberOf, enc::CatPrimIndexTypes>::value; - CHECK(kKnownType) << "Unsupported categorical index type."; - auto span = common::Span{t.Values().data(), t.Size()}; - if constexpr (kKnownType) { - p_cat_columns->emplace_back(span); - n_bytes += span.size_bytes(); - } - }); - auto const& jcodes = get(jcol[1]); - auto codes = ArrayInterface<1>{jcodes}; - p_columns->push_back(codes); - - n_bytes += codes.ElementSize() * codes.Shape<0>(); - *p_n_samples = std::max(*p_n_samples, static_cast(codes.Shape<0>())); - - return names.n; -} +using ColumnarAdapterBatch = EncColumnarAdapterBatchImpl; +using EncColumnarAdapterBatch = EncColumnarAdapterBatchImpl; /** * @brief Adapter for columnar format (arrow). * - * Supports for both numeric values and categorical values. + * Supports both numeric values and categorical values. + * + * See @ref XGDMatrixCreateFromColumnar for notes */ class ColumnarAdapter : public detail::SingleBatchDataIter { std::vector> columns_; + enc::HostColumnsView ref_cats_; std::vector cats_; std::vector cat_segments_; ColumnarAdapterBatch batch_; std::size_t n_bytes_{0}; + [[nodiscard]] static bool HasCatImpl(std::vector const& cats) { + return !std::all_of(cats.cbegin(), cats.cend(), [](auto const& cats) { + return std::visit([](auto&& cats) { return cats.empty(); }, cats); + }); + } + public: /** * @brief JSON-encoded array of columns. 
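Tying this back to the `ColumnarAdapter` constructor in the adapter.cc hunk: the adapter now accepts either the plain JSON array of `__array_interface__` columns or an object wrapping that array alongside a reference-category handle. A hedged sketch of the dispatch; the helper name is illustrative:

```cpp
#include "xgboost/json.h"  // for Json, Object, IsA

// Mirrors the constructor's payload handling: object payloads carry reference
// categories for re-coding, array payloads are self-describing.
void DispatchColumnarPayload(xgboost::Json jdf) {
  if (xgboost::IsA<xgboost::Object>(jdf)) {
    auto ref = jdf["ref_categories"];  // opaque handle to training-time categories
    jdf = jdf["columns"];              // continue with the plain array path
  }
  // `jdf` is now the JSON array of columns.
}
```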
@@ -670,19 +463,32 @@ class ColumnarAdapter : public detail::SingleBatchDataIter return 0; } [[nodiscard]] bst_idx_t NumColumns() const { return columns_.size(); } - [[nodiscard]] bool HasCategorical() const { - return !std::all_of(this->cats_.cbegin(), this->cats_.cend(), [](auto const& cats) { - return std::visit([](auto&& cats) { return cats.empty(); }, cats); - }); - } + + [[nodiscard]] bool HasCategorical() const { return HasCatImpl(this->cats_); } + [[nodiscard]] bool HasRefCategorical() const { return !this->ref_cats_.Empty(); } + [[nodiscard]] std::size_t SizeBytes() const { return n_bytes_; } [[nodiscard]] enc::HostColumnsView Cats() const { return {this->cats_, this->cat_segments_, static_cast(this->cat_segments_.back())}; } + [[nodiscard]] enc::HostColumnsView RefCats() const { return this->ref_cats_; } + [[nodiscard]] common::Span const> Columns() const { return this->columns_; } }; +inline auto MakeEncColumnarBatch(Context const* ctx, ColumnarAdapter const* adapter) { + auto cats = std::make_unique(adapter->RefCats(), true); + cats->Sort(ctx); + auto [acc, mapping] = cpu_impl::MakeCatAccessor(ctx, adapter->Cats(), cats.get()); + return std::tuple{EncColumnarAdapterBatch{adapter->Columns(), acc}, std::move(mapping)}; +} + +inline auto MakeEncColumnarBatch(Context const* ctx, + std::shared_ptr const& adapter) { + return MakeEncColumnarBatch(ctx, adapter.get()); +} + class FileAdapterBatch { public: class Line { diff --git a/src/data/array_interface.cc b/src/data/array_interface.cc index 06b9ed00c870..8240387256d9 100644 --- a/src/data/array_interface.cc +++ b/src/data/array_interface.cc @@ -1,11 +1,49 @@ /** - * Copyright 2019-2024, XGBoost Contributors + * Copyright 2019-2025, XGBoost Contributors */ #include "array_interface.h" +#if !defined(XGBOOST_USE_CUDA) + #include "../common/common.h" // for AssertGPUSupport +#endif // !defined(XGBOOST_USE_CUDA) + namespace xgboost { +std::string ArrayInterfaceHandler::TypeStr(Type type) { + auto name_fn = [](std::int32_t bits, char t) { + return std::to_string(bits) + "-bit " + ArrayInterfaceErrors::TypeStr(t); + }; + switch (type) { + case kF2: + return name_fn(16, 'f'); + case kF4: + return name_fn(32, 'f'); + case kF8: + return name_fn(64, 'f'); + case kF16: + return name_fn(128, 'f'); + case kI1: + return name_fn(8, 'i'); + case kI2: + return name_fn(16, 'i'); + case kI4: + return name_fn(32, 'i'); + case kI8: + return name_fn(64, 'i'); + case kU1: + return name_fn(8, 'u'); + case kU2: + return name_fn(16, 'u'); + case kU4: + return name_fn(32, 'u'); + case kU8: + return name_fn(64, 'u'); + } + LOG(FATAL) << "unreachable"; + return {}; +} + #if !defined(XGBOOST_USE_CUDA) void ArrayInterfaceHandler::SyncCudaStream(int64_t) { common::AssertGPUSupport(); } bool ArrayInterfaceHandler::IsCudaPtr(void const *) { return false; } diff --git a/src/data/array_interface.cu b/src/data/array_interface.cu index 28d8945c2ac3..bfedc841af5a 100644 --- a/src/data/array_interface.cu +++ b/src/data/array_interface.cu @@ -1,10 +1,9 @@ /** - * Copyright 2021-2023, XGBoost Contributors + * Copyright 2021-2025, XGBoost Contributors */ #include // for int64_t -#include "../common/common.h" -#include "../common/device_helpers.cuh" // for DefaultStream, CUDAEvent +#include "../common/cuda_stream.h" // for Event, StreamRef, DefaultStream #include "array_interface.h" #include "xgboost/logging.h" @@ -27,9 +26,9 @@ void ArrayInterfaceHandler::SyncCudaStream(std::int64_t stream) { case 2: // default per-thread stream default: { - dh::CUDAEvent e; - 
e.Record(dh::CUDAStreamView{reinterpret_cast(stream)}); - dh::DefaultStream().Wait(e); + curt::Event e; + e.Record(curt::StreamRef{reinterpret_cast(stream)}); + curt::DefaultStream().Wait(e); } } } @@ -38,6 +37,8 @@ bool ArrayInterfaceHandler::IsCudaPtr(void const* ptr) { if (!ptr) { return false; } + // clear potentially pre-existing/unrelated error + cudaGetLastError(); cudaPointerAttributes attr; auto err = cudaPointerGetAttributes(&attr, ptr); // reset error diff --git a/src/data/array_interface.h b/src/data/array_interface.h index 35056b74f3aa..ce393c0fffa4 100644 --- a/src/data/array_interface.h +++ b/src/data/array_interface.h @@ -116,6 +116,8 @@ class ArrayInterfaceHandler { kU8 = 11, }; + static std::string TypeStr(Type type); + template static PtrType GetPtrFromArrayData(Object::Map const &obj) { auto data_it = obj.find("data"); diff --git a/src/data/batch_utils.cc b/src/data/batch_utils.cc index 926650f9fc8d..6d2f71795ad5 100644 --- a/src/data/batch_utils.cc +++ b/src/data/batch_utils.cc @@ -1,9 +1,22 @@ /** - * Copyright 2023-2024, XGBoost Contributors + * Copyright 2023-2025, XGBoost Contributors */ #include "batch_utils.h" -#include "../common/error_msg.h" // for InconsistentMaxBin +#include // for max +#include // for size_t +#include // for int64_t +#include // for pair + +#include "../common/common.h" // for AssertGPUSupport +#include "../common/cuda_rt_utils.h" // for TotalMemory +#include "../common/error_msg.h" // for InconsistentMaxBin + +#if defined(XGBOOST_USE_CUDA) + +#include "../common/cuda_dr_utils.h" // for GetC2cLinkCountFromSmiGlobal + +#endif // defined(XGBOOST_USE_CUDA) namespace xgboost::data::detail { void CheckParam(BatchParam const& init, BatchParam const& param) { @@ -11,4 +24,78 @@ void CheckParam(BatchParam const& init, BatchParam const& param) { CHECK(!param.regen && param.hess.empty()) << "Only the `hist` tree method can use the `QuantileDMatrix`."; } + +[[nodiscard]] std::pair DftPageSizeHostRatio( + std::size_t n_cache_bytes, bool is_validation, double cache_host_ratio, + std::int64_t min_cache_page_bytes) { + common::AssertGPUSupport(); + + if (!HostRatioIsAuto(cache_host_ratio)) { + // Use user config. + CHECK_GE(cache_host_ratio, 0.0f) << error::CacheHostRatioInvalid(); + CHECK_LE(cache_host_ratio, 1.0f) << error::CacheHostRatioInvalid(); + } + +#if defined(XGBOOST_USE_CUDA) + auto n_d_bytes = curt::TotalMemory(); + + using xgboost::cuda_impl::CachePageRatio; + + auto lc = cudr::GetC2cLinkCountFromSmiGlobal(); + if (lc >= 10) { + // >= 10, life is easy. + if (CachePageBytesIsAuto(min_cache_page_bytes)) { + min_cache_page_bytes = n_d_bytes * CachePageRatio(); + } + if (HostRatioIsAuto(cache_host_ratio)) { + cache_host_ratio = 1.0; + } + return {cache_host_ratio, min_cache_page_bytes}; + } + + /** + * Configure the min_cache_page_bytes + */ + // -1 if PCIe device, or something went wrong when running nvidia-smi + // + // GH200 1 CPU + 1 GPU has 10. For 1 CPU + 2 GPU, it's 5. + // + // Either way, we configure the cache based on the ratio between cache sizes and the + // available memory. + // Use half of the device memory for cache. + auto d_cache_nbytes = n_d_bytes / 2; + + // Since half of the device is used for the cache, we have to use smaller page size. + if (CachePageBytesIsAuto(min_cache_page_bytes)) { + min_cache_page_bytes = n_d_bytes * (CachePageRatio() / 2.0); + } + + /** + * Configure the ratio. 
+ */ + if (!HostRatioIsAuto(cache_host_ratio)) { + // Do nothing if it's provided by the user + return {cache_host_ratio, min_cache_page_bytes}; + } else if (is_validation) { + // Use full host cache for the validation dataset. + cache_host_ratio = 1.0; + } else if (n_cache_bytes <= d_cache_nbytes) { + // The total size of the cache is smaller than the available device cache. + cache_host_ratio = 0.0; + } else { + // The number of bytes that must be in the host memory. + auto h_cache_nbytes = n_cache_bytes - d_cache_nbytes * 0.85; + cache_host_ratio = static_cast(h_cache_nbytes) / static_cast(n_cache_bytes); + if (lc > 0) { + // 0 < lc < 10, C2C is available, but with reduced link count. + // No need to exceed half in practice. + cache_host_ratio = std::max(cache_host_ratio, 0.5); + } + } +#else + (void)n_cache_bytes; + (void)is_validation; +#endif // defined(XGBOOST_USE_CUDA) + return {cache_host_ratio, min_cache_page_bytes}; +} } // namespace xgboost::data::detail diff --git a/src/data/batch_utils.h b/src/data/batch_utils.h index 8c8a2e7b3a72..08fbdef701bc 100644 --- a/src/data/batch_utils.h +++ b/src/data/batch_utils.h @@ -4,6 +4,12 @@ #ifndef XGBOOST_DATA_BATCH_UTILS_H_ #define XGBOOST_DATA_BATCH_UTILS_H_ +#include // for isnan +#include // for size_t +#include // for int64_t +#include // for numeric_limits +#include // for pair + #include "xgboost/data.h" // for BatchParam namespace xgboost::data::detail { @@ -34,20 +40,44 @@ inline bool RegenGHist(BatchParam old, BatchParam p) { * @brief Validate the batch parameter from the caller */ void CheckParam(BatchParam const& init, BatchParam const& param); + +/** + * @brief Configure the `cache_host_ratio` and the `min_cache_page_bytes`. + */ +[[nodiscard]] std::pair DftPageSizeHostRatio( + std::size_t n_cache_bytes, bool is_validation, double cache_host_ratio, + std::int64_t min_cache_page_bytes); + +/** + * @brief Check whether we should configure `cache_host_ratio`. + * + * Defined by @ref AutoHostRatio . + */ +[[nodiscard]] inline bool HostRatioIsAuto(float cache_host_ratio) { + return std::isnan(cache_host_ratio); +} +/** + * @brief Check whether we should configure `min_cache_page_bytes`. + * + * Defined by @ref AutoCachePageBytes . + */ +[[nodiscard]] inline bool CachePageBytesIsAuto(std::int64_t min_cache_page_bytes) { + return min_cache_page_bytes == -1; +} } // namespace xgboost::data::detail namespace xgboost::cuda_impl { // Indicator for XGBoost to not concatenate any page. constexpr std::int64_t MatchingPageBytes() { return 0; } -// Maxmimum number of pages from the validation dataset to be cached in the device memory. -constexpr std::int32_t MaxNumDevicePages() { return 1; } -// Default size of the cached page +// Default size of the cached page, 1/8 constexpr double CachePageRatio() { return 0.125; } // Indicator for XGBoost to automatically concatenate pages. constexpr std::int64_t AutoCachePageBytes() { return -1; } // Use two batches for prefetching. There's always one batch being worked on, while the other // batch is being transferred. constexpr auto DftPrefetchBatches() { return 2; } +// The ratio of the cache split for external memory. Use -1 to indicate not-set. +constexpr float AutoHostRatio() { return std::numeric_limits::quiet_NaN(); } // Empty parameter to prevent regen, only used to control external memory prefetching.
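To make the fallback arithmetic in `DftPageSizeHostRatio` concrete, here is a worked example with assumed sizes (illustrative numbers, not measurements): a 96 GiB device on a reduced-link C2C system, and a 150 GiB quantile cache.

```cpp
#include <algorithm>  // for max
#include <iostream>   // for cout

int main() {
  // All sizes in GiB, chosen only to exercise the branch above.
  double n_d_bytes = 96.0;                    // total device memory
  double d_cache = n_d_bytes / 2.0;           // 48: half reserved for the device cache
  double n_cache = 150.0;                     // total cache size (> d_cache)
  double h_cache = n_cache - d_cache * 0.85;  // 109.2 GiB must stay on the host
  double ratio = h_cache / n_cache;           // ~0.728
  ratio = std::max(ratio, 0.5);               // clamp when 0 < link count < 10
  std::cout << ratio << "\n";                 // unchanged here: already above 0.5
}
```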
// diff --git a/src/data/cat_container.cc b/src/data/cat_container.cc index c70b7fc10579..f6f0fc0214ae 100644 --- a/src/data/cat_container.cc +++ b/src/data/cat_container.cc @@ -9,11 +9,15 @@ #include // for move #include // for vector -#include "../encoder/types.h" // for Overloaded -#include "xgboost/json.h" // for Json +#include "../collective/allreduce.h" // for Allreduce +#include "../collective/communicator-inl.h" // for GetRank, GetWorldSize +#include "../common/error_msg.h" // for NoFloatCat +#include "../encoder/types.h" // for Overloaded +#include "xgboost/json.h" // for Json namespace xgboost { -CatContainer::CatContainer(enc::HostColumnsView const& df) : CatContainer{} { +CatContainer::CatContainer(enc::HostColumnsView const& df, bool is_ref) : CatContainer{} { + this->is_ref_ = is_ref; this->n_total_cats_ = df.n_total_cats; if (this->n_total_cats_ == 0) { return; @@ -39,6 +43,12 @@ CatContainer::CatContainer(enc::HostColumnsView const& df) : CatContainer{} { using T = typename cpu_impl::ViewToStorageImpl>::Type; this->cpu_impl_->columns.emplace_back(); + using ElemT = typename T::value_type; + + if constexpr (std::is_floating_point_v) { + LOG(FATAL) << error::NoFloatCat(); + } + this->cpu_impl_->columns.back().emplace(); auto& v = std::get(this->cpu_impl_->columns.back()); v.resize(values.size()); @@ -54,12 +64,31 @@ CatContainer::CatContainer(enc::HostColumnsView const& df) : CatContainer{} { CHECK(this->HostCanRead()); CHECK_EQ(this->n_total_cats_, df.feature_segments.back()); CHECK_GE(this->n_total_cats_, 0) << "Too many categories."; + if (this->n_total_cats_ > 0) { + CHECK(!this->cpu_impl_->columns.empty()); + } } namespace { template struct PrimToUbj; +template <> +struct PrimToUbj { + using Type = U8Array; +}; +template <> +struct PrimToUbj { + using Type = U16Array; +}; +template <> +struct PrimToUbj { + using Type = U32Array; +}; +template <> +struct PrimToUbj { + using Type = U64Array; +}; template <> struct PrimToUbj { using Type = I8Array; @@ -183,18 +212,34 @@ void CatContainer::Load(Json const& in) { LoadJson(jvalues, &columns.back()); break; } + case T::kU8Array: { + LoadJson(jvalues, &columns.back()); + break; + } case T::kI16Array: { LoadJson(jvalues, &columns.back()); break; } + case T::kU16Array: { + LoadJson(jvalues, &columns.back()); + break; + } case T::kI32Array: { LoadJson(jvalues, &columns.back()); break; } + case T::kU32Array: { + LoadJson(jvalues, &columns.back()); + break; + } case T::kI64Array: { LoadJson(jvalues, &columns.back()); break; } + case T::kU64Array: { + LoadJson(jvalues, &columns.back()); + break; + } case T::kF32Array: { LoadJson(jvalues, &columns.back()); break; @@ -229,17 +274,43 @@ CatContainer::CatContainer() : cpu_impl_{std::make_uniqueCopyCommon(that); } +void CatContainer::Copy(Context const* ctx, CatContainer const& that) { + [[maybe_unused]] auto h_view = that.HostView(); + this->CopyCommon(ctx, that); + this->cpu_impl_->Copy(that.cpu_impl_.get()); +} [[nodiscard]] enc::HostColumnsView CatContainer::HostView() const { return this->HostViewImpl(); } +[[nodiscard]] bool CatContainer::Empty() const { return this->cpu_impl_->columns.empty(); } + +[[nodiscard]] std::size_t CatContainer::NumFeatures() const { + return this->cpu_impl_->columns.size(); +} + void CatContainer::Sort(Context const* ctx) { CHECK(ctx->IsCPU()); auto view = this->HostView(); this->sorted_idx_.HostVector().resize(view.n_total_cats); enc::SortNames(enc::Policy{}, view, this->sorted_idx_.HostSpan()); } - -[[nodiscard]] bool CatContainer::DeviceCanRead() const 
{ return false; } #endif // !defined(XGBOOST_USE_CUDA) + +void SyncCategories(Context const* ctx, CatContainer* cats, bool is_empty) { + CHECK(cats); + if (!collective::IsDistributed()) { + return; + } + + auto rank = collective::GetRank(); + std::vector workers(collective::GetWorldSize(), 0); + workers[rank] = is_empty; + collective::SafeColl(collective::Allreduce(ctx, &workers, collective::Op::kSum)); + if (cats->HasCategorical() && + std::any_of(workers.cbegin(), workers.cend(), [](auto v) { return v == 1; })) { + LOG(FATAL) + << "A worker cannot have empty input when a dataframe with categorical features is used. " + "XGBoost cannot infer the categories if the input is empty."; + } +} } // namespace xgboost diff --git a/src/data/cat_container.cu b/src/data/cat_container.cu index fa5134905f77..d957089b8ea1 100644 --- a/src/data/cat_container.cu +++ b/src/data/cat_container.cu @@ -6,8 +6,10 @@ #include // for make_unique #include // for vector +#include "../common/cuda_context.cuh" // for CUDAContext #include "../common/device_helpers.cuh" // for ToSpan #include "../common/device_vector.cuh" // for device_vector +#include "../common/type.h" // for GetValueT #include "../encoder/ordinal.cuh" // for SortNames #include "../encoder/ordinal.h" // for DictionaryView #include "../encoder/types.h" // for Overloaded @@ -22,41 +24,49 @@ struct CatContainerImpl { dh::device_vector columns_v; template - void CopyFrom(enc::detail::ColumnsViewImpl that) { + void CopyFrom(Context const* ctx, enc::detail::ColumnsViewImpl that) { this->columns.resize(that.columns.size()); this->columns_v.resize(that.columns.size()); CHECK_EQ(this->columns.size(), this->columns_v.size()); + auto stream = ctx->CUDACtx()->Stream(); std::vector h_columns_v(this->columns_v.size()); for (std::size_t f_idx = 0, n = that.columns.size(); f_idx < n; ++f_idx) { auto const& col_v = that.columns[f_idx]; auto dispatch = enc::Overloaded{ - [this, f_idx, &h_columns_v](enc::CatStrArrayView const& str) { + [this, f_idx, &h_columns_v, stream](enc::CatStrArrayView const& str) { this->columns[f_idx].emplace(); auto& col = std::get(this->columns[f_idx]); // Handle the offsets col.offsets.resize(str.offsets.size()); - dh::safe_cuda(cudaMemcpyAsync(thrust::raw_pointer_cast(col.offsets.data()), - str.offsets.data(), str.offsets.size_bytes(), - cudaMemcpyDefault)); + if (!str.offsets.empty()) { + dh::safe_cuda(cudaMemcpyAsync(thrust::raw_pointer_cast(col.offsets.data()), + str.offsets.data(), str.offsets.size_bytes(), + cudaMemcpyDefault, stream)); + } // Handle the values col.values.resize(str.values.size()); - dh::safe_cuda(cudaMemcpyAsync(thrust::raw_pointer_cast(col.values.data()), - str.values.data(), str.values.size_bytes(), - cudaMemcpyDefault)); + if (!col.values.empty()) { + dh::safe_cuda(cudaMemcpyAsync(thrust::raw_pointer_cast(col.values.data()), + str.values.data(), str.values.size_bytes(), + cudaMemcpyDefault, stream)); + } // Create the view h_columns_v[f_idx].emplace(); auto& col_v = cuda::std::get(h_columns_v[f_idx]); col_v = {dh::ToSpan(col.offsets), dh::ToSpan(col.values)}; }, - [this, f_idx, &h_columns_v](auto&& values) { + [this, f_idx, &h_columns_v, stream](auto&& values) { using T = std::remove_cv_t::value_type>; this->columns[f_idx].emplace>(); auto& col = std::get>(this->columns[f_idx]); col.resize(values.size()); - thrust::copy_n(values.data(), values.size(), col.data()); + if (!values.empty()) { + dh::safe_cuda(cudaMemcpyAsync(col.data().get(), values.data(), values.size_bytes(), + cudaMemcpyDefault, stream)); + } // 
Create the view using V = common::Span>; @@ -65,7 +75,7 @@ struct CatContainerImpl { col_v = dh::ToSpan(col); }}; auto visit = [&](auto const& col) { - using ColT = std::remove_cv_t>; + using ColT = common::GetValueT; if constexpr (std::is_same_v) { std::visit(dispatch, col); } else { @@ -93,14 +103,18 @@ struct CatContainerImpl { auto& out_str = std::get(out_col); // Offsets out_str.offsets.resize(str.offsets.size()); - dh::safe_cuda(cudaMemcpyAsync( - out_str.offsets.data(), thrust::raw_pointer_cast(str.offsets.data()), - common::Span{out_str.offsets}.size_bytes(), cudaMemcpyDefault)); + if (!out_str.offsets.empty()) { + dh::safe_cuda(cudaMemcpyAsync( + out_str.offsets.data(), thrust::raw_pointer_cast(str.offsets.data()), + common::Span{out_str.offsets}.size_bytes(), cudaMemcpyDefault)); + } // Values out_str.values.resize(str.values.size()); - dh::safe_cuda(cudaMemcpyAsync( - out_str.values.data(), thrust::raw_pointer_cast(str.values.data()), - common::Span{out_str.values}.size_bytes(), cudaMemcpyDefault)); + if (!out_str.values.empty()) { + dh::safe_cuda(cudaMemcpyAsync( + out_str.values.data(), thrust::raw_pointer_cast(str.values.data()), + common::Span{out_str.values}.size_bytes(), cudaMemcpyDefault)); + } }, [&](auto&& values) { using T0 = decltype(values); @@ -109,52 +123,72 @@ struct CatContainerImpl { out_col.emplace(); auto& out_vec = std::get(out_col); out_vec.resize(values.size()); - dh::safe_cuda( - cudaMemcpyAsync(out_vec.data(), thrust::raw_pointer_cast(values.data()), - common::Span{out_vec}.size_bytes(), cudaMemcpyDefault)); + if (!out_vec.empty()) { + dh::safe_cuda(cudaMemcpyAsync( + out_vec.data(), thrust::raw_pointer_cast(values.data()), + common::Span{out_vec}.size_bytes(), cudaMemcpyDefault)); + } }}, col); } that->Finalize(); } }; + +[[nodiscard]] std::tuple> MakeCatAccessor( + Context const* ctx, enc::DeviceColumnsView const& new_enc, CatContainer const* orig_cats) { + dh::DeviceUVector mapping(new_enc.n_total_cats); + auto d_sorted_idx = orig_cats->RefSortedIndex(ctx); + auto orig_enc = orig_cats->DeviceView(ctx); + enc::Recode(EncPolicy, orig_enc, d_sorted_idx, new_enc, dh::ToSpan(mapping)); + CHECK_EQ(new_enc.feature_segments.size(), orig_enc.feature_segments.size()); + auto cats_mapping = enc::MappingView{new_enc.feature_segments, dh::ToSpan(mapping)}; + auto acc = CatAccessor{cats_mapping}; + return std::tuple{acc, std::move(mapping)}; +} } // namespace cuda_impl CatContainer::CatContainer() // NOLINT : cpu_impl_{std::make_unique()}, cu_impl_{std::make_unique()} {} -CatContainer::CatContainer(DeviceOrd device, enc::DeviceColumnsView const& df) : CatContainer{} { +CatContainer::CatContainer(Context const* ctx, enc::DeviceColumnsView const& df, bool is_ref) + : CatContainer{} { + this->is_ref_ = is_ref; this->n_total_cats_ = df.n_total_cats; - this->feature_segments_.SetDevice(device); + this->feature_segments_.SetDevice(ctx->Device()); this->feature_segments_.Resize(df.feature_segments.size()); auto d_segs = this->feature_segments_.DeviceSpan(); - thrust::copy_n(dh::tcbegin(df.feature_segments), df.feature_segments.size(), dh::tbegin(d_segs)); + thrust::copy_n(ctx->CUDACtx()->CTP(), dh::tcbegin(df.feature_segments), + df.feature_segments.size(), dh::tbegin(d_segs)); // FIXME(jiamingy): We can use a single kernel for copying data once cuDF can return - // device data. - this->cu_impl_->CopyFrom(df); + // device data. Remove this along with the one in the device cuDF adapter. 
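The `empty()` guards added around the async copies above avoid handing `cudaMemcpyAsync` the null pointer that `thrust::raw_pointer_cast` yields for an empty `device_vector`; depending on the toolkit, that can be reported as `cudaErrorInvalidValue` even with a zero byte count. A minimal sketch of the guarded-copy pattern (error handling elided):

```cpp
#include <cstddef>                 // for size_t
#include <cuda_runtime_api.h>      // for cudaMemcpyAsync
#include <thrust/device_vector.h>  // for device_vector

// Copy n host elements into a device vector, skipping the call entirely when
// there is nothing to copy (an empty vector has a null data() pointer).
template <typename T>
void GuardedCopyToDevice(thrust::device_vector<T>* dst, T const* src,
                         std::size_t n, cudaStream_t stream) {
  dst->resize(n);
  if (n != 0) {
    cudaMemcpyAsync(thrust::raw_pointer_cast(dst->data()), src, n * sizeof(T),
                    cudaMemcpyDefault, stream);
  }
}
```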
+ this->cu_impl_->CopyFrom(ctx, df); - this->sorted_idx_.SetDevice(device); + this->sorted_idx_.SetDevice(ctx->Device()); this->sorted_idx_.Resize(0); if (this->n_total_cats_ > 0) { CHECK(this->DeviceCanRead()); CHECK(!this->HostCanRead()); + CHECK(!this->cu_impl_->columns.empty()); } } CatContainer::~CatContainer() = default; -[[nodiscard]] bool CatContainer::DeviceCanRead() const { return !this->cu_impl_->columns.empty(); } - void CatContainer::Copy(Context const* ctx, CatContainer const& that) { - this->CopyCommon(that); if (ctx->IsCPU()) { - auto h_view = that.HostView(); - CHECK(!h_view.Empty()); + // Pull data to host + [[maybe_unused]] auto h_view = that.HostView(); + this->CopyCommon(ctx, that); this->cpu_impl_->Copy(that.cpu_impl_.get()); + CHECK(!this->DeviceCanRead()); } else { + // Pull data to device + [[maybe_unused]] auto d_view = that.DeviceView(ctx); + this->CopyCommon(ctx, that); auto const& that_impl = that.cu_impl_; this->cu_impl_->columns.resize(that.cu_impl_->columns.size()); @@ -186,17 +220,45 @@ void CatContainer::Copy(Context const* ctx, CatContainer const& that) { col); } this->cu_impl_->columns_v = h_columns_v; + CHECK(this->Empty() || !this->HostCanRead()); } + if (ctx->IsCPU()) { + CHECK_EQ(this->cpu_impl_->columns_v.size(), that.cpu_impl_->columns_v.size()); + CHECK_EQ(this->cpu_impl_->columns.size(), that.cpu_impl_->columns.size()); + CHECK(this->HostCanRead()); + } else { + CHECK_EQ(this->cu_impl_->columns_v.size(), that.cu_impl_->columns_v.size()); + CHECK_EQ(this->cu_impl_->columns.size(), that.cu_impl_->columns.size()); + CHECK(this->DeviceCanRead()); + } + CHECK_EQ(this->Empty(), that.Empty()); + CHECK_EQ(this->NumCatsTotal(), that.NumCatsTotal()); +} + +[[nodiscard]] bool CatContainer::Empty() const { + return this->HostCanRead() ? 
this->cpu_impl_->columns.empty() : this->cu_impl_->columns.empty(); +} + +[[nodiscard]] std::size_t CatContainer::NumFeatures() const { + if (this->HostCanRead()) { + return this->cpu_impl_->columns.size(); + } + return this->cu_impl_->columns.size(); } void CatContainer::Sort(Context const* ctx) { + if (!this->HasCategorical()) { + return; + } + if (ctx->IsCPU()) { auto view = this->HostView(); + CHECK(!view.Empty()) << view.n_total_cats; this->sorted_idx_.HostVector().resize(view.n_total_cats); - enc::SortNames(enc::Policy{}, view, this->sorted_idx_.HostSpan()); + enc::SortNames(cpu_impl::EncPolicy, view, this->sorted_idx_.HostSpan()); } else { auto view = this->DeviceView(ctx); - CHECK(!view.Empty()) << this->HostView().Size(); + CHECK(!view.Empty()) << view.n_total_cats; this->sorted_idx_.SetDevice(ctx->Device()); this->sorted_idx_.Resize(view.n_total_cats); enc::SortNames(cuda_impl::EncPolicy, view, this->sorted_idx_.DeviceSpan()); @@ -206,21 +268,30 @@ void CatContainer::Sort(Context const* ctx) { [[nodiscard]] enc::HostColumnsView CatContainer::HostView() const { std::lock_guard guard{device_mu_}; if (!this->HostCanRead()) { + this->feature_segments_.ConstHostSpan(); // Lazy copy to host this->cu_impl_->CopyTo(this->cpu_impl_.get()); } + CHECK(this->HostCanRead()); return this->HostViewImpl(); } [[nodiscard]] enc::DeviceColumnsView CatContainer::DeviceView(Context const* ctx) const { CHECK(ctx->IsCUDA()); std::lock_guard guard{device_mu_}; - this->feature_segments_.SetDevice(ctx->Device()); if (!this->DeviceCanRead()) { + this->feature_segments_.SetDevice(ctx->Device()); + this->feature_segments_.ConstDeviceSpan(); // Lazy copy to device auto h_view = this->HostViewImpl(); - CHECK(!h_view.Empty()); - this->cu_impl_->CopyFrom(h_view); + this->cu_impl_->CopyFrom(ctx, h_view); + CHECK_EQ(this->cu_impl_->columns_v.size(), this->cpu_impl_->columns_v.size()); + CHECK_EQ(this->cu_impl_->columns.size(), this->cpu_impl_->columns.size()); + } + CHECK(this->DeviceCanRead()); + if (this->n_total_cats_ != 0) { + CHECK(!this->cu_impl_->columns_v.empty()); + CHECK_EQ(this->feature_segments_.Size(), this->cu_impl_->columns_v.size() + 1); } return {dh::ToSpan(this->cu_impl_->columns_v), this->feature_segments_.ConstDeviceSpan(), this->n_total_cats_}; diff --git a/src/data/cat_container.cuh b/src/data/cat_container.cuh index 8cfbf6ee16e1..318b45b0d414 100644 --- a/src/data/cat_container.cuh +++ b/src/data/cat_container.cuh @@ -60,16 +60,18 @@ struct EncThrustPolicy { template using ThrustAllocator = dh::XGBDeviceAllocator; - auto ThrustPolicy() const { -#if defined(XGBOOST_USE_RMM) - return rmm::exec_policy_nosync{}; -#else - return dh::CachingThrustPolicy(); -#endif // defined(XGBOOST_USE_RMM) + [[nodiscard]] auto ThrustPolicy() const { + dh::XGBCachingDeviceAllocator alloc; + auto exec = thrust::cuda::par_nosync(alloc).on(curt::DefaultStream()); + return exec; } + [[nodiscard]] auto Stream() const { return curt::DefaultStream(); } }; using EncPolicyT = enc::Policy; inline EncPolicyT EncPolicy = EncPolicyT{}; + +[[nodiscard]] std::tuple> MakeCatAccessor( + Context const* ctx, enc::DeviceColumnsView const& new_enc, CatContainer const* orig_cats); } // namespace xgboost::cuda_impl diff --git a/src/data/cat_container.h b/src/data/cat_container.h index b6ceed1f4219..4ad989b16ce4 100644 --- a/src/data/cat_container.h +++ b/src/data/cat_container.h @@ -3,21 +3,25 @@ */ #pragma once -#include // for bst_cat_t - #include // for int32_t, int8_t #include // for unique_ptr #include // for mutex #include // for 
string #include // for tuple +#include // for move #include // for vector +#include "../common/categorical.h" // for AsCat #include "../encoder/ordinal.h" // for CatStrArrayView #include "../encoder/types.h" // for Overloaded +#include "entry.h" // for COOTuple +#include "xgboost/base.h" // for bst_cat_t +#include "xgboost/data.h" // for Entry #include "xgboost/host_device_vector.h" // for HostDeviceVector -#include "xgboost/json.h" // for Json namespace xgboost { +class Json; + /** * @brief Error policy class used to interface with the encoder implementation. */ @@ -104,47 +108,67 @@ struct CatContainerImpl; */ class CatContainer { /** - * @brief Implementation of the Copy method, used by both CPU and GPU. + * @brief Implementation of the Copy method, used by both CPU and GPU. Note that this + * method changes the permission in the HostDeviceVector as we need to pull data into + * targeted devices. */ - void CopyCommon(CatContainer const& that) { - this->sorted_idx_.SetDevice(that.sorted_idx_.Device()); + void CopyCommon(Context const* ctx, CatContainer const& that) { + auto device = ctx->Device(); + + that.sorted_idx_.SetDevice(device); + this->sorted_idx_.SetDevice(device); this->sorted_idx_.Resize(that.sorted_idx_.Size()); this->sorted_idx_.Copy(that.sorted_idx_); - this->feature_segments_.SetDevice(that.feature_segments_.Device()); + this->feature_segments_.SetDevice(device); + that.feature_segments_.SetDevice(device); this->feature_segments_.Resize(that.feature_segments_.Size()); this->feature_segments_.Copy(that.feature_segments_); this->n_total_cats_ = that.n_total_cats_; + + if (!device.IsCPU()) { + // Pull to device + this->sorted_idx_.ConstDevicePointer(); + this->feature_segments_.ConstDevicePointer(); + } } [[nodiscard]] enc::HostColumnsView HostViewImpl() const { CHECK_EQ(this->cpu_impl_->columns.size(), this->cpu_impl_->columns_v.size()); + if (this->n_total_cats_ != 0) { + CHECK(!this->cpu_impl_->columns_v.empty()); + } return {common::Span{this->cpu_impl_->columns_v}, this->feature_segments_.ConstHostSpan(), this->n_total_cats_}; } public: CatContainer(); - explicit CatContainer(enc::HostColumnsView const& df); + explicit CatContainer(enc::HostColumnsView const& df, bool is_ref); #if defined(XGBOOST_USE_CUDA) - explicit CatContainer(DeviceOrd device, enc::DeviceColumnsView const& df); + explicit CatContainer(Context const* ctx, enc::DeviceColumnsView const& df, bool is_ref); #endif // defined(XGBOOST_USE_CUDA) ~CatContainer(); void Copy(Context const* ctx, CatContainer const& that); - [[nodiscard]] bool HostCanRead() const { - return !this->cpu_impl_->columns.empty() || this->n_total_cats_ == 0; - } - [[nodiscard]] bool DeviceCanRead() const; + [[nodiscard]] bool HostCanRead() const { return this->feature_segments_.HostCanRead(); } + [[nodiscard]] bool DeviceCanRead() const { return this->feature_segments_.DeviceCanRead(); } // Mostly used for testing. void Push(cpu_impl::ColumnType const& column) { this->cpu_impl_->columns.emplace_back(column); } + /** + * @brief Whether the container is initialized at all. If the input is not a DataFrame, + this method returns true.
+ */ + [[nodiscard]] bool Empty() const; + [[nodiscard]] bool NeedRecode() const { return this->HasCategorical() && !this->is_ref_; } - [[nodiscard]] bool Empty() const { return this->cpu_impl_->columns.empty(); } - - [[nodiscard]] std::size_t NumFeatures() const { return this->cpu_impl_->columns.size(); } + [[nodiscard]] std::size_t NumFeatures() const; + /** + * @brief The number of categories across all features. + */ [[nodiscard]] std::size_t NumCatsTotal() const { return this->n_total_cats_; } /** @@ -160,10 +184,9 @@ class CatContainer { [[nodiscard]] common::Span RefSortedIndex(Context const* ctx) const { std::lock_guard guard{device_mu_}; if (ctx->IsCPU()) { - CHECK(this->sorted_idx_.HostCanRead()); return this->sorted_idx_.ConstHostSpan(); } else { - CHECK(this->sorted_idx_.DeviceCanRead()); + sorted_idx_.SetDevice(ctx->Device()); return this->sorted_idx_.ConstDeviceSpan(); } } @@ -199,5 +222,61 @@ class CatContainer { #if defined(XGBOOST_USE_CUDA) std::unique_ptr cu_impl_; #endif // defined(XGBOOST_USE_CUDA) + bool is_ref_{false}; }; + +/** + * @brief Accessor for obtaining re-coded categories. + */ +struct CatAccessor { + enc::MappingView enc; + + template + [[nodiscard]] XGBOOST_DEVICE T operator()(T fvalue, Fidx f_idx) const { + if (!enc.Empty() && !enc[f_idx].empty()) { + auto f_mapping = enc[f_idx]; + auto cat_idx = common::AsCat(fvalue); + if (cat_idx >= 0 && cat_idx < common::AsCat(f_mapping.size())) { + fvalue = f_mapping.data()[cat_idx]; + } + } + return fvalue; + } + [[nodiscard]] XGBOOST_DEVICE float operator()(Entry const& e) const { + return this->operator()(e.fvalue, e.index); + } + [[nodiscard]] XGBOOST_DEVICE float operator()(data::COOTuple const& e) const { + return this->operator()(e.value, e.column_idx); + } +}; + +/** + * @brief No-op accessor used to handle numeric data. + */ +struct NoOpAccessor { + constexpr explicit NoOpAccessor(enc::MappingView const&) {} + constexpr NoOpAccessor() = default; + template + [[nodiscard]] XGBOOST_DEVICE T operator()(T fvalue, Fidx) const { + return fvalue; + } + [[nodiscard]] XGBOOST_DEVICE float operator()(data::COOTuple const& e) const { return e.value; } + [[nodiscard]] XGBOOST_DEVICE float operator()(Entry const& e) const { return e.fvalue; } +}; + +void SyncCategories(Context const* ctx, CatContainer* cats, bool is_empty); + +namespace cpu_impl { +inline auto MakeCatAccessor(Context const* ctx, enc::HostColumnsView const& new_enc, + CatContainer const* orig_cats) { + std::vector mapping(new_enc.n_total_cats); + auto sorted_idx = orig_cats->RefSortedIndex(ctx); + auto orig_enc = orig_cats->HostView(); + enc::Recode(cpu_impl::EncPolicy, orig_enc, sorted_idx, new_enc, common::Span{mapping}); + CHECK_EQ(new_enc.feature_segments.size(), orig_enc.feature_segments.size()); + auto cats_mapping = enc::MappingView{new_enc.feature_segments, mapping}; + auto acc = CatAccessor{cats_mapping}; + return std::tuple{acc, std::move(mapping)}; +} +} // namespace cpu_impl } // namespace xgboost diff --git a/src/data/columnar.h b/src/data/columnar.h new file mode 100644 index 000000000000..28ffa372554a --- /dev/null +++ b/src/data/columnar.h @@ -0,0 +1,167 @@ +/** + * Copyright 2025, XGBoost Contributors + * + * @brief Helpers for handling columnar data with adapters. 
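The `CatAccessor` / `NoOpAccessor` pair above lets the same adapter template either re-code categorical values through a per-feature mapping or pass numeric values straight through. A self-contained sketch of the re-coding step; the container types are simplified here, whereas the real code uses `enc::MappingView` over spans:

```cpp
#include <cstddef>  // for size_t
#include <cstdint>  // for int32_t, int64_t
#include <vector>   // for vector

// Re-code one categorical value for feature `fidx` through a per-feature
// table, as CatAccessor::operator() does; out-of-range codes pass through.
float RecodeCategory(float fvalue, std::size_t fidx,
                     std::vector<std::vector<std::int32_t>> const& mapping) {
  auto const& table = mapping[fidx];
  auto cat = static_cast<std::int64_t>(fvalue);
  if (!table.empty() && cat >= 0 && cat < static_cast<std::int64_t>(table.size())) {
    return static_cast<float>(table[cat]);
  }
  return fvalue;
}
```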
+ */ +#pragma once + +#include // for max +#include // for size_t +#include // for int32_t +#include // for is_floating_point_v +#include // for vector + +#include "../common/error_msg.h" // for NoFloatCat +#include "../encoder/ordinal.h" // for CatStrArrayView +#include "array_interface.h" // for ArrayInterfaceHandler +#include "xgboost/context.h" // for DeviceOrd +#include "xgboost/json.h" // for Json, Object +#include "xgboost/span.h" // for Span + +#if !defined(XGBOOST_USE_CUDA) +#include "../common/common.h" // for AssertGPUSupport +#else +#include // for cudaMemcpy +#endif + +namespace xgboost::data { +/** + * @brief Get string-based category index from arrow. + * + * @return The extracted category index + */ +template +auto GetArrowNames(Object::Map const& jnames, std::vector* p_cat_columns) { + auto& cat_columns = *p_cat_columns; + // There are 3 buffers for a StringArray, validity mask, offset, and data. Mask + // and data are represented by a single masked array. + auto const& joffset = get(jnames.at("offsets")); + auto offset = ArrayInterface<1>{joffset}; + auto const& jstr = get(jnames.at("values")); + auto strbuf = ArrayInterface<1>(jstr); + + // Obtain the size of the string buffer using the offset + CHECK_GE(offset.n, 2); + auto offset_last_idx = offset.n - 1; + if (ArrayInterfaceHandler::IsCudaPtr(offset.data)) { + CHECK_EQ(strbuf.n, 0); // Unknown +#if defined(XGBOOST_USE_CUDA) + DispatchDType(offset.type, [&](auto t) { + using T = decltype(t); + if (!std::is_same_v) { + LOG(FATAL) << "Invalid type for the string offset from category index."; + } +#if defined(__CUDACC__) +#pragma nv_diagnostic push +#pragma nv_diag_suppress 20208 // long double is treated as double in device code +#endif // defined(__CUDACC__) + T back{0}; + dh::safe_cuda(cudaMemcpy(&back, static_cast(offset.data) + offset_last_idx, + sizeof(T), cudaMemcpyDeviceToHost)); + strbuf.n = back; +#if defined(__CUDACC__) +#pragma nv_diagnostic pop +#endif // defined(__CUDACC__) + }); +#else + common::AssertGPUSupport(); +#endif + } else { + DispatchDType(offset.type, [&](auto t) { + using T = decltype(t); + if (!std::is_same_v) { + LOG(FATAL) << "Invalid type for the string offset from category index."; + } + auto back = offset(offset_last_idx); + strbuf.n = back; + }); + } + + CHECK_EQ(strbuf.type, ArrayInterfaceHandler::kI1); + CHECK_EQ(offset.type, ArrayInterfaceHandler::kI4); + auto names = enc::CatStrArrayView{ + common::Span{static_cast(offset.data), offset.Shape<0>()}, + common::Span{reinterpret_cast(strbuf.data), strbuf.n}}; + cat_columns.emplace_back(names); + return names; +} + +/** + * @brief Get string names and codes for categorical features. + * + * @return The number of categories for the current column. + */ +template +[[nodiscard]] std::size_t GetArrowDictionary(Json const& jcol, + std::vector* p_cat_columns, + std::vector>* p_columns, + std::size_t* p_n_bytes, bst_idx_t* p_n_samples) { + auto const& tup = get(jcol); + CHECK_EQ(tup.size(), 2); + + auto names = GetArrowNames(get(tup[0]), p_cat_columns); + + // arrow Integer array for encoded categories + auto const& jcodes = get(tup[1]); + auto codes = ArrayInterface<1>{jcodes}; + p_columns->push_back(codes); + + auto& n_bytes = *p_n_bytes; + n_bytes += codes.ElementSize() * codes.Shape<0>(); + n_bytes += names.SizeBytes(); + + *p_n_samples = std::max(*p_n_samples, static_cast(codes.Shape<0>())); + return names.size(); +} + +/** + * @brief Get numeric-based category index from arrow. 
+ * + * @return The extracted category index + */ +template +[[nodiscard]] std::size_t GetArrowNumericNames(DeviceOrd device, Object::Map const& jnames, + std::vector* p_cat_columns, + std::size_t* p_n_bytes) { + auto names = ArrayInterface<1>{jnames}; + auto& n_bytes = *p_n_bytes; + DispatchDType(names, device, [&](auto t) { + using T = typename decltype(t)::value_type; + constexpr bool kKnownType = enc::MemberOf, enc::CatPrimIndexTypes>::value; + CHECK(kKnownType) << "Unsupported categorical index type: `" + << ArrayInterfaceHandler::TypeStr(names.type) << "`."; + if constexpr (std::is_floating_point_v) { + LOG(FATAL) << error::NoFloatCat(); + } + auto span = common::Span{t.Values().data(), t.Size()}; + if constexpr (kKnownType) { + p_cat_columns->emplace_back(span); + n_bytes += span.size_bytes(); + } + }); + return names.n; +} + +/** + * @brief Get numeric names and codes for categorical features. + * + * @return The number of categories for the current column. + */ +template +[[nodiscard]] std::size_t GetArrowNumericIndex( + DeviceOrd device, Json jcol, std::vector* p_cat_columns, + std::vector>* p_columns, std::size_t* p_n_bytes, + bst_idx_t* p_n_samples) { + auto const& first = get(jcol[0]); + auto n_cats = GetArrowNumericNames(device, first, p_cat_columns, p_n_bytes); + auto& n_bytes = *p_n_bytes; + auto const& jcodes = get(jcol[1]); + auto codes = ArrayInterface<1>{jcodes}; + p_columns->push_back(codes); + + n_bytes += codes.ElementSize() * codes.Shape<0>(); + *p_n_samples = std::max(*p_n_samples, static_cast(codes.Shape<0>())); + + return n_cats; +} +} // namespace xgboost::data diff --git a/src/data/data.cc b/src/data/data.cc index 043fc14bd2b5..fa0545d09b08 100644 --- a/src/data/data.cc +++ b/src/data/data.cc @@ -30,12 +30,12 @@ #include "../common/numeric.h" // for Iota, RunLengthEncode #include "../common/threading_utils.h" // for ParallelFor #include "../common/version.h" // for Version -#include "../data/adapter.h" // for COOTuple, FileAdapter, IsValidFunctor +#include "../data/adapter.h" // for FileAdapter +#include "../data/entry.h" // for COOTuple, IsValidFunctor #include "../data/extmem_quantile_dmatrix.h" // for ExtMemQuantileDMatrix #include "../data/iterative_dmatrix.h" // for IterativeDMatrix #include "./sparse_page_dmatrix.h" // for SparsePageDMatrix #include "array_interface.h" // for ArrayInterfaceHandler, ArrayInterface, Dispa... 
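For reference, the buffer layout GetArrowNames consumes is the standard Arrow StringArray: an int32 offsets buffer of length n + 1 and a concatenated character buffer whose total size equals the last offset, which is why the code reads offset(offset.n - 1) back (via cudaMemcpy when the offsets live on the device). A small host-side sketch, with std::vector standing in for the array interface:

#include <cstdint>
#include <string_view>
#include <vector>

std::vector<std::string_view> DecodeNames(std::vector<std::int32_t> const& offsets,
                                          char const* values) {
  // Entry i spans [offsets[i], offsets[i + 1]) in the character buffer.
  std::vector<std::string_view> names;
  for (std::size_t i = 0; i + 1 < offsets.size(); ++i) {
    names.emplace_back(values + offsets[i],
                       static_cast<std::size_t>(offsets[i + 1] - offsets[i]));
  }
  return names;
}

For example, offsets {0, 3, 5} over the buffer "abcde" decode to {"abc", "de"}, and the last offset (5) is exactly the size of the character buffer.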
-#include "batch_utils.h" // for MatchingPageBytes #include "cat_container.h" // for CatContainer #include "dmlc/base.h" // for BeginPtr #include "dmlc/common.h" // for OMPException @@ -505,7 +505,7 @@ void CopyTensorInfoImpl(Context const* ctx, Json arr_interface, linalg::TensorThreads(), [&](auto i, auto) { + linalg::cpu_impl::TransformIdxKernel(t_out, ctx->Threads(), [&](auto i, auto) { return std::apply(in, linalg::UnravelIndex(i, shape)); }); }); @@ -876,8 +876,8 @@ bool MetaInfo::ShouldHaveLabels() const { void MetaInfo::Cats(std::shared_ptr cats) { this->cats_ = std::move(cats); - CHECK_LT(cats_->NumFeatures(), - static_castNumFeatures())>(std::numeric_limits::max())); + CHECK_LT(cats_->NumCatsTotal(), + static_castNumCatsTotal())>(std::numeric_limits::max())); } using DMatrixThreadLocal = @@ -919,16 +919,10 @@ DMatrix* TryLoadBinary(std::string fname, bool silent) { } // namespace DMatrix* DMatrix::Load(const std::string& uri, bool silent, DataSplitMode data_split_mode) { - std::string fname, cache_file; auto dlm_pos = uri.find('#'); - if (dlm_pos != std::string::npos) { - cache_file = uri.substr(dlm_pos + 1, uri.length()); - fname = uri.substr(0, dlm_pos); - CHECK_EQ(cache_file.find('#'), std::string::npos) - << "Only one `#` is allowed in file path for cache file specification."; - } else { - fname = uri; - } + CHECK(dlm_pos == std::string::npos) + << "External memory training with text input has been removed."; + std::string fname = uri; // legacy handling of binary data loading DMatrix* loaded = TryLoadBinary(fname, silent); @@ -937,30 +931,18 @@ DMatrix* DMatrix::Load(const std::string& uri, bool silent, DataSplitMode data_s } int partid = 0, npart = 1; - DMatrix* dmat{}; - - if (cache_file.empty()) { - fname = data::ValidateFileFormat(fname); - std::unique_ptr> parser( - dmlc::Parser::Create(fname.c_str(), partid, npart, "auto")); - data::FileAdapter adapter(parser.get()); - dmat = DMatrix::Create(&adapter, std::numeric_limits::quiet_NaN(), Context{}.Threads(), - cache_file, data_split_mode); - } else { - CHECK(data_split_mode != DataSplitMode::kCol) - << "Column-wise data split is not supported for external memory."; - data::FileIterator iter{fname, static_cast(partid), static_cast(npart)}; - auto config = ExtMemConfig{cache_file, - false, - cuda_impl::MatchingPageBytes(), - std::numeric_limits::quiet_NaN(), - cuda_impl::MaxNumDevicePages(), - 1}; - dmat = new data::SparsePageDMatrix{&iter, iter.Proxy(), data::fileiter::Reset, - data::fileiter::Next, config}; - } - return dmat; + static std::once_flag warning_flag; + std::call_once(warning_flag, []() { + LOG(WARNING) << "Text file input has been deprecated since 3.1"; + }); + + fname = data::ValidateFileFormat(fname); + std::unique_ptr> parser( + dmlc::Parser::Create(fname.c_str(), partid, npart, "auto")); + data::FileAdapter adapter(parser.get()); + return DMatrix::Create(&adapter, std::numeric_limits::quiet_NaN(), Context{}.Threads(), "", + data_split_mode); } template * adapter, - float missing, int nthread, const std::string& cache_prefix, DataSplitMode data_split_mode); + float missing, int nthread, std::string const& cache_prefix, DataSplitMode data_split_mode); SparsePage SparsePage::GetTranspose(int num_columns, int32_t n_threads) const { SparsePage transpose; @@ -1131,7 +1111,7 @@ void SparsePage::Push(const SparsePage &batch) { } template -uint64_t SparsePage::Push(const AdapterBatchT& batch, float missing, int nthread) { +bst_idx_t SparsePage::Push(AdapterBatchT const& batch, float missing, std::int32_t nthread) { 
constexpr bool kIsRowMajor = AdapterBatchT::kIsRowMajor; // Allow threading only for row-major case as column-major requires O(nthread*batch_size) memory nthread = kIsRowMajor ? nthread : 1; @@ -1289,19 +1269,19 @@ void SparsePage::PushCSC(const SparsePage &batch) { self_offset = std::move(offset); } -template uint64_t SparsePage::Push(const data::DenseAdapterBatch& batch, float missing, - int nthread); -template uint64_t SparsePage::Push(const data::ArrayAdapterBatch& batch, float missing, - int nthread); -template uint64_t SparsePage::Push(const data::CSRAdapterBatch& batch, float missing, int nthread); -template uint64_t SparsePage::Push(const data::CSRArrayAdapterBatch& batch, float missing, - int nthread); -template uint64_t SparsePage::Push(const data::CSCArrayAdapterBatch& batch, float missing, - int nthread); -template uint64_t SparsePage::Push(const data::CSCAdapterBatch& batch, float missing, int nthread); -template uint64_t SparsePage::Push(const data::FileAdapterBatch& batch, float missing, int nthread); -template uint64_t SparsePage::Push(const data::ColumnarAdapterBatch& batch, float missing, - std::int32_t nthread); +#define INSTANTIATE_PUSH(__BATCH_T) \ + template std::uint64_t SparsePage::Push(const data::__BATCH_T& batch, float missing, \ + std::int32_t nthread); + +INSTANTIATE_PUSH(DenseAdapterBatch) +INSTANTIATE_PUSH(ArrayAdapterBatch) +INSTANTIATE_PUSH(CSRArrayAdapterBatch) +INSTANTIATE_PUSH(CSCArrayAdapterBatch) +INSTANTIATE_PUSH(FileAdapterBatch) +INSTANTIATE_PUSH(ColumnarAdapterBatch) +INSTANTIATE_PUSH(EncColumnarAdapterBatch) + +#undef INSTANTIATE_PUSH namespace data { // List of files that will be force linked in static links. diff --git a/src/data/data.cu b/src/data/data.cu index 17fc54a562a4..6f605777e02b 100644 --- a/src/data/data.cu +++ b/src/data/data.cu @@ -1,5 +1,5 @@ /** - * Copyright 2019-2024, XGBoost Contributors + * Copyright 2019-2025, XGBoost Contributors * * \file data.cu * \brief Handles setting metainfo from array interface. 
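The INSTANTIATE_PUSH change above replaces a block of hand-written explicit instantiations with one macro invocation per adapter batch type. The pattern in miniature (a toy function, illustrative only):

template <typename BatchT>
int Push(BatchT const& batch);  // defined in a single .cc file

// One line per supported type instead of a repeated declaration block.
#define INSTANTIATE_PUSH(__BATCH_T) template int Push<__BATCH_T>(__BATCH_T const&);

INSTANTIATE_PUSH(float)   // expands to: template int Push<float>(float const&);
INSTANTIATE_PUSH(double)

#undef INSTANTIATE_PUSH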
@@ -7,6 +7,7 @@ #include // for gather #include // for none_of +#include "../common/algorithm.cuh" // for RunLengthEncode #include "../common/cuda_context.cuh" #include "../common/device_helpers.cuh" #include "../common/linalg_op.cuh" @@ -29,7 +30,7 @@ auto SetDeviceToPtr(void const* ptr) { } template -void CopyTensorInfoImpl(CUDAContext const* ctx, Json arr_interface, linalg::Tensor* p_out) { +void CopyTensorInfoImpl(Context const* ctx, Json arr_interface, linalg::Tensor* p_out) { ArrayInterface array(arr_interface); if (array.n == 0) { p_out->SetDevice(DeviceOrd::CUDA(0)); @@ -48,19 +49,15 @@ void CopyTensorInfoImpl(CUDAContext const* ctx, Json arr_interface, linalg::Tens // set data data->Resize(array.n); dh::safe_cuda(cudaMemcpyAsync(data->DevicePointer(), array.data, array.n * sizeof(T), - cudaMemcpyDefault, ctx->Stream())); + cudaMemcpyDefault, ctx->CUDACtx()->Stream())); }); return; } p_out->Reshape(array.shape); auto t = p_out->View(ptr_device); - linalg::ElementWiseTransformDevice( - t, - [=] __device__(size_t i, T) { - return linalg::detail::Apply(TypedIndex{array}, - linalg::UnravelIndex(i, array.shape)); - }, - ctx->Stream()); + linalg::cuda_impl::TransformIdxKernel(ctx, t, [=] XGBOOST_DEVICE(std::size_t i, T) { + return std::apply(TypedIndex{array}, linalg::UnravelIndex(i, array.shape)); + }); } void CopyGroupInfoImpl(ArrayInterface<1> column, std::vector* out) { @@ -103,17 +100,13 @@ void CopyQidImpl(Context const* ctx, ArrayInterface<1> array_interface, dh::safe_cuda(cudaMemcpy(&non_dec, flag.data().get(), sizeof(bool), cudaMemcpyDeviceToHost)); CHECK(non_dec) << "`qid` must be sorted in increasing order along with data."; - size_t bytes = 0; + dh::caching_device_vector out(array_interface.Shape<0>()); dh::caching_device_vector cnt(array_interface.Shape<0>()); HostDeviceVector d_num_runs_out(1, 0, d); - cub::DeviceRunLengthEncode::Encode(nullptr, bytes, it, out.begin(), cnt.begin(), - d_num_runs_out.DevicePointer(), array_interface.Shape<0>(), - cuctx->Stream()); - dh::CachingDeviceUVector tmp(bytes); - cub::DeviceRunLengthEncode::Encode(tmp.data(), bytes, it, out.begin(), cnt.begin(), - d_num_runs_out.DevicePointer(), array_interface.Shape<0>(), - cuctx->Stream()); + + common::RunLengthEncode(cuctx->Stream(), it, out.begin(), cnt.begin(), + d_num_runs_out.DevicePointer(), array_interface.Shape<0>()); auto h_num_runs_out = d_num_runs_out.HostSpan()[0]; group_ptr_.clear(); @@ -127,10 +120,10 @@ void MetaInfo::SetInfoFromCUDA(Context const* ctx, StringView key, Json array) { // multi-dim float info auto cuctx = ctx->CUDACtx(); if (key == "base_margin") { - CopyTensorInfoImpl(cuctx, array, &base_margin_); + CopyTensorInfoImpl(ctx, array, &base_margin_); return; } else if (key == "label") { - CopyTensorInfoImpl(cuctx, array, &labels); + CopyTensorInfoImpl(ctx, array, &labels); auto ptr = labels.Data()->ConstDevicePointer(); auto valid = thrust::none_of(cuctx->CTP(), ptr, ptr + labels.Size(), data::LabelsCheck{}); CHECK(valid) << "Label contains NaN, infinity or a value too large."; @@ -150,7 +143,7 @@ void MetaInfo::SetInfoFromCUDA(Context const* ctx, StringView key, Json array) { } // float info linalg::Tensor t; - CopyTensorInfoImpl(cuctx, array, &t); + CopyTensorInfoImpl(ctx, array, &t); if (key == "weight") { this->weights_ = std::move(*t.Data()); auto ptr = weights_.ConstDevicePointer(); diff --git a/src/data/device_adapter.cu b/src/data/device_adapter.cu index 1462880eca1d..38a52ebfbb25 100644 --- a/src/data/device_adapter.cu +++ b/src/data/device_adapter.cu @@ -2,12 +2,37 
@@ * Copyright 2019-2025, XGBoost Contributors */ #include "../common/cuda_rt_utils.h" // for SetDevice, CurrentDevice +#include "columnar.h" // for GetRefCats, GetArrowDictionary #include "device_adapter.cuh" namespace xgboost::data { +namespace { +auto GetRefCats(Context const* ctx, Json handle, + std::vector* p_h_ref_cats) { + auto& h_ref_cats = *p_h_ref_cats; + auto cats = reinterpret_cast(get(handle)); + CHECK(cats); + auto d_cats = cats->DeviceView(ctx); + // FIXME(jiamingy): Remove this along with the host copy in the cat container once + // cuDF can return device-only data. + h_ref_cats.resize(d_cats.columns.size()); + thrust::copy(dh::tcbegin(d_cats.columns), dh::tcend(d_cats.columns), h_ref_cats.begin()); + d_cats.columns = common::Span{h_ref_cats}; + return d_cats; +} +} // anonymous namespace + CudfAdapter::CudfAdapter(StringView cuda_arrinf) { - Json interfaces = Json::Load(cuda_arrinf); - std::vector const& jcolumns = get(interfaces); + Json jdf = Json::Load(cuda_arrinf); + + if (IsA(jdf)) { + // Has reference categories. + auto ctx = Context{}.MakeCUDA(curt::CurrentDevice()); + this->ref_cats_ = GetRefCats(&ctx, jdf["ref_categories"], &this->h_ref_cats_); + jdf = jdf["columns"]; + } + + std::vector const& jcolumns = get(jdf); std::size_t n_columns = jcolumns.size(); CHECK_GT(n_columns, 0) << "The number of columns must not be 0."; @@ -21,8 +46,13 @@ CudfAdapter::CudfAdapter(StringView cuda_arrinf) { auto const& first = get(jcol[0]); if (first.find("offsets") == first.cend()) { // numeric index - n_cats = - GetArrowNumericIndex(DeviceOrd::CUDA(0), jcol, &cats_, &columns, &n_bytes_, &num_rows_); + if (device == -1) { + auto const& first = get(jcol[0]); + auto names = ArrayInterface<1>{first}; + device = dh::CudaGetPointerDevice(names.data); + } + n_cats = GetArrowNumericIndex(DeviceOrd::CUDA(device), jcol, &cats_, &columns, &n_bytes_, + &num_rows_); } else { // string index n_cats = GetArrowDictionary(jcol, &cats_, &columns, &n_bytes_, &num_rows_); @@ -33,7 +63,7 @@ CudfAdapter::CudfAdapter(StringView cuda_arrinf) { columns.push_back(col); this->cats_.emplace_back(); this->num_rows_ = std::max(num_rows_, col.Shape<0>()); - CHECK_EQ(num_rows_, col.Shape<0>()) << "All columns should have same number of rows."; + CHECK_EQ(num_rows_, col.Shape<0>()) << "All columns should have the same number of rows."; n_bytes_ += col.ElementSize() * col.Shape<0>(); } cat_segments.emplace_back(n_cats); @@ -61,6 +91,11 @@ CudfAdapter::CudfAdapter(StringView cuda_arrinf) { curt::SetDevice(device_.ordinal); this->columns_ = columns; - batch_ = CudfAdapterBatch(dh::ToSpan(columns_), num_rows_); + batch_ = CudfAdapterBatch(dh::ToSpan(columns_), NoOpAccessor{}, num_rows_); + + if (!this->ref_cats_.Empty()) { + CHECK_EQ(this->ref_cats_.Size(), this->columns_.size()) + << "Invalid reference categories, different number of columns."; + } } } // namespace xgboost::data diff --git a/src/data/device_adapter.cuh b/src/data/device_adapter.cuh index 6203435b8c95..ed63892c5364 100644 --- a/src/data/device_adapter.cuh +++ b/src/data/device_adapter.cuh @@ -5,37 +5,40 @@ #ifndef XGBOOST_DATA_DEVICE_ADAPTER_H_ #define XGBOOST_DATA_DEVICE_ADAPTER_H_ -#include // for maximum -#include // for make_counting_iterator -#include // for none_of +#include // for maximum #include // for size_t #include // for variant #include // for numeric_limits +#include // for make_unique #include // for string +#include "../common/algorithm.cuh" // for AllOf #include "../common/cuda_context.cuh" #include
"../common/device_helpers.cuh" #include "adapter.h" #include "array_interface.h" +#include "cat_container.cuh" // for MakeCatAccessor #include "xgboost/string_view.h" // for StringView namespace xgboost::data { -class CudfAdapterBatch : public detail::NoMetaInfo { - friend class CudfAdapter; +template +class EncCudfAdapterBatchImpl : public detail::NoMetaInfo { + private: + common::Span const> columns_; + bst_idx_t n_samples_{0}; + EncAccessor acc_; public: - CudfAdapterBatch() = default; - CudfAdapterBatch(common::Span> columns, size_t num_rows) - : columns_(columns), num_rows_(num_rows) {} - [[nodiscard]] std::size_t Size() const { return num_rows_ * columns_.size(); } - [[nodiscard]] __device__ __forceinline__ COOTuple GetElement(size_t idx) const { - size_t column_idx = idx % columns_.size(); - size_t row_idx = idx / columns_.size(); - auto const& column = columns_[column_idx]; - float value = column.valid.Data() == nullptr || column.valid.Check(row_idx) - ? column(row_idx) - : std::numeric_limits::quiet_NaN(); + EncCudfAdapterBatchImpl() = default; + EncCudfAdapterBatchImpl(common::Span const> columns, EncAccessor acc, + bst_idx_t n_samples) + : columns_(columns), n_samples_(n_samples), acc_{std::move(acc)} {} + [[nodiscard]] std::size_t Size() const { return n_samples_ * columns_.size(); } + [[nodiscard]] __device__ __forceinline__ COOTuple GetElement(bst_idx_t idx) const { + auto column_idx = idx % columns_.size(); + auto row_idx = idx / columns_.size(); + auto value = this->GetElement(row_idx, column_idx); return {row_idx, column_idx, value}; } @@ -44,64 +47,22 @@ class CudfAdapterBatch : public detail::NoMetaInfo { float value = column.valid.Data() == nullptr || column.valid.Check(ridx) ? column(ridx) : std::numeric_limits::quiet_NaN(); - return value; + return acc_(value, fidx); } - [[nodiscard]] XGBOOST_DEVICE bst_idx_t NumRows() const { return num_rows_; } + [[nodiscard]] XGBOOST_DEVICE bst_idx_t NumRows() const { return n_samples_; } [[nodiscard]] XGBOOST_DEVICE bst_idx_t NumCols() const { return columns_.size(); } - - private: - common::Span> columns_; - size_t num_rows_{0}; + [[nodiscard]] common::Span const> Columns() const { return this->columns_; } }; -/*! - * Please be careful that, in official specification, the only three required - * fields are `shape', `version' and `typestr'. Any other is optional, - * including `data'. But here we have one additional requirements for input - * data: - * - * - `data' field is required, passing in an empty dataset is not accepted, as - * most (if not all) of our algorithms don't have test for empty dataset. An - * error is better than a crash. - * - * What if invalid value from dataframe is 0 but I specify missing=NaN in - * XGBoost? Since validity mask is ignored, all 0s are preserved in XGBoost. - * - * FIXME(trivialfis): Put above into document after we have a consistent way for - * processing input data. +using CudfAdapterBatch = EncCudfAdapterBatchImpl; +using EncCudfAdapterBatch = EncCudfAdapterBatchImpl; + +/** + * @brief Device columnar format. We call it cuDF, but it's just arrow-CUDA since cuDF + * adopts the arrow format. 
* - * Sample input: - * [ - * { - * "shape": [ - * 10 - * ], - * "strides": [ - * 4 - * ], - * "data": [ - * 30074864128, - * false - * ], - * "typestr": " { public: @@ -109,8 +70,8 @@ class CudfAdapter : public detail::SingleBatchDataIter { explicit CudfAdapter(std::string cuda_interfaces_str) : CudfAdapter{StringView{cuda_interfaces_str}} {} - const CudfAdapterBatch& Value() const override { - CHECK_EQ(batch_.columns_.data(), columns_.data().get()); + [[nodiscard]] CudfAdapterBatch const& Value() const override { + CHECK_EQ(batch_.Columns().data(), columns_.data().get()); return batch_; } @@ -125,7 +86,13 @@ class CudfAdapter : public detail::SingleBatchDataIter { [[nodiscard]] enc::DeviceColumnsView DCats() const { return {dh::ToSpan(this->d_cats_), dh::ToSpan(this->cat_segments_), this->n_total_cats_}; } - [[nodiscard]] bool HasCategorical() const { return !(n_total_cats_ == 0); } + [[nodiscard]] enc::DeviceColumnsView RefCats() const { return ref_cats_; } + [[nodiscard]] bool HasCategorical() const { return n_total_cats_ != 0; } + [[nodiscard]] bool HasRefCategorical() const { return this->ref_cats_.n_total_cats != 0; } + + [[nodiscard]] common::Span const> Columns() const { + return dh::ToSpan(this->columns_); + } private: CudfAdapterBatch batch_; @@ -137,6 +104,9 @@ class CudfAdapter : public detail::SingleBatchDataIter { dh::device_vector cat_segments_; std::int32_t n_total_cats_{0}; + enc::DeviceColumnsView ref_cats_; // A view to the reference category. + std::vector h_ref_cats_; // host storage for column view + size_t num_rows_{0}; bst_idx_t n_bytes_{0}; DeviceOrd device_{DeviceOrd::CPU()}; @@ -169,6 +139,18 @@ class CupyAdapterBatch : public detail::NoMetaInfo { ArrayInterface<2> array_interface_; }; +inline auto MakeEncColumnarBatch(Context const* ctx, CudfAdapter const* adapter) { + auto cats = std::make_unique(ctx, adapter->RefCats(), true); + cats->Sort(ctx); + auto [acc, mapping] = ::xgboost::cuda_impl::MakeCatAccessor(ctx, adapter->DCats(), cats.get()); + return std::tuple{EncCudfAdapterBatch{adapter->Columns(), acc, adapter->NumRows()}, + std::move(mapping)}; +} + +inline auto MakeEncColumnarBatch(Context const* ctx, std::shared_ptr const& adapter) { + return MakeEncColumnarBatch(ctx, adapter.get()); +} + class CupyAdapter : public detail::SingleBatchDataIter { public: explicit CupyAdapter(StringView cuda_interface_str) { @@ -246,25 +228,18 @@ bst_idx_t GetRowCounts(Context const* ctx, const AdapterBatchT batch, } /** - * \brief Check there's no inf in data. + * @brief Check there's no inf in data. */ template bool NoInfInData(Context const* ctx, AdapterBatchT const& batch, IsValidFunctor is_valid) { - auto counting = thrust::make_counting_iterator(0llu); - auto value_iter = dh::MakeTransformIterator(counting, [=] XGBOOST_DEVICE(std::size_t idx) { - auto v = batch.GetElement(idx).value; + auto it = dh::MakeIndexTransformIter( + [=] XGBOOST_DEVICE(std::size_t idx) { return batch.GetElement(idx).value; }); + return common::AllOf(ctx->CUDACtx()->CTP(), it, it + batch.Size(), [=] XGBOOST_DEVICE(float v) { if (is_valid(v) && isinf(v)) { return false; } return true; }); - // The default implementation in thrust optimizes any_of/none_of/all_of by using small - // intervals to early stop. But we expect all data to be valid here, using small - // intervals only decreases performance due to excessive kernel launch and stream - // synchronization. 
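The removed comment below still explains the design choice: thrust's any_of/none_of/all_of early-exit by scanning small intervals, which costs extra kernel launches and stream synchronization when the expected case is that every element is valid. A one-pass equivalent with raw thrust, which is presumably what common::AllOf wraps (a sketch, not the actual implementation):

#include <thrust/execution_policy.h>
#include <thrust/functional.h>
#include <thrust/transform_reduce.h>

// All-of as a single full-range reduction: no early exit, one kernel launch.
template <typename Policy, typename It, typename Pred>
bool AllOfOnePass(Policy const& policy, It first, It last, Pred pred) {
  return thrust::transform_reduce(policy, first, last, pred, true,
                                  thrust::logical_and<bool>{});
}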
- auto valid = dh::Reduce(ctx->CUDACtx()->CTP(), value_iter, value_iter + batch.Size(), true, - thrust::logical_and<>{}); - return valid; } } // namespace xgboost::data #endif // XGBOOST_DATA_DEVICE_ADAPTER_H_ diff --git a/src/data/ellpack_page.cu b/src/data/ellpack_page.cu index c3926cff15f7..3e7edacb666e 100644 --- a/src/data/ellpack_page.cu +++ b/src/data/ellpack_page.cu @@ -1,20 +1,22 @@ /** - * Copyright 2019-2024, XGBoost contributors + * Copyright 2019-2025, XGBoost contributors */ #include // for lower_bound, upper_bound #include // for max_element #include // for make_counting_iterator #include // for transform_output_iterator -#include // for copy -#include // for numeric_limits -#include // for move -#include // for vector +#include // for copy +#include // for distance +#include // for numeric_limits +#include // for move +#include // for vector #include "../common/algorithm.cuh" // for InclusiveScan #include "../common/categorical.h" // for IsCat #include "../common/cuda_context.cuh" // for CUDAContext #include "../common/cuda_rt_utils.h" // for SetDevice +#include "../common/cuda_stream.h" // for DefaultStream #include "../common/hist_util.cuh" // for HistogramCuts #include "../common/ref_resource_view.cuh" // for MakeFixedVecWithCudaMalloc #include "../common/transform_iterator.h" // for MakeIndexTransformIter @@ -74,13 +76,13 @@ __global__ void CompressBinEllpackKernel( auto row_end = entries + row_ptrs[irow + 1] - row_ptrs[0]; auto it = thrust::make_transform_iterator(thrust::make_counting_iterator(0ul), [=](std::size_t i) { return row_beg[i].index; }); - auto it_end = it + thrust::distance(row_beg, row_end); + auto it_end = it + cuda::std::distance(row_beg, row_end); auto res_it = thrust::lower_bound(thrust::seq, it, it_end, cpr_fidx); if (res_it == it_end || cpr_fidx != *res_it) { wr.AtomicWriteSymbol(buffer, bin, (irow + base_row) * row_stride + cpr_fidx); return; } - cpr_fidx = thrust::distance(it, res_it); + cpr_fidx = cuda::std::distance(it, res_it); SPAN_CHECK(cpr_fidx < row_length); } @@ -229,11 +231,11 @@ EllpackPageImpl::EllpackPageImpl(Context const* ctx, DMatrix* p_fmat, const Batc } } -template +template struct WriteCompressedEllpackFunctor { WriteCompressedEllpackFunctor(common::CompressedByteT* buffer, const common::CompressedBufferWriter& writer, AdapterBatchT batch, - EllpackDeviceAccessor accessor, + EllpackAccessorImpl accessor, common::Span feature_types, const data::IsValidFunctor& is_valid) : d_buffer(buffer), @@ -246,7 +248,7 @@ struct WriteCompressedEllpackFunctor { common::CompressedByteT* d_buffer; common::CompressedBufferWriter writer; AdapterBatchT batch; - EllpackDeviceAccessor accessor; + EllpackAccessorImpl accessor; common::Span feature_types; data::IsValidFunctor is_valid; @@ -259,9 +261,9 @@ struct WriteCompressedEllpackFunctor { __device__ void Write(data::COOTuple const& e, bst_idx_t out_position) { bst_bin_t bin_idx = 0; if (common::IsCat(feature_types, e.column_idx)) { - bin_idx = accessor.SearchBin(e.value, e.column_idx); + bin_idx = accessor.template SearchBin(e.value, e.column_idx); } else { - bin_idx = accessor.SearchBin(e.value, e.column_idx); + bin_idx = accessor.template SearchBin(e.value, e.column_idx); } if constexpr (kIsDenseCompressed) { bin_idx -= accessor.feature_segments[e.column_idx]; @@ -317,42 +319,47 @@ void CopyDataToEllpack(Context const* ctx, const AdapterBatchT& batch, common::CompressedBufferWriter writer{n_symbols}; auto d_compressed_buffer = dst->gidx_buffer.data(); - // We redirect the scan output into this 
functor to do the actual writing - using Tuple = typename WriteCompressedEllpackFunctor::Tuple; - dh::TypedDiscard discard; - auto device_accessor = dst->GetDeviceAccessor(ctx); - WriteCompressedEllpackFunctor functor{ - d_compressed_buffer, writer, batch, device_accessor, feature_types, is_valid}; - - // For dense compressed data, we can simply copy the data with the input position. - if (kIsDenseCompressed) { - CHECK(batch.NumRows() == 0 || batch.NumCols() == dst->info.row_stride); - thrust::for_each_n(ctx->CUDACtx()->CTP(), cnt, dst->Size() * dst->info.row_stride, functor); - return; - } + auto get_ridx = [=] __device__(std::size_t idx) { + return batch.GetElement(idx).row_idx; + }; // NOLINT + auto get_is_valid = [=] __device__(std::size_t idx) -> std::size_t { + return is_valid(batch.GetElement(idx)); + }; + dst->Visit(ctx, {}, [&](auto&& device_accessor) { + using IterT = typename std::remove_reference_t::IterType; + // We redirect the scan output into this functor to do the actual writing + using Tuple = typename WriteCompressedEllpackFunctor::Tuple; + dh::TypedDiscard discard; + WriteCompressedEllpackFunctor functor{ + d_compressed_buffer, writer, batch, device_accessor, feature_types, is_valid}; + // For dense compressed data, we can simply copy the data with the input position. + if (kIsDenseCompressed) { + CHECK(batch.NumRows() == 0 || batch.NumCols() == dst->info.row_stride); + thrust::for_each_n(ctx->CUDACtx()->CTP(), cnt, dst->Size() * dst->info.row_stride, functor); + return; + } - // Some witchcraft happens here. - // - // The goal is to copy valid elements out of the input to an ELLPACK matrix with a given - // row stride, using no extra working memory Standard stream compaction needs to be - // modified to do this, so we manually define a segmented stream compaction via - // operators on an inclusive scan. The output of this inclusive scan is fed to a custom - // function which works out the correct output position - auto key_iter = dh::MakeTransformIterator( - cnt, [=] __device__(size_t idx) { return batch.GetElement(idx).row_idx; }); - auto value_iter = dh::MakeTransformIterator( - cnt, [=] __device__(size_t idx) -> size_t { return is_valid(batch.GetElement(idx)); }); - - auto key_value_index_iter = - thrust::make_zip_iterator(thrust::make_tuple(key_iter, value_iter, cnt)); - thrust::transform_output_iterator out(discard, functor); - common::InclusiveScan(ctx, key_value_index_iter, out, TupleScanOp{}, batch.Size()); + // Some witchcraft happens here. + // + // The goal is to copy valid elements out of the input to an ELLPACK matrix with a given + // row stride, using no extra working memory. Standard stream compaction needs to be + // modified to do this, so we manually define a segmented stream compaction via + // operators on an inclusive scan.
The output of this inclusive scan is fed to a custom + // function which works out the correct output position. + auto key_iter = dh::MakeTransformIterator(cnt, get_ridx); + auto value_iter = dh::MakeTransformIterator(cnt, get_is_valid); + + auto key_value_index_iter = + thrust::make_zip_iterator(thrust::make_tuple(key_iter, value_iter, cnt)); + thrust::transform_output_iterator out(discard, functor); + common::InclusiveScan(ctx, key_value_index_iter, out, TupleScanOp{}, batch.Size()); + }); } void WriteNullValues(Context const* ctx, EllpackPageImpl* dst, common::Span row_counts) { // Write the null values - auto null = dst->GetDeviceAccessor(ctx).NullValue(); + auto null = dst->NullValue(); common::CompressedBufferWriter writer(dst->NumSymbols()); auto d_compressed_buffer = dst->gidx_buffer.data(); auto row_stride = dst->info.row_stride; @@ -391,6 +398,7 @@ EllpackPageImpl::EllpackPageImpl(Context const* ctx, AdapterBatch batch, float m std::shared_ptr cuts); ELLPACK_BATCH_SPECIALIZE(data::CudfAdapterBatch) +ELLPACK_BATCH_SPECIALIZE(data::EncCudfAdapterBatch) ELLPACK_BATCH_SPECIALIZE(data::CupyAdapterBatch) #undef ELLPACK_BATCH_SPECIALIZE @@ -456,6 +464,10 @@ EllpackPageImpl::EllpackPageImpl(Context const* ctx, GHistIndexMatrix const& pag info{CalcNumSymbols( ctx, [&] { + if (page.Size() == 0) { + return static_cast(0); + } + CHECK_GE(page.row_ptr.size(), 2); auto it = common::MakeIndexTransformIter( [&](bst_idx_t i) { return page.row_ptr[i + 1] - page.row_ptr[i]; }); return *std::max_element(it, it + page.Size()); @@ -472,20 +484,21 @@ EllpackPageImpl::EllpackPageImpl(Context const* ctx, GHistIndexMatrix const& pag dh::safe_cuda(cudaMemcpyAsync(d_row_ptr.data(), page.row_ptr.data(), d_row_ptr.size_bytes(), cudaMemcpyHostToDevice, ctx->CUDACtx()->Stream())); - auto accessor = this->GetDeviceAccessor(ctx, ft); this->monitor_.Start("CopyGHistToEllpack"); - common::DispatchBinType(page.index.GetBinTypeSize(), [&](auto t) { - using T = decltype(t); - CopyGHistToEllpack(ctx, page, d_row_ptr, this->info.row_stride, accessor.NullValue(), - this->NumSymbols(), this->cuts_->cut_ptrs_.ConstDeviceSpan(), - d_compressed_buffer); + this->Visit(ctx, ft, [&](auto&& accessor) { + common::DispatchBinType(page.index.GetBinTypeSize(), [&](auto t) { + using T = decltype(t); + CopyGHistToEllpack(ctx, page, d_row_ptr, this->info.row_stride, accessor.NullValue(), + this->NumSymbols(), this->cuts_->cut_ptrs_.ConstDeviceSpan(), + d_compressed_buffer); + }); }); this->monitor_.Stop("CopyGHistToEllpack"); } EllpackPageImpl::~EllpackPageImpl() noexcept(false) { // Sync the stream to make sure all running CUDA kernels finish before deallocation. - auto status = dh::DefaultStream().Sync(false); + auto status = curt::DefaultStream().Sync(false); if (status != cudaSuccess) { auto str = cudaGetErrorString(status); // For external-memory, throwing here can trigger a series of calls to @@ -496,17 +509,18 @@ EllpackPageImpl::~EllpackPageImpl() noexcept(false) { } // A functor that copies the data from one EllpackPage to another. +template struct CopyPage { common::CompressedBufferWriter cbw; common::CompressedByteT* dst_data_d; - common::CompressedIterator src_iterator_d; + IterT src_iterator_d; // The number of elements to skip.
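The "witchcraft" is easier to see serially: scanning (row, is_valid) pairs with an operator that resets the running count at row boundaries gives every valid element its offset within its own row, and row * row_stride plus that offset is its slot in the ELLPACK buffer. A CPU sketch of what TupleScanOp plus the write functor compute together (illustrative, not the device code):

#include <cstddef>
#include <vector>

struct Elem { std::size_t row; bool valid; };

std::vector<std::size_t> OutputPositions(std::vector<Elem> const& in, std::size_t row_stride) {
  std::vector<std::size_t> out;
  std::size_t count = 0, prev_row = 0;
  for (auto const& e : in) {
    if (e.row != prev_row) {  // segment boundary: the scan restarts per row
      count = 0;
      prev_row = e.row;
    }
    count += e.valid;  // inclusive scan of the validity flags
    if (e.valid) {
      out.push_back(e.row * row_stride + (count - 1));
    }
  }
  return out;
}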
size_t offset; - CopyPage(EllpackPageImpl* dst, EllpackPageImpl const* src, size_t offset) + CopyPage(EllpackPageImpl* dst, EllpackAccessorImpl src, size_t offset) : cbw{dst->NumSymbols()}, dst_data_d{dst->gidx_buffer.data()}, - src_iterator_d{src->gidx_buffer.data(), src->NumSymbols()}, + src_iterator_d{src.gidx_iter}, offset{offset} {} __device__ void operator()(std::size_t element_id) { @@ -522,17 +536,20 @@ bst_idx_t EllpackPageImpl::Copy(Context const* ctx, EllpackPageImpl const* page, CHECK_EQ(this->info.row_stride, page->info.row_stride); CHECK_EQ(this->NumSymbols(), page->NumSymbols()); CHECK_GE(this->n_rows * this->info.row_stride, offset + n_elements); - thrust::for_each_n(ctx->CUDACtx()->CTP(), thrust::make_counting_iterator(0ul), n_elements, - CopyPage{this, page, offset}); + page->Visit(ctx, {}, [&](auto&& src) { + thrust::for_each_n(ctx->CUDACtx()->CTP(), thrust::make_counting_iterator(0ul), n_elements, + CopyPage{this, src, offset}); + }); monitor_.Stop(__func__); return n_elements; } // A functor that compacts the rows from one EllpackPage into another. +template struct CompactPage { common::CompressedBufferWriter cbw; common::CompressedByteT* dst_data_d; - common::CompressedIterator src_iterator_d; + IterT src_iterator_d; /** * @brief An array that maps the rows from the full DMatrix to the compacted page. * @@ -548,13 +565,14 @@ struct CompactPage { size_t base_rowid; size_t row_stride; - CompactPage(EllpackPageImpl* dst, EllpackPageImpl const* src, common::Span row_indexes) + CompactPage(EllpackPageImpl* dst, EllpackAccessorImpl src, + common::Span row_indexes) : cbw{dst->NumSymbols()}, dst_data_d{dst->gidx_buffer.data()}, - src_iterator_d{src->gidx_buffer.data(), src->NumSymbols()}, + src_iterator_d{src.gidx_iter}, row_indexes(row_indexes), - base_rowid{src->base_rowid}, - row_stride{src->info.row_stride} {} + base_rowid{src.base_rowid}, + row_stride{src.row_stride} {} __device__ void operator()(bst_idx_t row_id) { size_t src_row = base_rowid + row_id; @@ -578,7 +596,10 @@ void EllpackPageImpl::Compact(Context const* ctx, EllpackPageImpl const* page, CHECK_EQ(this->NumSymbols(), page->NumSymbols()); CHECK_LE(page->base_rowid + page->n_rows, row_indexes.size()); auto cuctx = ctx->CUDACtx(); - dh::LaunchN(page->n_rows, cuctx->Stream(), CompactPage{this, page, row_indexes}); + page->Visit(ctx, {}, [&](auto&& src) { + dh::LaunchN(page->n_rows, cuctx->Stream(), CompactPage{this, src, row_indexes}); + }); + monitor_.Stop(__func__); } @@ -606,7 +627,7 @@ void EllpackPageImpl::CreateHistIndices(Context const* ctx, const SparsePage& ro } this->monitor_.Start(__func__); - auto null_gidx_value = this->GetDeviceAccessor(ctx, feature_types).NullValue(); + auto null_gidx_value = this->NullValue(); auto const& offset_vec = row_batch.offset.ConstHostVector(); @@ -655,12 +676,13 @@ void EllpackPageImpl::CreateHistIndices(Context const* ctx, const SparsePage& ro const dim3 block3(32, 8, 1); // 256 threads const dim3 grid3(common::DivRoundUp(batch_nrows, block3.x), common::DivRoundUp(this->info.row_stride, block3.y), 1); - auto device_accessor = this->GetDeviceAccessor(ctx); auto launcher = [&](auto kernel) { - dh::LaunchKernel{grid3, block3, 0, ctx->CUDACtx()->Stream()}( // NOLINT - kernel, writer, gidx_buffer_data, row_ptrs.data(), entries_d.data(), - device_accessor.gidx_fvalue_map.data(), device_accessor.feature_segments, feature_types, - batch_row_begin, batch_nrows, this->info.row_stride, null_gidx_value); + this->Visit(ctx, {}, [&](auto&& device_accessor) { + 
dh::LaunchKernel{grid3, block3, 0, ctx->CUDACtx()->Stream()}( // NOLINT + kernel, writer, gidx_buffer_data, row_ptrs.data(), entries_d.data(), + device_accessor.gidx_fvalue_map.data(), device_accessor.feature_segments, feature_types, + batch_row_begin, batch_nrows, this->info.row_stride, null_gidx_value); + }); }; if (this->IsDense()) { launcher(CompressBinEllpackKernel); @@ -678,55 +700,94 @@ void EllpackPageImpl::CreateHistIndices(Context const* ctx, const SparsePage& ro // Return the number of rows contained in this page. [[nodiscard]] bst_idx_t EllpackPageImpl::Size() const { return n_rows; } -std::size_t EllpackPageImpl::MemCostBytes() const { +[[nodiscard]] std::size_t EllpackPageImpl::MemCostBytes() const { return this->gidx_buffer.size_bytes() + sizeof(this->is_dense) + sizeof(this->n_rows) + - sizeof(this->base_rowid) + sizeof(this->info); + sizeof(this->base_rowid) + sizeof(this->info) + this->d_gidx_buffer.size_bytes(); } -EllpackDeviceAccessor EllpackPageImpl::GetDeviceAccessor( +[[nodiscard]] EllpackAccessor EllpackPageImpl::GetDeviceEllpack( Context const* ctx, common::Span feature_types) const { - auto null = this->IsDense() ? this->NumSymbols() : this->NumSymbols() - 1; - return {ctx, - this->cuts_, - this->info.row_stride, - this->base_rowid, - this->n_rows, - common::CompressedIterator{gidx_buffer.data(), this->NumSymbols()}, - null, - feature_types}; + // The compress iterator reads at least 5 bytes. The `CalculateBufferSize` method should + // guarantee that. + CHECK_GE(this->gidx_buffer.size_bytes() + this->d_gidx_buffer.size_bytes(), 5); + auto null = this->NullValue(); + if (d_gidx_buffer.empty()) { + auto iter = common::CompressedIterator{gidx_buffer.data(), this->NumSymbols()}; + return EllpackDeviceAccessor{ + ctx, this->cuts_, this->info.row_stride, this->base_rowid, this->n_rows, + iter, null, this->IsDense(), feature_types}; + } else { + auto iter = common::DoubleCompressedIter{ + gidx_buffer.data(), gidx_buffer.size_bytes(), d_gidx_buffer.data(), this->NumSymbols()}; + return DoubleEllpackAccessor{ + ctx, this->cuts_, this->info.row_stride, this->base_rowid, this->n_rows, + iter, null, this->IsDense(), feature_types}; + } } -EllpackDeviceAccessor EllpackPageImpl::GetHostAccessor( +[[nodiscard]] EllpackAccessor EllpackPageImpl::GetHostEllpack( Context const* ctx, std::vector* h_gidx_buffer, common::Span feature_types) const { - h_gidx_buffer->resize(gidx_buffer.size()); - CHECK_EQ(h_gidx_buffer->size(), gidx_buffer.size()); - CHECK_NE(gidx_buffer.size(), 0); - dh::safe_cuda(cudaMemcpyAsync(h_gidx_buffer->data(), gidx_buffer.data(), gidx_buffer.size_bytes(), - cudaMemcpyDefault, ctx->CUDACtx()->Stream())); + CHECK_GE(this->gidx_buffer.size_bytes() + this->d_gidx_buffer.size_bytes(), 5); + auto null = this->NullValue(); + + h_gidx_buffer->resize(this->gidx_buffer.size() + this->d_gidx_buffer.size()); + if (!this->gidx_buffer.empty()) { + dh::safe_cuda(cudaMemcpyAsync(h_gidx_buffer->data(), this->gidx_buffer.data(), + this->gidx_buffer.size_bytes(), cudaMemcpyDefault, + ctx->CUDACtx()->Stream())); + } + + if (!d_gidx_buffer.empty()) { + auto dst = h_gidx_buffer->data() + this->gidx_buffer.size_bytes(); + auto src = d_gidx_buffer.data(); + dh::safe_cuda(cudaMemcpyAsync(dst, src, this->d_gidx_buffer.size_bytes(), cudaMemcpyDefault, + ctx->CUDACtx()->Stream())); + + auto iter = common::DoubleCompressedIter{ + h_gidx_buffer->data(), gidx_buffer.size_bytes(), dst, this->NumSymbols()}; + return DoubleEllpackAccessor{ + ctx, this->cuts_, this->info.row_stride, 
this->base_rowid, this->n_rows, + iter, null, this->IsDense(), feature_types}; + } + + auto iter = common::CompressedIterator{h_gidx_buffer->data(), this->NumSymbols()}; Context cpu_ctx; - auto null = this->IsDense() ? this->NumSymbols() : this->NumSymbols() - 1; - return {ctx->IsCPU() ? ctx : &cpu_ctx, - this->cuts_, - this->info.row_stride, - this->base_rowid, - this->n_rows, - common::CompressedIterator{h_gidx_buffer->data(), this->NumSymbols()}, - null, - feature_types}; + auto sctx = ctx->IsCPU() ? ctx : &cpu_ctx; + return EllpackDeviceAccessor{ + sctx, this->cuts_, this->info.row_stride, this->base_rowid, this->n_rows, + iter, null, this->IsDense(), feature_types}; } +namespace { +template +struct CntOp { + Accessor d_acc; + explicit CntOp(Accessor d_acc) : d_acc{std::move(d_acc)} {} + XGBOOST_DEVICE auto operator()(std::size_t i) { return d_acc.gidx_iter[i]; } +}; +template +struct NotNullOp { + Accessor d_acc; + explicit NotNullOp(Accessor d_acc) : d_acc{std::move(d_acc)} {} + + template + XGBOOST_DEVICE auto operator()(T gidx) -> bool { + return gidx != d_acc.NullValue(); + } +}; +} // namespace + [[nodiscard]] bst_idx_t EllpackPageImpl::NumNonMissing( Context const* ctx, common::Span feature_types) const { if (this->IsDense()) { return this->n_rows * this->info.row_stride; } - auto d_acc = this->GetDeviceAccessor(ctx, feature_types); - using T = typename decltype(d_acc.gidx_iter)::value_type; - auto it = thrust::make_transform_iterator( - thrust::make_counting_iterator(0ull), - [=] XGBOOST_DEVICE(std::size_t i) { return d_acc.gidx_iter[i]; }); - return thrust::count_if(ctx->CUDACtx()->CTP(), it, it + d_acc.row_stride * d_acc.n_rows, - [=] XGBOOST_DEVICE(T gidx) -> bool { return gidx != d_acc.NullValue(); }); + return this->Visit(ctx, feature_types, [&](auto&& d_acc) -> bst_idx_t { + using T = typename decltype(d_acc.gidx_iter)::value_type; + auto it = thrust::make_transform_iterator(thrust::make_counting_iterator(0ull), CntOp{d_acc}); + return thrust::count_if(ctx->CUDACtx()->CTP(), it, it + d_acc.row_stride * d_acc.n_rows, + NotNullOp{d_acc}); + }); } } // namespace xgboost diff --git a/src/data/ellpack_page.cuh b/src/data/ellpack_page.cuh index fc0340875645..a6013853b1e6 100644 --- a/src/data/ellpack_page.cuh +++ b/src/data/ellpack_page.cuh @@ -1,5 +1,5 @@ /** - * Copyright 2019-2024, XGBoost Contributors + * Copyright 2019-2025, XGBoost Contributors */ #ifndef XGBOOST_DATA_ELLPACK_PAGE_CUH_ #define XGBOOST_DATA_ELLPACK_PAGE_CUH_ @@ -20,11 +20,24 @@ namespace xgboost { /** * @brief Struct for accessing and manipulating an ELLPACK matrix on the device. * - * Does not own underlying memory and may be trivially copied into kernels. + * Does not own the underlying memory and may be trivially copied into kernels. */ -struct EllpackDeviceAccessor { - /** @brief Whether or not if the matrix is dense. */ - bst_idx_t null_value; +template +struct EllpackAccessorImpl { + private: + /** + * @brief Stores the null value and whether the matrix is dense. The `IsDense` is stored in the + * first bit of this value. + */ + bst_idx_t null_value_; + + constexpr static auto Ind() { return static_cast(1); } + constexpr static std::size_t NullShift() { return sizeof(null_value_) * 8 - Ind(); } + + public: + using IterType = IterT; + + public: /** @brief Row length for ELLPACK, equal to number of features when the data is dense. */ bst_idx_t row_stride; /** @brief Starting index of the rows. Used for external memory. 
*/ @@ -32,7 +45,7 @@ struct EllpackDeviceAccessor { /** @brief Number of rows in this batch. */ bst_idx_t n_rows; /** @brief Accessor for the gradient index. */ - common::CompressedIterator gidx_iter; + IterType gidx_iter; /** @brief Minimum value for each feature. Size equals to number of features. */ common::Span min_fvalue; /** @brief Histogram cut pointers. Size equals to (number of features + 1). */ @@ -42,12 +55,12 @@ struct EllpackDeviceAccessor { /** @brief Type of each feature, categorical or numerical. */ common::Span feature_types; - EllpackDeviceAccessor() = delete; - EllpackDeviceAccessor(Context const* ctx, std::shared_ptr cuts, - bst_idx_t row_stride, bst_idx_t base_rowid, bst_idx_t n_rows, - common::CompressedIterator gidx_iter, bst_idx_t null_value, - common::Span feature_types) - : null_value{null_value}, + EllpackAccessorImpl() = delete; + EllpackAccessorImpl(Context const* ctx, std::shared_ptr cuts, + bst_idx_t row_stride, bst_idx_t base_rowid, bst_idx_t n_rows, + IterType gidx_iter, bst_idx_t null_value, bool is_dense, + common::Span feature_types) + : null_value_{null_value}, row_stride{row_stride}, base_rowid{base_rowid}, n_rows{n_rows}, @@ -65,8 +78,17 @@ struct EllpackDeviceAccessor { feature_segments = cuts->cut_ptrs_.ConstHostPointer(); min_fvalue = cuts->min_vals_.ConstHostSpan(); } + + if (is_dense) { + static_assert(NullShift() == 63); + CHECK(!IsDense()); + this->null_value_ |= (Ind() << NullShift()); + } } + [[nodiscard]] XGBOOST_HOST_DEV_INLINE bool IsDense() const { + return (this->null_value_ >> NullShift()) != 0; + } [[nodiscard]] XGBOOST_HOST_DEV_INLINE bool IsDenseCompressed() const { return this->row_stride == this->NumFeatures(); } @@ -133,11 +155,23 @@ struct EllpackDeviceAccessor { } return gidx_fvalue_map[gidx]; } - [[nodiscard]] XGBOOST_HOST_DEV_INLINE bst_idx_t NullValue() const { return this->null_value; } + [[nodiscard]] XGBOOST_HOST_DEV_INLINE bst_idx_t NullValue() const { + return this->null_value_ & ((Ind() << NullShift()) - Ind()); + } [[nodiscard]] XGBOOST_HOST_DEV_INLINE bst_idx_t NumBins() const { return gidx_fvalue_map.size(); } + [[nodiscard]] XGBOOST_HOST_DEV_INLINE bst_idx_t NumRows() const { return n_rows; } [[nodiscard]] XGBOOST_HOST_DEV_INLINE size_t NumFeatures() const { return min_fvalue.size(); } }; +using EllpackDeviceAccessor = EllpackAccessorImpl<common::CompressedIterator<std::uint32_t>>; + +using DoubleEllpackAccessor = EllpackAccessorImpl<common::DoubleCompressedIter<std::uint32_t>>; + +/** + * @brief The ellpack accessor uses different gradient index iterators to facilitate + * external memory training. + */ +using EllpackAccessor = std::variant<EllpackDeviceAccessor, DoubleEllpackAccessor>; class GHistIndexMatrix; @@ -224,9 +258,7 @@ class EllpackPageImpl { [[nodiscard]] bst_idx_t Size() const; /** @brief Set the base row id for this page. */ - void SetBaseRowId(std::size_t row_id) { - base_rowid = row_id; - } + void SetBaseRowId(std::size_t row_id) { base_rowid = row_id; } [[nodiscard]] common::HistogramCuts const& Cuts() const { return *cuts_; } [[nodiscard]] std::shared_ptr CutsShared() const { return cuts_; } @@ -243,7 +275,7 @@ class EllpackPageImpl { } /** @return Estimation of memory cost of this page. */ - std::size_t MemCostBytes() const; + [[nodiscard]] std::size_t MemCostBytes() const; /** * @brief Return the total number of symbols (total number of bins plus 1 for not + * @brief Get the value used to represent missing.
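The bit trick above is compact enough to verify in isolation: bit 63 of null_value_ stores the dense flag, and NullValue() masks it back out. An equivalent standalone version of the same packing, checkable at compile time:

#include <cstdint>

constexpr std::uint64_t kDenseBit = std::uint64_t{1} << 63;  // Ind() << NullShift()

constexpr std::uint64_t Pack(std::uint64_t null_value, bool is_dense) {
  return is_dense ? (null_value | kDenseBit) : null_value;
}
constexpr bool IsDense(std::uint64_t packed) { return (packed >> 63) != 0; }
constexpr std::uint64_t NullValue(std::uint64_t packed) { return packed & (kDenseBit - 1); }

static_assert(IsDense(Pack(256, true)));
static_assert(!IsDense(Pack(256, false)));
static_assert(NullValue(Pack(256, true)) == 256);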
+ */ + [[nodiscard]] bst_idx_t NullValue() const { + return this->IsDense() ? this->NumSymbols() : this->NumSymbols() - 1; + } /** * @brief Copy basic shape from another page. */ @@ -263,16 +301,54 @@ class EllpackPageImpl { this->SetNumSymbols(page->NumSymbols()); } /** - * @brief Get an accessor that can be passed into CUDA kernels. + * @brief Get an accessor backed by the device storage. */ - [[nodiscard]] EllpackDeviceAccessor GetDeviceAccessor( - Context const* ctx, common::Span feature_types = {}) const; + EllpackAccessor GetDeviceEllpack(Context const* ctx, + common::Span<FeatureType const> feature_types = {}) const; /** - * @brief Get an accessor for host code. + * @brief Get an accessor backed by the host storage. + * + * @param h_gidx_buffer A buffer used as the backing storage of the accessor. + * + * @return An accessor variant. */ - [[nodiscard]] EllpackDeviceAccessor GetHostAccessor( - Context const* ctx, std::vector* h_gidx_buffer, - common::Span feature_types = {}) const; + EllpackAccessor GetHostEllpack(Context const* ctx, + std::vector<common::CompressedByteT>* h_gidx_buffer, + common::Span<FeatureType const> feature_types = {}) const; + /** + * @brief Visitor pattern. + * + * @param fn A callable that accepts both variants of the ellpack accessor. + * + * @return The result of invoking @p fn on the accessor. + */ + template <typename Fn> + decltype(auto) Visit(Context const* ctx, common::Span<FeatureType const> feature_types, + Fn&& fn) const { + auto acc = this->GetDeviceEllpack(ctx, feature_types); + return std::visit(std::forward<Fn>(fn), acc); + } + /** + * @brief Visitor pattern with a host accessor. + * + * @param h_gidx_buffer A buffer used as the backing storage of the accessor. + * @param fn A callable that accepts both variants of the ellpack accessor. + */ + template <typename Fn> + decltype(auto) VisitOnHost(Context const* ctx, + std::vector<common::CompressedByteT>* h_gidx_buffer, + common::Span<FeatureType const> feature_types, Fn&& fn) const { + auto acc = this->GetHostEllpack(ctx, h_gidx_buffer, feature_types); + return std::visit(std::forward<Fn>(fn), acc); + } + // Helper for Visit that doesn't need the raw data. + template <typename Fn> + decltype(auto) VisitOnHost(Context const* ctx, Fn&& fn) const { + common::Span<FeatureType const> feature_types; + std::vector<common::CompressedByteT> h_gidx_buffer; + auto acc = this->GetHostEllpack(ctx, &h_gidx_buffer, feature_types); + return std::visit(std::forward<Fn>(fn), acc); + } /** * @brief Calculate the number of non-missing values. */ @@ -305,6 +381,13 @@ class EllpackPageImpl { * This can be backed by various storage types. */ common::RefResourceView<common::CompressedByteT> gidx_buffer; + /** + * @brief Second buffer. Used for external memory, where part of the cache might be in + * device memory and another part in host memory. + * + * This buffer is optional. It must be on device if not empty. + */ + common::RefResourceView<common::CompressedByteT> d_gidx_buffer; /** * @brief Compression information.
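The Visit helpers use std::variant to keep callers agnostic of which gradient index iterator backs the page: a single generic lambda is instantiated once per accessor alternative. The shape of the pattern, reduced to a toy (names are illustrative, not the real accessors):

#include <cstdint>
#include <variant>

struct SingleBufferAcc { std::uint64_t NullValue() const { return 3; } };
struct SplitBufferAcc  { std::uint64_t NullValue() const { return 7; } };
using AnyAccessor = std::variant<SingleBufferAcc, SplitBufferAcc>;

std::uint64_t NullOf(AnyAccessor const& acc) {
  // One generic lambda, compiled for each alternative, as in EllpackPageImpl::Visit.
  return std::visit([](auto&& a) { return a.NullValue(); }, acc);
}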
*/ diff --git a/src/data/ellpack_page_raw_format.cu b/src/data/ellpack_page_raw_format.cu index 2907174a0920..a5a2b3748100 100644 --- a/src/data/ellpack_page_raw_format.cu +++ b/src/data/ellpack_page_raw_format.cu @@ -1,5 +1,5 @@ /** - * Copyright 2019-2024, XGBoost contributors + * Copyright 2019-2025, XGBoost contributors */ #include @@ -7,6 +7,7 @@ #include // for vector #include "../common/cuda_rt_utils.h" +#include "../common/cuda_stream.h" // for Event #include "../common/io.h" // for AlignedResourceReadStream, AlignedFileWriteStream #include "../common/ref_resource_view.cuh" // for MakeFixedVecWithCudaMalloc #include "../common/ref_resource_view.h" // for ReadVec, WriteVec @@ -40,7 +41,8 @@ template } *vec = common::MakeFixedVecWithCudaMalloc(n); - dh::safe_cuda(cudaMemcpyAsync(vec->data(), ptr, n_bytes, cudaMemcpyDefault, dh::DefaultStream())); + dh::safe_cuda( + cudaMemcpyAsync(vec->data(), ptr, n_bytes, cudaMemcpyDefault, curt::DefaultStream())); return true; } } // namespace @@ -71,11 +73,11 @@ template impl->SetCuts(this->cuts_); - dh::DefaultStream().Sync(); + curt::DefaultStream().Sync(); return true; } -[[nodiscard]] std::size_t EllpackPageRawFormat::Write(const EllpackPage& page, +[[nodiscard]] std::size_t EllpackPageRawFormat::Write(EllpackPage const& page, common::AlignedFileWriteStream* fo) { xgboost_NVTX_FN_RANGE(); @@ -86,38 +88,61 @@ template bytes += fo->Write(impl->info.row_stride); std::vector h_gidx_buffer; Context ctx = Context{}.MakeCUDA(curt::CurrentDevice()); - [[maybe_unused]] auto h_accessor = impl->GetHostAccessor(&ctx, &h_gidx_buffer); + // write data into the h_gidx_buffer + [[maybe_unused]] auto h_accessor = impl->GetHostEllpack(&ctx, &h_gidx_buffer); bytes += common::WriteVec(fo, h_gidx_buffer); bytes += fo->Write(impl->base_rowid); bytes += fo->Write(impl->NumSymbols()); - dh::DefaultStream().Sync(); + curt::DefaultStream().Sync(); return bytes; } [[nodiscard]] bool EllpackPageRawFormat::Read(EllpackPage* page, EllpackHostCacheStream* fi) const { - xgboost_NVTX_FN_RANGE(); + xgboost_NVTX_FN_RANGE_C(252, 198, 3); auto* impl = page->Impl(); CHECK(this->cuts_->cut_values_.DeviceCanRead()); - fi->Read(page, this->param_.prefetch_copy || !this->has_hmm_ats_); - impl->SetCuts(this->cuts_); + auto ctx = Context{}.MakeCUDA(curt::CurrentDevice()); + + auto dispatch = [&] { + fi->Read(&ctx, page, this->param_.prefetch_copy || !this->has_hmm_ats_); + impl->SetCuts(this->cuts_); + }; - dh::DefaultStream().Sync(); + if (ConsoleLogger::GlobalVerbosity() == ConsoleLogger::LogVerbosity::kDebug) { + curt::Event start{false}, stop{false}; + float milliseconds = 0; + start.Record(ctx.CUDACtx()->Stream()); + + dispatch(); + + stop.Record(ctx.CUDACtx()->Stream()); + stop.Sync(); + dh::safe_cuda(cudaEventElapsedTime(&milliseconds, start, stop)); + double n_bytes = page->Impl()->MemCostBytes(); + double tp = (n_bytes / static_cast((1ul << 30))) * 1000.0 / milliseconds; + LOG(DEBUG) << "Ellpack " << __func__ << " throughput:" << tp << "GB/s"; + } else { + dispatch(); + } + + curt::DefaultStream().Sync(); return true; } -[[nodiscard]] std::size_t EllpackPageRawFormat::Write(const EllpackPage& page, +[[nodiscard]] std::size_t EllpackPageRawFormat::Write(EllpackPage const& page, EllpackHostCacheStream* fo) const { - xgboost_NVTX_FN_RANGE(); + xgboost_NVTX_FN_RANGE_C(3, 252, 198); bool new_page = fo->Write(page); - dh::DefaultStream().Sync(); + curt::DefaultStream().Sync(); if (new_page) { - return fo->Share()->pages.back()->MemCostBytes(); + auto cache = fo->Share(); + return 
cache->SizeBytes(cache->Size() - 1); // last page } else { return InvalidPageSize(); } diff --git a/src/data/ellpack_page_raw_format.h b/src/data/ellpack_page_raw_format.h index 9be2c50cff46..eda0e1d20978 100644 --- a/src/data/ellpack_page_raw_format.h +++ b/src/data/ellpack_page_raw_format.h @@ -38,11 +38,11 @@ class EllpackPageRawFormat : public SparsePageFormat { param_{std::move(param)}, has_hmm_ats_{has_hmm_ats} {} [[nodiscard]] bool Read(EllpackPage* page, common::AlignedResourceReadStream* fi) override; - [[nodiscard]] std::size_t Write(const EllpackPage& page, + [[nodiscard]] std::size_t Write(EllpackPage const& page, common::AlignedFileWriteStream* fo) override; [[nodiscard]] bool Read(EllpackPage* page, EllpackHostCacheStream* fi) const; - [[nodiscard]] std::size_t Write(const EllpackPage& page, EllpackHostCacheStream* fo) const; + [[nodiscard]] std::size_t Write(EllpackPage const& page, EllpackHostCacheStream* fo) const; }; #if !defined(XGBOOST_USE_CUDA) diff --git a/src/data/ellpack_page_source.cu b/src/data/ellpack_page_source.cu index 1b839e89df15..5cddd94996da 100644 --- a/src/data/ellpack_page_source.cu +++ b/src/data/ellpack_page_source.cu @@ -1,71 +1,118 @@ /** * Copyright 2019-2025, XGBoost contributors */ -#include // for count_if +#include // for max #include // for size_t #include // for int8_t, uint64_t, uint32_t #include // for shared_ptr, make_unique, make_shared #include // for accumulate #include // for move -#include "../common/common.h" // for HumanMemUnit, safe_cuda -#include "../common/cuda_rt_utils.h" // for SetDevice -#include "../common/device_helpers.cuh" // for CUDAStreamView, DefaultStream -#include "../common/ref_resource_view.cuh" // for MakeFixedVecWithCudaMalloc -#include "../common/resource.cuh" // for PrivateCudaMmapConstStream -#include "../common/transform_iterator.h" // for MakeIndexTransformIter -#include "ellpack_page.cuh" // for EllpackPageImpl -#include "ellpack_page.h" // for EllpackPage +#include "../common/common.h" // for HumanMemUnit, safe_cuda +#include "../common/cuda_dr_utils.h" // for CUDA_HW_DECOM_AVAILABLE +#include "../common/cuda_rt_utils.h" // for SetDevice, GetDrVersionGlobal +#include "../common/cuda_stream.h" // for StreamRef, DefaultStream, Event +#include "../common/cuda_stream_pool.h" // for StreamPool +#include "../common/device_compression.cuh" // for CompressSnappy, MakeSnappyDecomprMgr +#include "../common/device_helpers.cuh" // for CurrentDevice +#include "../common/numa_topo.h" // for NumaMemCanCross, GetNumaMemBind +#include "../common/ref_resource_view.cuh" // for MakeFixedVecWithCudaMalloc +#include "../common/resource.cuh" // for PrivateCudaMmapConstStream +#include "../common/transform_iterator.h" // for MakeIndexTransformIter +#include "batch_utils.h" // for HostRatioIsAuto +#include "ellpack_page.cuh" // for EllpackPageImpl +#include "ellpack_page.h" // for EllpackPage #include "ellpack_page_source.h" -#include "proxy_dmatrix.cuh" // for Dispatch +#include "proxy_dmatrix.cuh" // for DispatchAny #include "xgboost/base.h" // for bst_idx_t namespace xgboost::data { namespace { -[[nodiscard]] bool IsDevicePage(EllpackPageImpl const* page) { - switch (page->gidx_buffer.Resource()->Type()) { - case common::ResourceHandler::kCudaMalloc: - case common::ResourceHandler::kCudaGrowOnly: { - return true; - } - case common::ResourceHandler::kCudaHostCache: - case common::ResourceHandler::kCudaMmap: - case common::ResourceHandler::kMmap: - case common::ResourceHandler::kMalloc: - return false; - } - LOG(FATAL) << 
"Unreachable"; +// Can we use hardware decompression? +[[nodiscard]] bool CanUseHwDecomp(EllpackPageImpl const* page, bool allow_fallback) { +#if defined(CUDA_HW_DECOM_AVAILABLE) && defined(XGBOOST_USE_NVCOMP) + // We use it only for sparse pages. + return !page->IsDenseCompressed() && (dc::GetGlobalDeStatus().avail || allow_fallback); +#else + (void)allow_fallback; + (void)page; return false; +#endif } -} // anonymous namespace +} // namespace /** * Cache */ -EllpackMemCache::EllpackMemCache(EllpackCacheInfo cinfo) +EllpackMemCache::EllpackMemCache(EllpackCacheInfo cinfo, std::int32_t n_workers) : cache_mapping{std::move(cinfo.cache_mapping)}, buffer_bytes{std::move(cinfo.buffer_bytes)}, buffer_rows{std::move(cinfo.buffer_rows)}, - prefer_device{cinfo.prefer_device}, - max_num_device_pages{cinfo.max_num_device_pages} { + cache_host_ratio{cinfo.cache_host_ratio}, + hw_decomp_ratio{cinfo.hw_decomp_ratio}, + allow_decomp_fallback{cinfo.allow_decomp_fallback}, + streams{std::make_unique(n_workers)}, + pool{[] { +#if defined(__linux__) + std::int32_t major = -1, minor = -1; + curt::GetDrVersionGlobal(&major, &minor); + if (major >= 12 && minor >= 5 || major > 12) { + return std::make_shared(); + } + return std::shared_ptr{nullptr}; +#else + return std::shared_ptr{nullptr}; +#endif + }()} { CHECK_EQ(buffer_bytes.size(), buffer_rows.size()); + CHECK(!detail::HostRatioIsAuto(this->cache_host_ratio)); + CHECK_GE(this->cache_host_ratio, 0.0) << error::CacheHostRatioInvalid(); + CHECK_LE(this->cache_host_ratio, 1.0) << error::CacheHostRatioInvalid(); } EllpackMemCache::~EllpackMemCache() = default; -[[nodiscard]] std::size_t EllpackMemCache::SizeBytes() const { - auto it = common::MakeIndexTransformIter([&](auto i) { return pages.at(i)->MemCostBytes(); }); +[[nodiscard]] std::size_t EllpackMemCache::SizeBytes() const noexcept(true) { + auto it = common::MakeIndexTransformIter([&](auto i) { return this->SizeBytes(i); }); + using T = std::iterator_traits::value_type; + return std::accumulate(it, it + this->Size(), static_cast(0)); +} + +[[nodiscard]] std::size_t EllpackMemCache::DeviceSizeBytes() const noexcept(true) { + auto it = + common::MakeIndexTransformIter([&](auto i) { return this->d_pages.at(i).size_bytes(); }); using T = std::iterator_traits::value_type; - return std::accumulate(it, it + pages.size(), static_cast(0)); + return std::accumulate(it, it + this->Size(), static_cast(0)); +} + +[[nodiscard]] std::size_t EllpackMemCache::SizeBytes(std::size_t i) const noexcept(true) { + return this->h_pages.at(i)->MemCostBytes() + this->d_pages.at(i).size_bytes() + + this->c_pages.at(i).first.DecompressedBytes(); } -[[nodiscard]] EllpackPageImpl const* EllpackMemCache::At(std::int32_t k) const { - return this->pages.at(k).get(); +[[nodiscard]] std::size_t EllpackMemCache::GidxSizeBytes(std::size_t i) const noexcept(true) { + return this->h_pages.at(i)->gidx_buffer.size_bytes() + this->d_pages.at(i).size_bytes() + + this->c_pages.at(i).first.DecompressedBytes(); } -[[nodiscard]] std::int64_t EllpackMemCache::NumDevicePages() const { - return std::count_if(this->pages.cbegin(), this->pages.cend(), - [](auto const& page) { return IsDevicePage(page.get()); }); +[[nodiscard]] std::size_t EllpackMemCache::GidxSizeBytes() const noexcept(true) { + auto it = common::MakeIndexTransformIter([&](auto i) { return this->GidxSizeBytes(i); }); + using T = std::iterator_traits::value_type; + return std::accumulate(it, it + this->Size(), static_cast(0)); +} + +[[nodiscard]] EllpackMemCache::PagePtr 
EllpackMemCache::At(std::int32_t k) const { + auto const* h_ptr = this->h_pages.at(k).get(); + auto const* d_ptr = &this->d_pages.at(k); + auto const* c_ptr = &this->c_pages.at(k); + return std::make_tuple(h_ptr, d_ptr, c_ptr); +} + +[[nodiscard]] EllpackMemCache::PageRef EllpackMemCache::Back() { + auto& h_ref = this->h_pages.back(); + auto& d_ref = this->d_pages.back(); + auto& c_ref = this->c_pages.back(); + return {h_ref, d_ref, c_ref}; } /** @@ -79,20 +126,20 @@ class EllpackHostCacheStreamImpl { explicit EllpackHostCacheStreamImpl(std::shared_ptr cache) : cache_{std::move(cache)} {} - auto Share() { return cache_; } + auto Share() const { return this->cache_; } void Seek(bst_idx_t offset_bytes) { std::size_t n_bytes{0}; std::int32_t k{-1}; - for (std::size_t i = 0, n = cache_->pages.size(); i < n; ++i) { + for (std::size_t i = 0, n = cache_->h_pages.size(); i < n; ++i) { if (n_bytes == offset_bytes) { k = i; break; } - n_bytes += cache_->pages[i]->MemCostBytes(); + n_bytes += this->cache_->SizeBytes(i); } if (offset_bytes == n_bytes && k == -1) { - k = this->cache_->pages.size(); // seek end + k = this->cache_->h_pages.size(); // seek end } CHECK_NE(k, -1) << "Invalid offset:" << offset_bytes; ptr_ = k; @@ -108,63 +155,115 @@ class EllpackHostCacheStreamImpl { CHECK_LT(orig_ptr, this->cache_->NumBatchesOrig()); auto cache_idx = this->cache_->cache_mapping.at(orig_ptr); // Wrap up the previous page if this is a new page, or this is the last page. - auto new_page = cache_idx == this->cache_->pages.size(); - + auto new_page = cache_idx == this->cache_->h_pages.size(); + // Last page expected from the user. auto last_page = (orig_ptr + 1) == this->cache_->NumBatchesOrig(); - // No page concatenation is performed. If there's page concatenation, then the number - // of pages in the cache must be smaller than the input number of pages. - bool no_concat = this->cache_->NumBatchesOrig() == this->cache_->buffer_rows.size(); - // Whether the page should be cached in device. If true, then we don't need to make a - // copy during write since the temporary page is already in device when page - // concatenation is enabled. - bool to_device = this->cache_->prefer_device && - this->cache_->NumDevicePages() < this->cache_->max_num_device_pages; - - auto commit_page = [&ctx](EllpackPageImpl const* old_impl) { + + bool const no_concat = this->cache_->NoConcat(); + + auto cache_host_ratio = this->cache_->cache_host_ratio; + CHECK_GE(cache_host_ratio, 0) << error::CacheHostRatioInvalid(); + CHECK_LE(cache_host_ratio, 1) << error::CacheHostRatioInvalid(); + + // Get the size of the host cache. + auto get_host_nbytes = [&](EllpackPageImpl const* old_impl) { + // Special handling due to floating points. + if (this->cache_->cache_host_ratio == 1.0) { + return old_impl->gidx_buffer.size_bytes(); + } + if (this->cache_->cache_host_ratio == 0.0) { + return static_cast(0); + } + // Calculate based on the `cache_host_ratio` parameter. + auto n_bytes = + std::max(static_cast(old_impl->gidx_buffer.size_bytes() * cache_host_ratio), + std::size_t{1}); + return n_bytes; + }; + + // Finish writing a (concatenated) cache page. 
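
// [Editor's note] A minimal sketch of the arithmetic performed by `get_host_nbytes`
// above, assuming only that `cache_host_ratio` has been validated to lie in [0, 1].
// The 0.0 and 1.0 cases are special-cased to sidestep floating-point rounding, and a
// fractional host share is clamped to at least one byte. The helper name
// `HostShareBytes` is hypothetical, for illustration only; the leftover bytes go to the
// device (and, optionally, the compressed) portion of the cache. `commit_page`, which
// the comment above refers to, follows this note.
#include <algorithm>  // for max
#include <cstddef>    // for size_t

std::size_t HostShareBytes(std::size_t total_bytes, double cache_host_ratio) {
  if (cache_host_ratio == 1.0) { return total_bytes; }     // everything stays on host
  if (cache_host_ratio == 0.0) { return std::size_t{0}; }  // everything stays on device
  // General case: scale by the ratio, keeping at least one byte on host.
  return std::max(static_cast<std::size_t>(total_bytes * cache_host_ratio), std::size_t{1});
}
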
+    auto commit_page = [&](EllpackPageImpl const* old_impl) {
       CHECK_EQ(old_impl->gidx_buffer.Resource()->Type(), common::ResourceHandler::kCudaMalloc);
       auto new_impl = std::make_unique<EllpackPageImpl>();
       new_impl->CopyInfo(old_impl);
-      new_impl->gidx_buffer = common::MakeFixedVecWithPinnedMalloc<common::CompressedByteT>(
-          old_impl->gidx_buffer.size());
-      dh::safe_cuda(cudaMemcpyAsync(new_impl->gidx_buffer.data(), old_impl->gidx_buffer.data(),
-                                    old_impl->gidx_buffer.size_bytes(), cudaMemcpyDefault));
-      LOG(INFO) << "Create cache page with size:" << common::HumanMemUnit(new_impl->MemCostBytes());
-      return new_impl;
+
+      // Split the cache into host cache, compressed host cache, and the device cache. We
+      // use the decompression engine only for sparse data.
+      auto n_bytes = get_host_nbytes(old_impl);
+      CHECK_LE(n_bytes, old_impl->gidx_buffer.size_bytes());
+      std::size_t n_h_bytes = n_bytes, n_comp_bytes = 0;
+      bool can_use_hw = CanUseHwDecomp(old_impl, this->cache_->allow_decomp_fallback);
+      if (can_use_hw) {
+        // FIXME(jiamingy): The decomp_ratio is not exposed to the user and we don't yet
+        // have auto configuration for this parameter. We can make it more flexible. More
+        // profiling is needed.
+        bool unspecified = std::isnan(this->cache_->hw_decomp_ratio);
+        auto hw_decomp_ratio = unspecified ? 0.4f : this->cache_->hw_decomp_ratio;
+        CHECK_LE(hw_decomp_ratio, 1.0);
+        CHECK_GE(hw_decomp_ratio, 0.0);
+        n_comp_bytes = n_bytes * hw_decomp_ratio;
+        n_h_bytes = n_bytes - n_comp_bytes;
+      }
+      CHECK_EQ(n_bytes, n_h_bytes + n_comp_bytes);
+
+      // Normal host cache
+      new_impl->gidx_buffer =
+          common::MakeFixedVecWithPinnedMalloc<common::CompressedByteT>(n_h_bytes);
+      if (n_h_bytes > 0) {
+        dh::safe_cuda(cudaMemcpyAsync(new_impl->gidx_buffer.data(), old_impl->gidx_buffer.data(),
+                                      n_h_bytes, cudaMemcpyDefault));
+      }
+
+      // Compressed host cache
+      dh::DeviceUVector<std::uint8_t> tmp;
+      dc::CuMemParams c_out;
+      std::size_t constexpr kChunkSize = 1ul << 21;
+      auto params = dc::CompressSnappy(
+          &ctx, old_impl->gidx_buffer.ToSpan().subspan(n_h_bytes, n_comp_bytes), &tmp, kChunkSize);
+      common::RefResourceView<std::uint8_t> c_buf = dc::CoalesceCompressedBuffersToHost(
+          ctx.CUDACtx()->Stream(), this->cache_->pool, params, tmp, &c_out);
+      auto c_page = dc::MakeSnappyDecomprMgr(ctx.CUDACtx()->Stream(), this->cache_->pool,
+                                             std::move(c_out), c_buf.ToSpan());
+      CHECK_EQ(c_page.DecompressedBytes() + new_impl->gidx_buffer.size_bytes(), n_bytes);
+
+      // Device cache
+      auto remaining = old_impl->gidx_buffer.size_bytes() - n_bytes;
+      auto d_page = common::MakeFixedVecWithCudaMalloc<common::CompressedByteT>(remaining);
+      if (remaining > 0) {
+        dh::safe_cuda(cudaMemcpyAsync(d_page.data(), old_impl->gidx_buffer.data() + n_bytes,
+                                      remaining, cudaMemcpyDefault));
+      }
+      CHECK_LE(new_impl->gidx_buffer.size(), old_impl->gidx_buffer.size());
+      CHECK_EQ(new_impl->MemCostBytes() + d_page.size_bytes() + c_page.DecompressedBytes(),
+               old_impl->MemCostBytes());
+      LOG(INFO) << "Create cache page with size:"
+                << common::HumanMemUnit(new_impl->MemCostBytes() + d_page.size_bytes() +
+                                        c_page.DecompressedBytes());
+      return std::make_tuple(std::move(new_impl), std::move(d_page),
+                             std::make_pair(std::move(c_page), std::move(c_buf)));
     };
+
     if (no_concat) {
-      // Avoid a device->device->host copy.
CHECK(new_page); - auto new_impl = std::make_unique(); - new_impl->CopyInfo(page.Impl()); - - if (to_device) { - // Copy to device - new_impl->gidx_buffer = common::MakeFixedVecWithCudaMalloc( - page.Impl()->gidx_buffer.size()); - } else { - // Copy to host - new_impl->gidx_buffer = common::MakeFixedVecWithPinnedMalloc( - page.Impl()->gidx_buffer.size()); - } - dh::safe_cuda(cudaMemcpyAsync(new_impl->gidx_buffer.data(), page.Impl()->gidx_buffer.data(), - page.Impl()->gidx_buffer.size_bytes(), cudaMemcpyDefault)); + auto old_impl = page.Impl(); + auto [commited, d_page, c_page] = commit_page(old_impl); - this->cache_->offsets.push_back(new_impl->n_rows * new_impl->info.row_stride); - this->cache_->pages.push_back(std::move(new_impl)); + this->cache_->offsets.push_back(old_impl->n_rows * old_impl->info.row_stride); + this->cache_->h_pages.emplace_back(std::move(commited)); + this->cache_->d_pages.emplace_back(std::move(d_page)); + this->cache_->c_pages.emplace_back(std::move(c_page)); return new_page; } if (new_page) { - // No need to copy if it's already in device. - if (!this->cache_->pages.empty() && !to_device) { + if (!this->cache_->h_pages.empty()) { // Need to wrap up the previous page. - auto commited = commit_page(this->cache_->pages.back().get()); - // Replace the previous page with a new page. - this->cache_->pages.back() = std::move(commited); + // Replace the previous page (on device) with a new page on host. + this->cache_->Back() = commit_page(this->cache_->h_pages.back().get()); } // Push a new page - auto n_bytes = this->cache_->buffer_bytes.at(this->cache_->pages.size()); - auto n_samples = this->cache_->buffer_rows.at(this->cache_->pages.size()); + auto n_bytes = this->cache_->buffer_bytes.at(this->cache_->h_pages.size()); + auto n_samples = this->cache_->buffer_rows.at(this->cache_->h_pages.size()); auto new_impl = std::make_unique(&ctx, impl->CutsShared(), impl->IsDense(), impl->info.row_stride, n_samples); new_impl->SetBaseRowId(impl->base_rowid); @@ -174,42 +273,94 @@ class EllpackHostCacheStreamImpl { auto offset = new_impl->Copy(&ctx, impl, 0); this->cache_->offsets.push_back(offset); - this->cache_->pages.push_back(std::move(new_impl)); + + // Make sure we can always access the back of the vectors + this->cache_->h_pages.emplace_back(std::move(new_impl)); + this->cache_->d_pages.emplace_back(); + this->cache_->c_pages.emplace_back(); } else { - CHECK(!this->cache_->pages.empty()); - CHECK_EQ(cache_idx, this->cache_->pages.size() - 1); - auto& new_impl = this->cache_->pages.back(); + // Concatenate into the device pages even though `d_pages` and `c_pages` are + // used. We split the page at the commit stage. + CHECK(!this->cache_->h_pages.empty()); + CHECK_EQ(cache_idx, this->cache_->h_pages.size() - 1); + auto& new_impl = this->cache_->h_pages.back(); auto offset = new_impl->Copy(&ctx, impl, this->cache_->offsets.back()); this->cache_->offsets.back() += offset; - // No need to copy if it's already in device. - if (last_page && !to_device) { - auto commited = commit_page(this->cache_->pages.back().get()); - this->cache_->pages.back() = std::move(commited); - } } + // No need to copy if it's already in device. 
+ if (last_page) { + this->cache_->Back() = commit_page(this->cache_->h_pages.back().get()); + } + + CHECK_EQ(this->cache_->h_pages.size(), this->cache_->d_pages.size()); + CHECK_EQ(this->cache_->h_pages.size(), this->cache_->c_pages.size()); return new_page; } - void Read(EllpackPage* out, bool prefetch_copy) const { - auto page = this->cache_->At(this->ptr_); - if (IsDevicePage(page)) { - // Page is already in the device memory, no need to copy. - prefetch_copy = false; - } + void Read(Context const* ctx, EllpackPage* out, bool prefetch_copy) const { + CHECK_EQ(this->cache_->h_pages.size(), this->cache_->d_pages.size()); + CHECK_EQ(this->cache_->h_pages.size(), this->cache_->c_pages.size()); + auto [h_page, d_page, c_page] = this->cache_->At(this->ptr_); + // Skip copy if the full page is on device + bool on_device = (h_page->gidx_buffer.empty() && c_page->first.Empty()) && !d_page->empty(); + auto out_impl = out->Impl(); - if (prefetch_copy) { - out_impl->gidx_buffer = - common::MakeFixedVecWithCudaMalloc(page->gidx_buffer.size()); - dh::safe_cuda(cudaMemcpyAsync(out_impl->gidx_buffer.data(), page->gidx_buffer.data(), - page->gidx_buffer.size_bytes(), cudaMemcpyDefault)); + // We can't access a compressed page directly. + if (!c_page->first.Empty()) { + prefetch_copy = true; + } + + LOG(DEBUG) << "On device: " << on_device << ", prefetch copy:" << prefetch_copy + << ", compressed:" << (!c_page->first.Empty()); + if (on_device) { + CHECK(h_page->gidx_buffer.empty()); + auto d_res = d_page->Resource(); + out_impl->gidx_buffer = common::RefResourceView{ + d_res->DataAs(), d_page->size(), d_res}; + CHECK(out_impl->d_gidx_buffer.empty()); + } else if (prefetch_copy) { + // Copy the data in the same order as written + // Normal host cache + auto n_bytes = this->cache_->GidxSizeBytes(this->ptr_); + out_impl->gidx_buffer = common::MakeFixedVecWithCudaMalloc(n_bytes); + if (!h_page->gidx_buffer.empty()) { + dh::safe_cuda(cudaMemcpyAsync(out_impl->gidx_buffer.data(), h_page->gidx_buffer.data(), + h_page->gidx_buffer.size_bytes(), cudaMemcpyDefault, + ctx->CUDACtx()->Stream())); + } + // Compressed host cache + if (!c_page->first.Empty()) { + auto stream = this->cache_->streams->Next(); + auto out = out_impl->gidx_buffer.ToSpan().subspan(h_page->gidx_buffer.size_bytes(), + c_page->first.DecompressedBytes()); + dc::DecompressSnappy(stream, c_page->first, out, this->cache_->allow_decomp_fallback); + curt::Event e; + e.Record(stream); + ctx->CUDACtx()->Stream().Wait(e); + } + // Device cache + if (!d_page->empty()) { + auto out = out_impl->gidx_buffer.ToSpan().subspan(h_page->gidx_buffer.size_bytes() + + c_page->first.DecompressedBytes()); + CHECK_EQ(out.size_bytes(), d_page->size_bytes()); + dh::safe_cuda(cudaMemcpyAsync(out.data(), d_page->data(), d_page->size_bytes(), + cudaMemcpyDefault, ctx->CUDACtx()->Stream())); + } } else { - auto res = page->gidx_buffer.Resource(); + // Direct access + auto h_res = h_page->gidx_buffer.Resource(); + CHECK(h_res->DataAs() == h_page->gidx_buffer.data()); out_impl->gidx_buffer = common::RefResourceView{ - res->DataAs(), page->gidx_buffer.size(), res}; + h_res->DataAs(), h_page->gidx_buffer.size(), h_res}; + CHECK(out_impl->d_gidx_buffer.empty()); + if (!d_page->empty()) { + out_impl->d_gidx_buffer = common::RefResourceView{ + d_page->data(), d_page->size(), d_page->Resource()}; + } } - out_impl->CopyInfo(page); + out_impl->CopyInfo(h_page); } }; @@ -227,8 +378,8 @@ std::shared_ptr EllpackHostCacheStream::Share() const { void 
EllpackHostCacheStream::Seek(bst_idx_t offset_bytes) { this->p_impl_->Seek(offset_bytes); } -void EllpackHostCacheStream::Read(EllpackPage* page, bool prefetch_copy) const { - this->p_impl_->Read(page, prefetch_copy); +void EllpackHostCacheStream::Read(Context const* ctx, EllpackPage* page, bool prefetch_copy) const { + this->p_impl_->Read(ctx, page, prefetch_copy); } [[nodiscard]] bool EllpackHostCacheStream::Write(EllpackPage const& page) { @@ -242,7 +393,11 @@ template typename F> [[nodiscard]] std::unique_ptr::WriterT> EllpackCacheStreamPolicy::CreateWriter(StringView, std::uint32_t iter) { if (!this->p_cache_) { - this->p_cache_ = std::make_unique(this->CacheInfo()); + CHECK(!detail::HostRatioIsAuto(this->CacheInfo().cache_host_ratio)); + CHECK_GE(this->CacheInfo().cache_host_ratio, 0.0); + CHECK_LE(this->CacheInfo().cache_host_ratio, 1.0); + constexpr std::int32_t kMaxGpuExtMemWorkers = 4; + this->p_cache_ = std::make_unique(this->CacheInfo(), kMaxGpuExtMemWorkers); } auto fo = std::make_unique(this->p_cache_); if (iter == 0) { @@ -298,17 +453,36 @@ EllpackMmapStreamPolicy::CreateReader(StringVi void CalcCacheMapping(Context const* ctx, bool is_dense, std::shared_ptr cuts, std::int64_t min_cache_page_bytes, ExternalDataInfo const& ext_info, - EllpackCacheInfo* cinfo) { + bool is_validation, EllpackCacheInfo* cinfo) { CHECK(cinfo->param.Initialized()) << "Need to initialize scalar fields first."; auto ell_info = CalcNumSymbols(ctx, ext_info.row_stride, is_dense, cuts); + + /** + * Configure the cache + */ + // The total size of the cache. + std::size_t n_cache_bytes = 0; + for (std::size_t i = 0; i < ext_info.n_batches; ++i) { + auto n_samples = ext_info.base_rowids.at(i + 1) - ext_info.base_rowids[i]; + auto n_bytes = common::CompressedBufferWriter::CalculateBufferSize( + ext_info.row_stride * n_samples, ell_info.n_symbols); + n_cache_bytes += n_bytes; + } + std::tie(cinfo->cache_host_ratio, min_cache_page_bytes) = detail::DftPageSizeHostRatio( + n_cache_bytes, is_validation, cinfo->cache_host_ratio, min_cache_page_bytes); + + /** + * Calculate the cache buffer size + */ std::vector cache_bytes; std::vector cache_mapping(ext_info.n_batches, 0); std::vector cache_rows; for (std::size_t i = 0; i < ext_info.n_batches; ++i) { - auto n_samples = ext_info.base_rowids.at(i + 1) - ext_info.base_rowids[i]; + auto n_samples = ext_info.base_rowids[i + 1] - ext_info.base_rowids[i]; auto n_bytes = common::CompressedBufferWriter::CalculateBufferSize( ext_info.row_stride * n_samples, ell_info.n_symbols); + if (cache_bytes.empty()) { // Push the first page cache_bytes.push_back(n_bytes); @@ -329,6 +503,14 @@ void CalcCacheMapping(Context const* ctx, bool is_dense, cinfo->cache_mapping = std::move(cache_mapping); cinfo->buffer_bytes = std::move(cache_bytes); cinfo->buffer_rows = std::move(cache_rows); + + // Directly store in device if there's only one batch. 
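
// [Editor's note] A rough, self-contained sketch of the page concatenation that
// `CalcCacheMapping` sets up here: user batches are greedily merged until a cache page
// reaches `min_cache_page_bytes`, and `cache_mapping[i]` records which concatenated page
// the i-th user batch lands in. `BuildCacheMapping` and its arguments are hypothetical
// names; the real routine also tracks per-page byte and row counts. (The single-batch
// special case from the comment above continues right after this note.)
#include <cstddef>  // for size_t
#include <cstdint>  // for int64_t
#include <vector>   // for vector

std::vector<std::size_t> BuildCacheMapping(std::vector<std::size_t> const& batch_bytes,
                                           std::int64_t min_cache_page_bytes) {
  std::vector<std::size_t> mapping(batch_bytes.size(), 0);
  std::vector<std::size_t> page_bytes;  // accumulated size of each concatenated page
  for (std::size_t i = 0; i < batch_bytes.size(); ++i) {
    if (page_bytes.empty() ||
        static_cast<std::int64_t>(page_bytes.back()) >= min_cache_page_bytes) {
      page_bytes.push_back(batch_bytes[i]);  // open a new cache page
    } else {
      page_bytes.back() += batch_bytes[i];  // concatenate into the current page
    }
    mapping[i] = page_bytes.size() - 1;
  }
  return mapping;
}
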
+  if (cinfo->NumBatchesCc() == 1) {
+    cinfo->cache_host_ratio = 0.0;
+  }
+
+  LOG(INFO) << "`cache_host_ratio`=" << cinfo->cache_host_ratio
+            << " `min_cache_page_bytes`=" << min_cache_page_bytes;
 }

 /**
@@ -378,7 +560,7 @@ void ExtEllpackPageSourceImpl::Fetch() {
   if (!this->ReadCache()) {
     auto iter = this->source_->Iter();
     CHECK_EQ(this->Iter(), iter);
-    cuda_impl::Dispatch(proxy_, [this](auto const& value) {
+    cuda_impl::DispatchAny(proxy_, [this](auto const& value) {
       CHECK(this->proxy_->Ctx()->IsCUDA()) << "All batches must use the same device type.";
       proxy_->Info().feature_types.SetDevice(dh::GetDevice(this->ctx_));
       auto d_feature_types = proxy_->Info().feature_types.ConstDeviceSpan();
@@ -403,12 +585,12 @@ void ExtEllpackPageSourceImpl::Fetch() {
                  this->GetCuts()};
       this->info_->Extend(proxy_->Info(), false, true);
     });
-    LOG(INFO) << "Generated an Ellpack page with size: "
-              << common::HumanMemUnit(this->page_->Impl()->MemCostBytes())
-              << " from an batch with estimated size: "
-              << cuda_impl::Dispatch(proxy_, [](auto const& adapter) {
-                   return common::HumanMemUnit(adapter->SizeBytes());
-                 });
+    LOG(DEBUG) << "Generated an Ellpack page with size: "
+               << common::HumanMemUnit(this->page_->Impl()->MemCostBytes())
+               << " from a batch with estimated size: "
+               << cuda_impl::DispatchAny(proxy_, [](auto const& adapter) {
+                    return common::HumanMemUnit(adapter->SizeBytes());
+                  });
     this->page_->SetBaseRowId(this->ext_info_.base_rowids.at(iter));
     this->WriteCache();
   }
@@ -421,4 +603,26 @@ template void ExtEllpackPageSourceImpl>::Fetch();
 template void ExtEllpackPageSourceImpl>::Fetch();
+
+namespace detail {
+void EllpackFormatCheckNuma(StringView msg) {
+#if defined(__linux__)
+  bool can_cross = common::NumaMemCanCross();
+  std::uint32_t numa = 0;
+  auto incorrect = [&numa] {
+    std::uint32_t cpu = 0;
+    return common::GetCpuNuma(&cpu, &numa) && static_cast<std::int32_t>(numa) != curt::GetNumaId();
+  };
+
+  if (can_cross && !common::GetNumaMemBind()) {
+    LOG(WARNING) << "Running on a NUMA system without membind." << msg;
+  } else if (can_cross && incorrect()) {
+    LOG(WARNING) << "Incorrect NUMA CPU bind, CPU node:" << numa
+                 << ", GPU node:" << curt::GetNumaId() << "."
<< msg; + } +#else + (void)msg; +#endif +} +} // namespace detail } // namespace xgboost::data diff --git a/src/data/ellpack_page_source.h b/src/data/ellpack_page_source.h index cb921daa446f..d64f077e2b84 100644 --- a/src/data/ellpack_page_source.h +++ b/src/data/ellpack_page_source.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2024, XGBoost Contributors + * Copyright 2019-2025, XGBoost Contributors */ #ifndef XGBOOST_DATA_ELLPACK_PAGE_SOURCE_H_ @@ -8,45 +8,79 @@ #include // for int32_t #include // for numeric_limits #include // for shared_ptr +#include // for tuple #include // for move #include // for vector -#include "../common/cuda_rt_utils.h" // for SupportsPageableMem, SupportsAts -#include "../common/hist_util.h" // for HistogramCuts -#include "ellpack_page.h" // for EllpackPage -#include "ellpack_page_raw_format.h" // for EllpackPageRawFormat -#include "sparse_page_source.h" // for PageSourceIncMixIn -#include "xgboost/base.h" // for bst_idx_t -#include "xgboost/context.h" // for DeviceOrd -#include "xgboost/data.h" // for BatchParam -#include "xgboost/span.h" // for Span +#include "../common/compressed_iterator.h" // for CompressedByteT +#include "../common/cuda_rt_utils.h" // for SupportsPageableMem, SupportsAts +#include "../common/device_compression.h" // for SnappyDecomprMgr +#include "../common/hist_util.h" // for HistogramCuts +#include "../common/ref_resource_view.h" // for RefResourceView +#include "../data/batch_utils.h" // for AutoHostRatio +#include "ellpack_page.h" // for EllpackPage +#include "ellpack_page_raw_format.h" // for EllpackPageRawFormat +#include "sparse_page_source.h" // for PageSourceIncMixIn +#include "xgboost/base.h" // for bst_idx_t +#include "xgboost/context.h" // for DeviceOrd +#include "xgboost/data.h" // for BatchParam +#include "xgboost/span.h" // for Span + +namespace xgboost::curt { +class StreamPool; +} +namespace xgboost::common::cuda_impl { +class HostPinnedMemPool; +} // namespace xgboost::common::cuda_impl namespace xgboost::data { struct EllpackCacheInfo { BatchParam param; - bool prefer_device{false}; // Prefer to cache the page in the device memory instead of host. - std::int64_t max_num_device_pages{0}; // Maximum number of pages cached in device. + // The size ratio the host cache vs. the total cache + double cache_host_ratio{::xgboost::cuda_impl::AutoHostRatio()}; float missing{std::numeric_limits::quiet_NaN()}; + // The ratio of the cache that can be compressed. Used for testing. + float hw_decomp_ratio{std::numeric_limits::quiet_NaN()}; + bool allow_decomp_fallback{false}; std::vector cache_mapping; - std::vector buffer_bytes; + std::vector buffer_bytes; // N bytes of the concatenated pages. std::vector buffer_rows; EllpackCacheInfo() = default; - EllpackCacheInfo(BatchParam param, bool prefer_device, std::int64_t max_num_device_pages, - float missing) + EllpackCacheInfo(BatchParam param, double h_ratio, float missing) + : param{std::move(param)}, cache_host_ratio{h_ratio}, missing{missing} {} + EllpackCacheInfo(BatchParam param, ExtMemConfig const& config) : param{std::move(param)}, - prefer_device{prefer_device}, - max_num_device_pages{max_num_device_pages}, - missing{missing} {} + cache_host_ratio{config.cache_host_ratio}, + missing{config.missing}, + hw_decomp_ratio{config.hw_decomp_ratio}, + allow_decomp_fallback{config.allow_decomp_fallback} {} + + // Only effective for host-based cache. + // The number of batches for the concatenated cache. 
+  [[nodiscard]] std::size_t NumBatchesCc() const { return this->buffer_rows.size(); }
 };

 // We need to decouple the storage and the view of the storage so that we can implement
 // concurrent read. As a result, there are two classes, one for cache storage, another one
 // for stream.
 //
-// This is a memory-based cache. It can be a mixed of the device memory and the host memory.
+// This is a memory-based cache. It can be a mix of the device memory and the host
+// memory.
 struct EllpackMemCache {
-  std::vector<std::unique_ptr<EllpackPageImpl>> pages;
+  // The host portion of each page.
+  std::vector<std::unique_ptr<EllpackPageImpl>> h_pages;
+  // The device portion of each page.
+  using DPage = common::RefResourceView<common::CompressedByteT>;
+  std::vector<DPage> d_pages;
+  // Storage for decompression parameters and the compressed buffer.
+  using CPage = std::pair<dc::SnappyDecomprMgr, common::RefResourceView<std::uint8_t>>;
+  // Compressed host page.
+  std::vector<CPage> c_pages;
+
+  using PagePtr = std::tuple<EllpackPageImpl const*, DPage const*, CPage const*>;
+  using PageRef = std::tuple<std::unique_ptr<EllpackPageImpl>&, DPage&, CPage&>;
+
   std::vector<bst_idx_t> offsets;
   // Size of each batch before concatenation.
   std::vector<bst_idx_t> sizes_orig;
@@ -55,21 +89,39 @@ struct EllpackMemCache {
   // Cache info
   std::vector<std::size_t> const buffer_bytes;
   std::vector<bst_idx_t> const buffer_rows;
-  bool const prefer_device;
-  std::int64_t const max_num_device_pages;
+  double const cache_host_ratio;
+  float const hw_decomp_ratio;
+  bool const allow_decomp_fallback;

-  explicit EllpackMemCache(EllpackCacheInfo cinfo);
-  ~EllpackMemCache();
+  std::unique_ptr<curt::StreamPool> streams;  // For decompression
+  std::shared_ptr<common::cuda_impl::HostPinnedMemPool> pool;

-  // The number of bytes for the entire cache.
-  [[nodiscard]] std::size_t SizeBytes() const;
+  explicit EllpackMemCache(EllpackCacheInfo cinfo, std::int32_t n_workers);
+  ~EllpackMemCache();
+  // The number of bytes of the entire cache.
+  [[nodiscard]] std::size_t SizeBytes() const noexcept(true);
+  // The number of bytes of the device cache.
+  [[nodiscard]] std::size_t DeviceSizeBytes() const noexcept(true);
+  // The number of bytes of each page.
+  [[nodiscard]] std::size_t SizeBytes(std::size_t i) const noexcept(true);
+  // The number of bytes of the gradient index (ellpack).
+  [[nodiscard]] std::size_t GidxSizeBytes(std::size_t i) const noexcept(true);
+  // The number of bytes of the gradient index (ellpack) of the entire cache.
+  [[nodiscard]] std::size_t GidxSizeBytes() const noexcept(true);
+  // The number of pages in the cache.
+  [[nodiscard]] std::size_t Size() const { return this->h_pages.size(); }
+  // Is the cache empty?
   [[nodiscard]] bool Empty() const { return this->SizeBytes() == 0; }
-
+  // No page concatenation is performed. If there's page concatenation, then the number of
+  // pages in the cache must be smaller than the input number of pages.
+  [[nodiscard]] bool NoConcat() const { return this->NumBatchesOrig() == this->buffer_rows.size(); }
+  // The number of pages before concatenation.
   [[nodiscard]] bst_idx_t NumBatchesOrig() const { return cache_mapping.size(); }

-  [[nodiscard]] EllpackPageImpl const* At(std::int32_t k) const;
-
-  [[nodiscard]] std::int64_t NumDevicePages() const;
+  // Get the pointers to the k^th concatenated page.
+  [[nodiscard]] PagePtr At(std::int32_t k) const;
+  // Get a reference to the last concatenated page.
+  [[nodiscard]] PageRef Back();
 };

 // Pimpl to hide CUDA calls from the host compiler.
@@ -102,7 +154,7 @@ class EllpackHostCacheStream {
   * @param page[out] The returned page.
   * @param prefetch_copy[in] Does the stream need to copy the page?
*/ - void Read(EllpackPage* page, bool prefetch_copy) const; + void Read(Context const* ctx, EllpackPage* page, bool prefetch_copy) const; /** * @brief Add a new page to the host cache. * @@ -115,6 +167,11 @@ class EllpackHostCacheStream { [[nodiscard]] bool Write(EllpackPage const& page); }; +namespace detail { +// Not a member of `EllpackFormatPolicy`. Hide the impl without requiring template specialization. +void EllpackFormatCheckNuma(StringView msg); +} // namespace detail + template class EllpackFormatPolicy { std::shared_ptr cuts_{nullptr}; @@ -142,13 +199,14 @@ class EllpackFormatPolicy { LOG(WARNING) << "`use_rmm` is set to false." << msg; } std::int32_t major{0}, minor{0}; - curt::DrVersion(&major, &minor); + curt::GetDrVersionGlobal(&major, &minor); if ((major < 12 || (major == 12 && minor < 7)) && curt::SupportsAts()) { // Use ATS, but with an old kernel driver. LOG(WARNING) << "Using an old kernel driver with supported CTK<12.7." << "The latest version of CTK supported by the current driver: " << major << "." << minor << "." << msg; } + detail::EllpackFormatCheckNuma(msg); } // For testing with the HMM flag. explicit EllpackFormatPolicy(bool has_hmm) : has_hmm_{has_hmm} {} @@ -187,6 +245,7 @@ class EllpackCacheStreamPolicy : public F { [[nodiscard]] std::unique_ptr CreateReader(StringView name, bst_idx_t offset, bst_idx_t length) const; + std::shared_ptr Share() const { return p_cache_; } }; template typename F> @@ -225,7 +284,7 @@ class EllpackMmapStreamPolicy : public F { void CalcCacheMapping(Context const* ctx, bool is_dense, std::shared_ptr cuts, std::int64_t min_cache_page_bytes, ExternalDataInfo const& ext_info, - EllpackCacheInfo* cinfo); + bool is_validation, EllpackCacheInfo* cinfo); /** * @brief Ellpack source with sparse pages as the underlying source. diff --git a/src/data/entry.h b/src/data/entry.h new file mode 100644 index 000000000000..eabc981eba4f --- /dev/null +++ b/src/data/entry.h @@ -0,0 +1,38 @@ +/** + * Copyright 2019-2025, XGBoost Contributors + */ +#pragma once + +#include "../common/math.h" // for CheckNAN +#include "xgboost/base.h" // for bst_idx_t +#include "xgboost/data.h" // for Entry + +namespace xgboost::data { +struct COOTuple { + COOTuple() = default; + XGBOOST_DEVICE COOTuple(bst_idx_t row_idx, bst_idx_t column_idx, float value) + : row_idx(row_idx), column_idx(column_idx), value(value) {} + + bst_idx_t row_idx{0}; + bst_idx_t column_idx{0}; + float value{0}; +}; + +struct IsValidFunctor { + float missing; + + XGBOOST_DEVICE explicit IsValidFunctor(float missing) : missing(missing) {} + + XGBOOST_DEVICE bool operator()(float value) const { + return !(common::CheckNAN(value) || value == missing); + } + + XGBOOST_DEVICE bool operator()(const data::COOTuple& e) const { + return !(common::CheckNAN(e.value) || e.value == missing); + } + + XGBOOST_DEVICE bool operator()(const Entry& e) const { + return !(common::CheckNAN(e.fvalue) || e.fvalue == missing); + } +}; +} // namespace xgboost::data diff --git a/src/data/extmem_quantile_dmatrix.cc b/src/data/extmem_quantile_dmatrix.cc index 25b73cfe9700..3ceda2ffd211 100644 --- a/src/data/extmem_quantile_dmatrix.cc +++ b/src/data/extmem_quantile_dmatrix.cc @@ -7,6 +7,7 @@ #include // for string #include // for vector +#include "../common/error_msg.h" // for CacheHostRatio, InconsistentMaxBin #include "../tree/param.h" // FIXME(jiamingy): Find a better way to share this parameter. 
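
// [Editor's note] The `IsValidFunctor` added in entry.h above treats both NaN and values
// equal to the user-supplied `missing` marker as invalid. A standalone rendering of the
// same predicate, with std::isnan standing in for common::CheckNAN (note that when
// `missing` is itself NaN, the equality test is always false and the NaN check alone
// filters the entry):
#include <cmath>    // for isnan
#include <cstddef>  // for size_t

struct IsValid {
  float missing;
  bool operator()(float value) const { return !(std::isnan(value) || value == missing); }
};

// E.g., counting the valid entries of a raw batch:
std::size_t CountValid(float const* data, std::size_t n, float missing) {
  IsValid is_valid{missing};
  std::size_t n_valid = 0;
  for (std::size_t i = 0; i < n; ++i) {
    n_valid += is_valid(data[i]);
  }
  return n_valid;
}
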
#include "batch_utils.h" // for CheckParam, RegenGHist #include "proxy_dmatrix.h" // for DataIterProxy @@ -40,6 +41,7 @@ ExtMemQuantileDMatrix::ExtMemQuantileDMatrix(DataIterHandle iter_handle, DMatrix BatchParam p{max_bin, tree::TrainParam::DftSparseThreshold()}; if (ctx.IsCPU()) { + CHECK(detail::HostRatioIsAuto(config.cache_host_ratio)) << error::CacheHostRatioNotImpl(); this->InitFromCPU(&ctx, iter, proxy, p, config.missing, ref); } else { p.n_prefetch_batches = ::xgboost::cuda_impl::DftPrefetchBatches(); @@ -47,6 +49,8 @@ ExtMemQuantileDMatrix::ExtMemQuantileDMatrix(DataIterHandle iter_handle, DMatrix } this->batch_ = p; this->fmat_ctx_ = ctx; + + SyncCategories(&ctx, info_.Cats(), info_.num_row_ == 0); } ExtMemQuantileDMatrix::~ExtMemQuantileDMatrix() { @@ -75,7 +79,7 @@ void ExtMemQuantileDMatrix::InitFromCPU( common::HistogramCuts cuts; ExternalDataInfo ext_info; - cpu_impl::GetDataShape(ctx, proxy, *iter, missing, &ext_info); + cpu_impl::GetDataShape(ctx, proxy, iter.get(), missing, &ext_info); ext_info.SetInfo(ctx, true, &this->info_); this->n_batches_ = ext_info.n_batches; diff --git a/src/data/extmem_quantile_dmatrix.cu b/src/data/extmem_quantile_dmatrix.cu index 533d68b2b915..fa6daf249123 100644 --- a/src/data/extmem_quantile_dmatrix.cu +++ b/src/data/extmem_quantile_dmatrix.cu @@ -6,26 +6,27 @@ * concatenate user-provded pages to form larger @ref EllpackPage to avoid small GPU * kernels. * - * Given 1 training DMatrix and 1 validation DMatrix, with 2 pages from the validation set - * cached in device memory, we can have at most 6 pages in the device memory. 2 from - * prefetched training DMatrix, 2 from prefetched validation DMatrix, and 2 in the device - * cache. If set the minimum @ref EllpackPage to 12GB in a 96GB GPU, 6 pages have 72GB - * size in total. Without accounting for memory fragmentation, this should be very close - * the upper boundary. + * Given 1 training DMatrix and 1 validation DMatrix, we can have at most 4 pages in the + * device memory. 2 from prefetched training DMatrix, 2 from prefetched validation + * DMatrix. If set the minimum @ref EllpackPage to 12GB in a 96GB GPU, 4 pages have 48GB + * size in total. Accounting for memory fragmentation, we still have some room in the + * device that can be used as a faster cache. */ + #include // for shared_ptr #include // for visit, get_if -#include "../common/cuda_rt_utils.h" // for xgboost_NVTX_FN_RANGE -#include "batch_utils.h" // for CheckParam, RegenGHist -#include "ellpack_page.cuh" // for EllpackPage +#include "../common/nvtx_utils.h" // for xgboost_NVTX_FN_RANGE +#include "batch_utils.h" // for CheckParam, RegenGHist +#include "batch_utils.h" // for AutoCachePageBytes +#include "ellpack_page.cuh" // for EllpackPage #include "extmem_quantile_dmatrix.h" #include "proxy_dmatrix.h" // for DataIterProxy #include "xgboost/context.h" // for Context #include "xgboost/data.h" // for BatchParam -#include "batch_utils.h" // for AutoCachePageBytes namespace xgboost::data { +namespace detail { [[nodiscard]] std::int64_t DftMinCachePageBytes(std::int64_t min_cache_page_bytes) { // Set to 0 if it should match the user input size. 
if (::xgboost::cuda_impl::AutoCachePageBytes() == min_cache_page_bytes) { @@ -34,6 +35,7 @@ namespace xgboost::data { } return min_cache_page_bytes; } +} // namespace detail void ExtMemQuantileDMatrix::InitFromCUDA( Context const *ctx, @@ -58,14 +60,14 @@ void ExtMemQuantileDMatrix::InitFromCUDA( /** * Calculate cache info */ - // Prefer device storage for validation dataset since we can't hide it's data load - // overhead with inference. But the training procedures can confortably overlap with the - // data transfer. - auto cinfo = EllpackCacheInfo{p, (ref != nullptr), config.max_num_device_pages, config.missing}; - CalcCacheMapping(ctx, this->info_.IsDense(), cuts, - DftMinCachePageBytes(config.min_cache_page_bytes), ext_info, &cinfo); + auto is_validation = (ref != nullptr); + auto cinfo = EllpackCacheInfo{p, config}; + CalcCacheMapping(ctx, this->info_.IsDense(), cuts, config.min_cache_page_bytes, ext_info, + is_validation, &cinfo); CHECK_EQ(cinfo.cache_mapping.size(), ext_info.n_batches); - auto n_batches = cinfo.buffer_rows.size(); // The number of batches after page concatenation. + CHECK_GE(cinfo.cache_host_ratio, 0.0); + CHECK_LE(cinfo.cache_host_ratio, 1.0); + auto n_batches = cinfo.NumBatchesCc(); LOG(INFO) << "Number of batches after concatenation:" << n_batches; /** diff --git a/src/data/gradient_index.cc b/src/data/gradient_index.cc index e9a1a7e329ff..88dc7304b2cb 100644 --- a/src/data/gradient_index.cc +++ b/src/data/gradient_index.cc @@ -128,8 +128,11 @@ void GHistIndexMatrix::PushAdapterBatchColumns(Context const *ctx, Batch const & INSTANTIATION_PUSH(data::CSRArrayAdapterBatch) INSTANTIATION_PUSH(data::ArrayAdapterBatch) +INSTANTIATION_PUSH(data::DenseAdapterBatch) INSTANTIATION_PUSH(data::SparsePageAdapterBatch) INSTANTIATION_PUSH(data::ColumnarAdapterBatch) +INSTANTIATION_PUSH(data::EncColumnarAdapterBatch) + #undef INSTANTIATION_PUSH void GHistIndexMatrix::ResizeColumns(double sparse_thresh) { diff --git a/src/data/gradient_index.cu b/src/data/gradient_index.cu index 5e15ff5f0fa2..6717741bcda7 100644 --- a/src/data/gradient_index.cu +++ b/src/data/gradient_index.cu @@ -1,5 +1,5 @@ /** - * Copyright 2022-2024, XGBoost Contributors + * Copyright 2022-2025, XGBoost Contributors */ #include // for size_t #include // for unique_ptr @@ -18,39 +18,39 @@ template void SetIndexData(Context const* ctx, EllpackPageImpl const* page, std::vector* p_hit_count_tloc, DecompressOffset&& get_offset, GHistIndexMatrix* out) { - std::vector h_gidx_buffer; - auto accessor = page->GetHostAccessor(ctx, &h_gidx_buffer); - auto const kNull = static_cast(accessor.NullValue()); + page->VisitOnHost(ctx, [&](auto&& accessor) { + auto const kNull = static_cast(accessor.NullValue()); - auto index_data_span = common::Span{out->index.data(), out->index.Size()}; - auto n_bins_total = page->Cuts().TotalBins(); + auto index_data_span = common::Span{out->index.data(), out->index.Size()}; + auto n_bins_total = page->Cuts().TotalBins(); - auto& hit_count_tloc = *p_hit_count_tloc; - hit_count_tloc.clear(); - hit_count_tloc.resize(ctx->Threads() * n_bins_total, 0); - bool dense_compressed = page->IsDenseCompressed() && !page->IsDense(); - common::ParallelFor(page->Size(), ctx->Threads(), [&](auto ridx) { - auto tid = omp_get_thread_num(); - size_t in_rbegin = page->info.row_stride * ridx; - size_t out_rbegin = out->row_ptr[ridx]; - if (dense_compressed) { - for (std::size_t j = 0, k = 0; j < page->info.row_stride; ++j) { - bst_bin_t bin_idx = accessor.gidx_iter[in_rbegin + j]; - if (XGBOOST_EXPECT((bin_idx != 
kNull), true)) { // relatively dense - bin_idx = get_offset(bin_idx, j); - index_data_span[out_rbegin + k++] = bin_idx; - ++hit_count_tloc[tid * n_bins_total + bin_idx]; + auto& hit_count_tloc = *p_hit_count_tloc; + hit_count_tloc.clear(); + hit_count_tloc.resize(ctx->Threads() * n_bins_total, 0); + bool dense_compressed = page->IsDenseCompressed() && !page->IsDense(); + common::ParallelFor(page->Size(), ctx->Threads(), [&](auto ridx) { + auto tid = omp_get_thread_num(); + size_t in_rbegin = page->info.row_stride * ridx; + size_t out_rbegin = out->row_ptr[ridx]; + if (dense_compressed) { + for (std::size_t j = 0, k = 0; j < page->info.row_stride; ++j) { + bst_bin_t bin_idx = accessor.gidx_iter[in_rbegin + j]; + if (XGBOOST_EXPECT((bin_idx != kNull), true)) { // relatively dense + bin_idx = get_offset(bin_idx, j); + index_data_span[out_rbegin + k++] = bin_idx; + ++hit_count_tloc[tid * n_bins_total + bin_idx]; + } + } + } else { + auto r_size = out->row_ptr[ridx + 1] - out->row_ptr[ridx]; + for (size_t j = 0; j < r_size; ++j) { + bst_bin_t bin_idx = accessor.gidx_iter[in_rbegin + j]; + assert(bin_idx != kNull); + index_data_span[out_rbegin + j] = bin_idx; + ++hit_count_tloc[tid * n_bins_total + get_offset(bin_idx, j)]; } } - } else { - auto r_size = out->row_ptr[ridx + 1] - out->row_ptr[ridx]; - for (size_t j = 0; j < r_size; ++j) { - bst_bin_t bin_idx = accessor.gidx_iter[in_rbegin + j]; - assert(bin_idx != kNull); - index_data_span[out_rbegin + j] = bin_idx; - ++hit_count_tloc[tid * n_bins_total + get_offset(bin_idx, j)]; - } - } + }); }); } @@ -61,18 +61,18 @@ void GetRowPtrFromEllpack(Context const* ctx, EllpackPageImpl const* page, if (page->IsDense()) { std::fill(row_ptr.begin() + 1, row_ptr.end(), page->info.row_stride); } else { - std::vector h_gidx_buffer; - auto accessor = page->GetHostAccessor(ctx, &h_gidx_buffer); - auto const kNull = static_cast(accessor.NullValue()); + page->VisitOnHost(ctx, [&](auto& accessor) { + auto const kNull = static_cast(accessor.NullValue()); - common::ParallelFor(page->Size(), ctx->Threads(), [&](auto i) { - size_t ibegin = page->info.row_stride * i; - for (size_t j = 0; j < page->info.row_stride; ++j) { - bst_bin_t bin_idx = accessor.gidx_iter[ibegin + j]; - if (bin_idx != kNull) { - row_ptr[i + 1]++; + common::ParallelFor(page->Size(), ctx->Threads(), [&](auto i) { + size_t ibegin = page->info.row_stride * i; + for (size_t j = 0; j < page->info.row_stride; ++j) { + bst_bin_t bin_idx = accessor.gidx_iter[ibegin + j]; + if (bin_idx != kNull) { + row_ptr[i + 1]++; + } } - } + }); }); } std::partial_sum(row_ptr.begin(), row_ptr.end(), row_ptr.begin()); diff --git a/src/data/gradient_index.h b/src/data/gradient_index.h index 6560198093c1..f1e1350ae648 100644 --- a/src/data/gradient_index.h +++ b/src/data/gradient_index.h @@ -20,7 +20,7 @@ #include "../common/ref_resource_view.h" // for RefResourceView #include "../common/threading_utils.h" #include "../common/transform_iterator.h" // for MakeIndexTransformIter -#include "adapter.h" +#include "entry.h" // for IsValidFunctor #include "xgboost/base.h" #include "xgboost/data.h" diff --git a/src/data/gradient_index_format.cc b/src/data/gradient_index_format.cc index cd012937208e..75df6c37f33d 100644 --- a/src/data/gradient_index_format.cc +++ b/src/data/gradient_index_format.cc @@ -1,21 +1,23 @@ /** - * Copyright 2021-2024, XGBoost contributors + * Copyright 2021-2025, XGBoost contributors */ #include "gradient_index_format.h" -#include // for size_t -#include // for uint8_t -#include // for underlying_type_t 
-#include // for vector +#include // for size_t +#include // for uint8_t +#include // for underlying_type_t +#include // for vector #include "../common/hist_util.h" // for HistogramCuts #include "../common/io.h" // for AlignedResourceReadStream +#include "../common/nvtx_utils.h" // for xgboost_NVTX_FN_RANGE #include "../common/ref_resource_view.h" // for ReadVec, WriteVec #include "gradient_index.h" // for GHistIndexMatrix namespace xgboost::data { [[nodiscard]] bool GHistIndexRawFormat::Read(GHistIndexMatrix* page, common::AlignedResourceReadStream* fi) { + xgboost_NVTX_FN_RANGE(); CHECK(fi); page->Cuts() = this->cuts_; diff --git a/src/data/gradient_index_page_source.cc b/src/data/gradient_index_page_source.cc index e8ec30700b95..65110e61454e 100644 --- a/src/data/gradient_index_page_source.cc +++ b/src/data/gradient_index_page_source.cc @@ -1,5 +1,5 @@ /** - * Copyright 2021-2024, XGBoost Contributors + * Copyright 2021-2025, XGBoost Contributors */ #include "gradient_index_page_source.h" @@ -8,6 +8,7 @@ #include "../common/hist_util.h" // for HistogramCuts #include "gradient_index.h" // for GHistIndexMatrix +#include "proxy_dmatrix.h" // for DispatchAny namespace xgboost::data { void GradientIndexPageSource::Fetch() { @@ -33,7 +34,7 @@ void ExtGradientIndexPageSource::Fetch() { if (!this->ReadCache()) { CHECK_EQ(count_, source_->Iter()); CHECK_NE(cuts_.Values().size(), 0); - HostAdapterDispatch(proxy_, [this](auto const& value) { + cpu_impl::DispatchAny(proxy_, [this](auto const& value) { CHECK(this->proxy_->Ctx()->IsCPU()) << "All batches must use the same device type."; auto h_feature_types = proxy_->Info().feature_types.ConstHostSpan(); // This does three things: diff --git a/src/data/iterative_dmatrix.cc b/src/data/iterative_dmatrix.cc index 3f59af9ffda2..6cf6e24e8286 100644 --- a/src/data/iterative_dmatrix.cc +++ b/src/data/iterative_dmatrix.cc @@ -9,12 +9,13 @@ #include // for move #include // for vector -#include "../common/categorical.h" // common::IsCat +#include "../common/categorical.h" // for IsCat #include "../common/hist_util.h" // for HistogramCuts #include "../tree/param.h" // FIXME(jiamingy): Find a better way to share this parameter. 
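
// [Editor's note] The `xgboost_NVTX_FN_RANGE()` annotation added to
// `GHistIndexRawFormat::Read` above opens an NVTX range for the duration of the call so
// the function shows up in Nsight Systems timelines. A minimal hand-rolled equivalent
// using the public NVTX C API might look like the following sketch (the colored
// `_C(r, g, b)` variant used elsewhere in this patch additionally fills an
// nvtxEventAttributes_t with an ARGB color before pushing the range):
#include <nvtx3/nvToolsExt.h>  // for nvtxRangePushA, nvtxRangePop

class NvtxRange {
 public:
  explicit NvtxRange(char const* name) { nvtxRangePushA(name); }
  ~NvtxRange() { nvtxRangePop(); }
  NvtxRange(NvtxRange const&) = delete;
  NvtxRange& operator=(NvtxRange const&) = delete;
};

void Read() {
  NvtxRange range{__func__};  // appears as a "Read" range in the profiler
  // ... actual work ...
}
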
#include "batch_utils.h" // for RegenGHist +#include "cat_container.h" // for SyncCategories #include "gradient_index.h" // for GHistIndexMatrix -#include "proxy_dmatrix.h" // for DataIterProxy +#include "proxy_dmatrix.h" // for DataIterProxy, DispatchAny #include "quantile_dmatrix.h" // for GetCutsFromRef #include "quantile_dmatrix.h" // for GetDataShape, MakeSketches #include "simple_batch_iterator.h" // for SimpleBatchIteratorImpl @@ -26,10 +27,10 @@ IterativeDMatrix::IterativeDMatrix(DataIterHandle iter_handle, DMatrixHandle pro std::shared_ptr ref, DataIterResetCallback* reset, XGDMatrixCallbackNext* next, float missing, int nthread, bst_bin_t max_bin, std::int64_t max_quantile_blocks) - : proxy_{proxy}, reset_{reset}, next_{next} { - // fetch the first batch + : proxy_{proxy} { + // The external iterator, fetch the first batch auto iter = - DataIterProxy{iter_handle, reset_, next_}; + DataIterProxy{iter_handle, reset, next}; iter.Reset(); bool valid = iter.Next(); CHECK(valid) << "Iterative DMatrix must have at least 1 batch."; @@ -42,30 +43,30 @@ IterativeDMatrix::IterativeDMatrix(DataIterHandle iter_handle, DMatrixHandle pro BatchParam p{max_bin, tree::TrainParam::DftSparseThreshold()}; if (ctx.IsCUDA()) { - this->InitFromCUDA(&ctx, p, max_quantile_blocks, iter_handle, missing, ref); + this->InitFromCUDA(&ctx, p, max_quantile_blocks, std::move(iter), missing, ref); } else { - this->InitFromCPU(&ctx, p, iter_handle, missing, ref); + this->InitFromCPU(&ctx, p, std::move(iter), missing, ref); } this->fmat_ctx_ = ctx; this->batch_ = p; + SyncCategories(&ctx, info_.Cats(), info_.num_row_ == 0); + LOG(INFO) << "Finished constructing the `IterativeDMatrix`: (" << this->Info().num_row_ << ", " - << this->Info().num_col_ << ", " << this->Info().num_nonzero_ << ")."; + << this->Info().num_col_ << ", " << this->info_.num_nonzero_ << ")."; } -void IterativeDMatrix::InitFromCPU(Context const* ctx, BatchParam const& p, - DataIterHandle iter_handle, float missing, - std::shared_ptr ref) { +void IterativeDMatrix::InitFromCPU( + Context const* ctx, BatchParam const& p, + DataIterProxy&& iter, float missing, + std::shared_ptr ref) { DMatrixProxy* proxy = MakeProxy(proxy_); CHECK(proxy); - // The external iterator - auto iter = - DataIterProxy{iter_handle, reset_, next_}; common::HistogramCuts cuts; ExternalDataInfo ext_info; - cpu_impl::GetDataShape(ctx, proxy, iter, missing, &ext_info); + cpu_impl::GetDataShape(ctx, proxy, &iter, missing, &ext_info); ext_info.SetInfo(ctx, true, &this->info_); /** @@ -82,7 +83,7 @@ void IterativeDMatrix::InitFromCPU(Context const* ctx, BatchParam const& p, std::size_t prev_sum = 0; std::size_t i = 0; while (iter.Next()) { - HostAdapterDispatch(proxy, [&](auto const& batch) { + cpu_impl::DispatchAny(proxy, [&](auto const& batch) { proxy->Info().num_nonzero_ = ext_info.batch_nnz[i]; this->ghist_->PushAdapterBatch(ctx, rbegin, prev_sum, batch, missing, h_ft, p.sparse_thresh, Info().num_row_); @@ -104,13 +105,13 @@ void IterativeDMatrix::InitFromCPU(Context const* ctx, BatchParam const& p, */ bst_idx_t accumulated_rows = 0; while (iter.Next()) { - HostAdapterDispatch(proxy, [&](auto const& batch) { + cpu_impl::DispatchAny(proxy, [&](auto const& batch) { this->ghist_->PushAdapterBatchColumns(ctx, batch, missing, accumulated_rows); }); accumulated_rows += BatchSamples(proxy); } iter.Reset(); - CHECK_EQ(accumulated_rows, Info().num_row_); + CHECK_EQ(accumulated_rows, this->info_.num_row_); if (ext_info.n_batches == 1) { this->info_ = std::move(proxy->Info()); @@ -194,20 
+195,27 @@ BatchSet IterativeDMatrix::GetExtBatches(Context const* ctx, } #if !defined(XGBOOST_USE_CUDA) -inline void IterativeDMatrix::InitFromCUDA(Context const*, BatchParam const&, std::int64_t, - DataIterHandle, float, std::shared_ptr) { +void IterativeDMatrix::InitFromCUDA(Context const*, BatchParam const&, std::int64_t, + DataIterProxy&&, + float, std::shared_ptr) { // silent the warning about unused variables. (void)(proxy_); - (void)(reset_); - (void)(next_); common::AssertGPUSupport(); } -inline BatchSet IterativeDMatrix::GetEllpackBatches(Context const*, - BatchParam const&) { +BatchSet IterativeDMatrix::GetEllpackBatches(Context const*, BatchParam const&) { common::AssertGPUSupport(); auto begin_iter = BatchIterator(new SimpleBatchIteratorImpl(ellpack_)); return BatchSet(BatchIterator(begin_iter)); } + +void IterativeDMatrix::Save(common::AlignedFileWriteStream*) const { + LOG(FATAL) << "Not implemented"; +} + +IterativeDMatrix* IterativeDMatrix::Load(common::AlignedResourceReadStream*) { + LOG(FATAL) << "Not implemented"; + return nullptr; +} #endif // !defined(XGBOOST_USE_CUDA) } // namespace xgboost::data diff --git a/src/data/iterative_dmatrix.cu b/src/data/iterative_dmatrix.cu index 444a679dadbd..4078ced6fb40 100644 --- a/src/data/iterative_dmatrix.cu +++ b/src/data/iterative_dmatrix.cu @@ -1,33 +1,28 @@ /** * Copyright 2020-2025, XGBoost contributors */ -#include // for shared_ptr -#include // for move +#include // for shared_ptr +#include // for move #include "batch_utils.h" // for RegenGHist, CheckParam #include "device_adapter.cuh" #include "ellpack_page.cuh" +#include "ellpack_page_raw_format.h" // for EllpackPageRawFormat #include "iterative_dmatrix.h" -#include "proxy_dmatrix.cuh" -#include "proxy_dmatrix.h" // for BatchSamples, BatchColumns +#include "proxy_dmatrix.cuh" // for DispatchAny +#include "proxy_dmatrix.h" // for BatchSamples, BatchColumns #include "simple_batch_iterator.h" namespace xgboost::data { -void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p, - std::int64_t max_quantile_blocks, DataIterHandle iter_handle, - float missing, std::shared_ptr ref) { +void IterativeDMatrix::InitFromCUDA( + Context const* ctx, BatchParam const& p, std::int64_t max_quantile_blocks, + DataIterProxy&& iter, float missing, + std::shared_ptr ref) { // A handle passed to external iterator. DMatrixProxy* proxy = MakeProxy(proxy_); CHECK(proxy); - // The external iterator - auto iter = - DataIterProxy{iter_handle, reset_, next_}; - - dh::XGBCachingDeviceAllocator alloc; - // Sketch for all batches. - std::int32_t current_device{dh::CurrentDevice()}; auto get_ctx = [&]() { Context d_ctx = (ctx->IsCUDA()) ? 
*ctx : Context{}.MakeCUDA(current_device); @@ -68,14 +63,14 @@ void IterativeDMatrix::InitFromCUDA(Context const* ctx, BatchParam const& p, auto rows = BatchSamples(proxy); dh::device_vector row_counts(rows + 1, 0); common::Span row_counts_span(row_counts.data().get(), row_counts.size()); - cuda_impl::Dispatch(proxy, [=](auto const& value) { + cuda_impl::DispatchAny(proxy, [=](auto const& value) { return GetRowCounts(ctx, value, row_counts_span, dh::GetDevice(ctx), missing); }); auto is_dense = this->IsDense(); proxy->Info().feature_types.SetDevice(dh::GetDevice(ctx)); auto d_feature_types = proxy->Info().feature_types.ConstDeviceSpan(); - auto new_impl = cuda_impl::Dispatch(proxy, [&](auto const& value) { + auto new_impl = cuda_impl::DispatchAny(proxy, [&](auto const& value) { return EllpackPageImpl{ &fmat_ctx_, value, missing, is_dense, row_counts_span, d_feature_types, ext_info.row_stride, rows, cuts}; @@ -146,4 +141,29 @@ BatchSet IterativeDMatrix::GetEllpackBatches(Context const* ctx, auto begin_iter = BatchIterator(new SimpleBatchIteratorImpl(ellpack_)); return BatchSet(begin_iter); } + +void IterativeDMatrix::Save(common::AlignedFileWriteStream* fo) const { + CHECK(fo); + CHECK(this->ellpack_) << "Not implemented"; + // Save cuts + auto const& p_cuts = this->ellpack_->Impl()->CutsShared(); + p_cuts->Save(fo); + // Save ellpack + auto fmt = + std::make_unique(p_cuts, this->Ctx()->Device(), BatchParam{}, false); + auto n_bytes = fmt->Write(*this->ellpack_, fo); + CHECK_GE(n_bytes, this->ellpack_->Impl()->MemCostBytes()); +} + +IterativeDMatrix* IterativeDMatrix::Load(common::AlignedResourceReadStream* fi) { + CHECK(fi); + // Load cuts + std::shared_ptr p_cuts{common::HistogramCuts::Load(fi)}; + // Load ellpack + auto fmt = std::make_unique(p_cuts, DeviceOrd::CUDA(dh::CurrentDevice()), + BatchParam{}, false); + auto ellpack = std::make_shared(); + CHECK(fmt->Read(ellpack.get(), fi)); + return new IterativeDMatrix{std::move(ellpack)}; +} } // namespace xgboost::data diff --git a/src/data/iterative_dmatrix.h b/src/data/iterative_dmatrix.h index 6205989ccaf2..ab5c13b2d63e 100644 --- a/src/data/iterative_dmatrix.h +++ b/src/data/iterative_dmatrix.h @@ -1,13 +1,13 @@ /** - * Copyright 2020-2024, XGBoost Contributors - * \file iterative_dmatrix.h + * Copyright 2020-2025, XGBoost Contributors * - * \brief Implementation of the higher-level `QuantileDMatrix`. + * @brief Implementation of the higher-level `QuantileDMatrix`. 
*/ #ifndef XGBOOST_DATA_ITERATIVE_DMATRIX_H_ #define XGBOOST_DATA_ITERATIVE_DMATRIX_H_ -#include // for shared_ptr +#include // for shared_ptr +#include // for move #include "quantile_dmatrix.h" // for QuantileDMatrix #include "xgboost/base.h" // for bst_bin_t @@ -18,7 +18,9 @@ namespace xgboost { namespace common { class HistogramCuts; -} +class AlignedFileWriteStream; +class AlignedResourceReadStream; +} // namespace common namespace data { /** @@ -35,14 +37,18 @@ class IterativeDMatrix : public QuantileDMatrix { BatchParam batch_; DMatrixHandle proxy_; - DataIterResetCallback *reset_; - XGDMatrixCallbackNext *next_; void InitFromCUDA(Context const *ctx, BatchParam const &p, std::int64_t max_quantile_blocks, - DataIterHandle iter_handle, float missing, std::shared_ptr ref); - void InitFromCPU(Context const *ctx, BatchParam const &p, DataIterHandle iter_handle, + DataIterProxy &&iter, + float missing, std::shared_ptr ref); + void InitFromCPU(Context const *ctx, BatchParam const &p, + DataIterProxy &&iter, float missing, std::shared_ptr ref); + explicit IterativeDMatrix(std::shared_ptr ellpack) : ellpack_{std::move(ellpack)} { + this->fmat_ctx_.UpdateAllowUnknown(Args{{"device", DeviceSym::CUDA()}}); + } + public: explicit IterativeDMatrix(DataIterHandle iter_handle, DMatrixHandle proxy, std::shared_ptr ref, DataIterResetCallback *reset, @@ -55,13 +61,15 @@ class IterativeDMatrix : public QuantileDMatrix { ~IterativeDMatrix() override = default; - bool EllpackExists() const override { return static_cast(ellpack_); } - bool GHistIndexExists() const override { return static_cast(ghist_); } + [[nodiscard]] bool EllpackExists() const override { return static_cast(ellpack_); } + [[nodiscard]] bool GHistIndexExists() const override { return static_cast(ghist_); } BatchSet GetGradientIndex(Context const *ctx, BatchParam const ¶m) override; - BatchSet GetEllpackBatches(Context const *ctx, const BatchParam ¶m) override; BatchSet GetExtBatches(Context const *ctx, BatchParam const ¶m) override; + + void Save(common::AlignedFileWriteStream *fo) const; + [[nodiscard]] static IterativeDMatrix *Load(common::AlignedResourceReadStream *fi); }; } // namespace data } // namespace xgboost diff --git a/src/data/proxy_dmatrix.cc b/src/data/proxy_dmatrix.cc index fec64da0f364..3bd680be6bc0 100644 --- a/src/data/proxy_dmatrix.cc +++ b/src/data/proxy_dmatrix.cc @@ -1,11 +1,15 @@ /** - * Copyright 2021-2024, XGBoost Contributors + * Copyright 2021-2025, XGBoost Contributors */ #include "proxy_dmatrix.h" -#include // for shared_ptr +#include // for shared_ptr +#include // for is_same_v +#include // for move +#include "../common/type.h" // for GetValueT +#include "adapter.h" // for ColumnarAdapter #include "xgboost/context.h" // for Context #include "xgboost/data.h" // for DMatrix #include "xgboost/logging.h" @@ -16,42 +20,40 @@ #endif namespace xgboost::data { -void DMatrixProxy::SetColumnarData(StringView interface_str) { - std::shared_ptr adapter{new ColumnarAdapter{interface_str}}; - this->batch_ = adapter; +void DMatrixProxy::SetColumnar(StringView data) { + std::shared_ptr adapter{new ColumnarAdapter{data}}; this->Info().num_col_ = adapter->NumColumns(); this->Info().num_row_ = adapter->NumRows(); - this->ctx_.Init(Args{{"device", "cpu"}}); + this->batch_ = std::move(adapter); + this->ctx_.Init(Args{{"device", DeviceSym::CPU()}}); } -void DMatrixProxy::SetArrayData(StringView interface_str) { - std::shared_ptr adapter{new ArrayAdapter{interface_str}}; - this->batch_ = adapter; +void 
DMatrixProxy::SetArray(StringView data) { + std::shared_ptr adapter{new ArrayAdapter{data}}; this->Info().num_col_ = adapter->NumColumns(); this->Info().num_row_ = adapter->NumRows(); - this->ctx_.Init(Args{{"device", "cpu"}}); + this->batch_ = std::move(adapter); + this->ctx_.Init(Args{{"device", DeviceSym::CPU()}}); } -void DMatrixProxy::SetCSRData(char const *c_indptr, char const *c_indices, char const *c_values, - bst_feature_t n_features, bool on_host) { +void DMatrixProxy::SetCsr(char const *c_indptr, char const *c_indices, char const *c_values, + bst_feature_t n_features, bool on_host) { CHECK(on_host) << "Not implemented on device."; std::shared_ptr adapter{new CSRArrayAdapter( StringView{c_indptr}, StringView{c_indices}, StringView{c_values}, n_features)}; - this->batch_ = adapter; this->Info().num_col_ = adapter->NumColumns(); this->Info().num_row_ = adapter->NumRows(); - this->ctx_.Init(Args{{"device", "cpu"}}); + this->batch_ = std::move(adapter); + this->ctx_.Init(Args{{"device", DeviceSym::CPU()}}); } -namespace cuda_impl { -std::shared_ptr CreateDMatrixFromProxy(Context const *ctx, - std::shared_ptr proxy, float missing); #if !defined(XGBOOST_USE_CUDA) -std::shared_ptr CreateDMatrixFromProxy(Context const *, std::shared_ptr, - float) { - return nullptr; -} +void DMatrixProxy::SetCudaArray(StringView) { common::AssertGPUSupport(); } +void DMatrixProxy::SetCudaColumnar(StringView) { common::AssertGPUSupport(); } +#endif // !defined(XGBOOST_USE_CUDA) +namespace cuda_impl { +#if !defined(XGBOOST_USE_CUDA) [[nodiscard]] bst_idx_t BatchSamples(DMatrixProxy const *) { common::AssertGPUSupport(); return 0; @@ -60,6 +62,9 @@ std::shared_ptr CreateDMatrixFromProxy(Context const *, std::shared_ptr common::AssertGPUSupport(); return 0; } +#else +std::shared_ptr CreateDMatrixFromProxy(Context const *ctx, + std::shared_ptr proxy, float missing); #endif // XGBOOST_USE_CUDA } // namespace cuda_impl @@ -68,21 +73,44 @@ std::shared_ptr CreateDMatrixFromProxy(Context const *ctx, float missing) { bool type_error{false}; std::shared_ptr p_fmat{nullptr}; + if (proxy->Ctx()->IsCUDA()) { +#if defined(XGBOOST_USE_CUDA) p_fmat = cuda_impl::CreateDMatrixFromProxy(ctx, proxy, missing); +#else + common::AssertGPUSupport(); +#endif } else { - p_fmat = data::HostAdapterDispatch( + p_fmat = data::cpu_impl::DispatchAny( proxy.get(), [&](auto const &adapter) { auto p_fmat = std::shared_ptr(DMatrix::Create(adapter.get(), missing, ctx->Threads())); + CHECK_EQ(p_fmat->Info().num_row_, adapter->NumRows()); return p_fmat; }, &type_error); } CHECK(p_fmat) << "Failed to fallback."; - p_fmat->Info() = proxy->Info().Copy(); + p_fmat->Info().Extend(proxy->Info(), /*accumulate_rows=*/false, true); return p_fmat; } + +[[nodiscard]] bool BatchCatsIsRef(DMatrixProxy const *proxy) { + if (proxy->Device().IsCUDA()) { +#if defined(XGBOOST_USE_CUDA) + return cuda_impl::BatchCatsIsRef(proxy); +#else + common::AssertGPUSupport(); +#endif + } + return cpu_impl::DispatchAny(proxy, [&](auto const &adapter) { + using AdapterT = typename common::GetValueT::element_type; + if constexpr (std::is_same_v) { + return adapter->HasRefCategorical(); + } + return false; + }); +} } // namespace xgboost::data diff --git a/src/data/proxy_dmatrix.cu b/src/data/proxy_dmatrix.cu index 9ad9ea227272..34d9221f2ece 100644 --- a/src/data/proxy_dmatrix.cu +++ b/src/data/proxy_dmatrix.cu @@ -4,11 +4,12 @@ #include "../encoder/ordinal.h" // for DeviceColumnsView #include "device_adapter.cuh" #include "proxy_dmatrix.cuh" +#include "../common/type.h" // for 
GetValueT #include "proxy_dmatrix.h" namespace xgboost::data { -void DMatrixProxy::FromCudaColumnar(StringView interface_str) { - auto adapter{std::make_shared(interface_str)}; +void DMatrixProxy::SetCudaColumnar(StringView data) { + auto adapter{std::make_shared(data)}; this->batch_ = adapter; this->Info().num_col_ = adapter->NumColumns(); this->Info().num_row_ = adapter->NumRows(); @@ -21,8 +22,8 @@ void DMatrixProxy::FromCudaColumnar(StringView interface_str) { ctx_ = ctx_.MakeCUDA(adapter->Device().ordinal); } -void DMatrixProxy::FromCudaArray(StringView interface_str) { - auto adapter(std::make_shared(StringView{interface_str})); +void DMatrixProxy::SetCudaArray(StringView data) { + auto adapter(std::make_shared(StringView{data})); this->batch_ = adapter; this->Info().num_col_ = adapter->NumColumns(); this->Info().num_row_ = adapter->NumRows(); @@ -39,24 +40,38 @@ namespace cuda_impl { std::shared_ptr CreateDMatrixFromProxy(Context const* ctx, std::shared_ptr proxy, float missing) { - return Dispatch(proxy.get(), [&](auto const& adapter) { + return DispatchAny(proxy.get(), [&](auto const& adapter) { auto p_fmat = std::shared_ptr{DMatrix::Create(adapter.get(), missing, ctx->Threads())}; + CHECK_EQ(p_fmat->Info().num_row_, adapter->NumRows()); return p_fmat; }); } [[nodiscard]] bst_idx_t BatchSamples(DMatrixProxy const* proxy) { - return cuda_impl::Dispatch(proxy, [](auto const& value) { return value.NumRows(); }); + return cuda_impl::DispatchAny(proxy, [](auto const& value) { return value.NumRows(); }); } [[nodiscard]] bst_idx_t BatchColumns(DMatrixProxy const* proxy) { - return cuda_impl::Dispatch(proxy, [](auto const& value) { return value.NumCols(); }); + return cuda_impl::DispatchAny(proxy, [](auto const& value) { return value.NumCols(); }); +} + +[[nodiscard]] bool BatchCatsIsRef(DMatrixProxy const* proxy) { + return DispatchAny(proxy, [&](auto const& adapter) { + using AdapterT = typename common::GetValueT::element_type; + if constexpr (std::is_same_v) { + return adapter->HasRefCategorical(); + } + return false; + }); } [[nodiscard]] enc::DeviceColumnsView BatchCats(DMatrixProxy const* proxy) { - return Dispatch(proxy, [&](auto const& adapter) { - using AdapterT = typename std::remove_reference_t::element_type; + return DispatchAny(proxy, [&](auto const& adapter) { + using AdapterT = typename common::GetValueT::element_type; if constexpr (std::is_same_v) { + if (adapter->HasRefCategorical()) { + return adapter->RefCats(); + } return adapter->Cats(); } return enc::DeviceColumnsView{}; diff --git a/src/data/proxy_dmatrix.cuh b/src/data/proxy_dmatrix.cuh index db53b992df11..06c516d43931 100644 --- a/src/data/proxy_dmatrix.cuh +++ b/src/data/proxy_dmatrix.cuh @@ -1,39 +1,64 @@ /** - * Copyright 2021-2023 XGBoost contributors + * Copyright 2021-2025, XGBoost contributors */ -#include // for any, any_cast +#include // for any_cast +#include // for shared_ptr -#include "device_adapter.cuh" +#include "device_adapter.cuh" // for MakeEncColumnarBatch #include "proxy_dmatrix.h" namespace xgboost::data::cuda_impl { -template -decltype(auto) Dispatch(DMatrixProxy const* proxy, Fn fn) { - if (proxy->Adapter().type() == typeid(std::shared_ptr)) { +// See the cpu impl for parameter documentation. 
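For context, both the CPU and CUDA `DispatchAny` overloads follow the same pattern: the proxy stores the current batch as a type-erased `std::any` holding a (shared) pointer to one of a small set of adapter types, and the dispatcher compares `typeid`s until it can cast back to the concrete adapter and invoke the callback. Below is a minimal, self-contained sketch of that pattern, not the XGBoost implementation; the adapter names `AdapterA`/`AdapterB` are placeholders for the real adapters (e.g. `CudfAdapter`, `CupyAdapter`).

```cpp
#include <any>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <type_traits>

// Placeholder adapters standing in for the real device/host adapters.
struct AdapterA { int NumRows() const { return 1; } };
struct AdapterB { int NumRows() const { return 2; } };

template <typename Fn>
auto DispatchAny(std::any const& x, Fn&& fn, bool* type_error = nullptr) {
  if (type_error) { *type_error = false; }
  // Compare the erased type against each known adapter pointer type, then
  // cast back and invoke the callback on the typed adapter.
  if (x.type() == typeid(std::shared_ptr<AdapterA>)) {
    return fn(*std::any_cast<std::shared_ptr<AdapterA>>(x));
  }
  if (x.type() == typeid(std::shared_ptr<AdapterB>)) {
    return fn(*std::any_cast<std::shared_ptr<AdapterB>>(x));
  }
  if (type_error) {
    *type_error = true;  // Let the caller fall back to another dispatcher.
  } else {
    throw std::runtime_error(std::string{"Unknown type: "} + x.type().name());
  }
  // Dummy value so that every control-flow path returns the same type; this
  // mirrors the dummy-return tail of the real dispatcher.
  return std::invoke_result_t<Fn, AdapterA const&>{};
}

int main() {
  std::any batch = std::make_shared<AdapterB>();
  bool type_error = false;
  std::cout << DispatchAny(batch, [](auto const& a) { return a.NumRows(); }, &type_error)
            << ' ' << type_error << '\n';  // prints: 2 0
}
```

The `type_error` out-parameter is what allows the callers seen in this patch (`BatchSamples`, `BatchColumns`, the inplace-predict fallback) to try the host dispatcher first and fall back to the CUDA one when the type is not recognized, instead of aborting.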
+template typename AddPtrT = std::shared_ptr, + typename Fn> +decltype(auto) DispatchAny(Context const* ctx, std::any x, Fn&& fn, bool* type_error = nullptr) { + auto has_type = [&] { + if (type_error) { + *type_error = false; + } + }; + if (x.type() == typeid(AddPtrT)) { + has_type(); if constexpr (get_value) { - auto value = std::any_cast>(proxy->Adapter())->Value(); + auto value = std::any_cast>(x)->Value(); return fn(value); } else { - auto value = std::any_cast>(proxy->Adapter()); + auto value = std::any_cast>(x); return fn(value); } - } else if (proxy->Adapter().type() == typeid(std::shared_ptr)) { + } else if (x.type() == typeid(AddPtrT)) { + has_type(); + auto adapter = std::any_cast>(x); if constexpr (get_value) { - auto value = std::any_cast>(proxy->Adapter())->Value(); + auto value = adapter->Value(); + if (adapter->HasRefCategorical()) { + auto [batch, mapping] = MakeEncColumnarBatch(ctx, adapter); + return fn(batch); + } return fn(value); } else { - auto value = std::any_cast>(proxy->Adapter()); - return fn(value); + return fn(adapter); } } else { - LOG(FATAL) << "Unknown type: " << proxy->Adapter().type().name(); - if constexpr (get_value) { - auto value = std::any_cast>(proxy->Adapter())->Value(); - return fn(value); + if (type_error) { + *type_error = true; } else { - auto value = std::any_cast>(proxy->Adapter()); - return fn(value); + LOG(FATAL) << "Unknown type: " << x.type().name(); } } + + // Dummy return value + if constexpr (get_value) { + auto value = std::any_cast>(x)->Value(); + return fn(value); + } else { + auto value = std::any_cast>(x); + return fn(value); + } +} + +template +decltype(auto) DispatchAny(DMatrixProxy const* proxy, Fn&& fn, bool* type_error = nullptr) { + return DispatchAny(proxy->Ctx(), proxy->Adapter(), std::forward(fn), type_error); } } // namespace xgboost::data::cuda_impl diff --git a/src/data/proxy_dmatrix.h b/src/data/proxy_dmatrix.h index 78c070621a34..b2ea9a2d5afa 100644 --- a/src/data/proxy_dmatrix.h +++ b/src/data/proxy_dmatrix.h @@ -9,15 +9,17 @@ #include // for uint32_t, int32_t #include // for shared_ptr #include // for invoke_result_t, declval +#include // for forward #include // for vector -#include "../common/cuda_rt_utils.h" // for xgboost_NVTX_FN_RANGE -#include "../encoder/ordinal.h" // for HostColumnsView -#include "adapter.h" // for ColumnarAdapter, ArrayAdapter -#include "xgboost/c_api.h" // for DataIterHandle -#include "xgboost/context.h" // for Context -#include "xgboost/data.h" // for MetaInfo -#include "xgboost/string_view.h" // for StringView +#include "../common/nvtx_utils.h" // for xgboost_NVTX_FN_RANGE +#include "../encoder/ordinal.h" // for HostColumnsView +#include "adapter.h" // for ColumnarAdapter, ArrayAdapter, MakeEncColumnarBatch +#include "cat_container.h" // for CatContainer +#include "xgboost/c_api.h" // for DataIterHandle +#include "xgboost/context.h" // for Context +#include "xgboost/data.h" // for MetaInfo +#include "xgboost/string_view.h" // for StringView namespace xgboost::data { /** @@ -40,8 +42,8 @@ class DataIterProxy { : iter_{iter}, reset_{reset}, next_{next} {} DataIterProxy(DataIterProxy&& that) = default; DataIterProxy& operator=(DataIterProxy&& that) = default; - DataIterProxy(DataIterProxy const& that) = default; - DataIterProxy& operator=(DataIterProxy const& that) = default; + DataIterProxy(DataIterProxy const& that) = delete; + DataIterProxy& operator=(DataIterProxy const& that) = delete; [[nodiscard]] bool Next() { xgboost_NVTX_FN_RANGE(); @@ -65,51 +67,39 @@ class DataIterProxy { }; /** 
- * @brief A proxy of DMatrix used by external iterator. + * @brief A proxy of DMatrix used by the external iterator. */ class DMatrixProxy : public DMatrix { MetaInfo info_; std::any batch_; Context ctx_; -#if defined(XGBOOST_USE_CUDA) - void FromCudaColumnar(StringView interface_str); - void FromCudaArray(StringView interface_str); -#endif // defined(XGBOOST_USE_CUDA) - public: DeviceOrd Device() const { return ctx_.Device(); } - void SetCUDAArray(char const* c_interface) { - common::AssertGPUSupport(); - CHECK(c_interface); -#if defined(XGBOOST_USE_CUDA) - StringView interface_str{c_interface}; - Json json_array_interface = Json::Load(interface_str); - if (IsA(json_array_interface)) { - this->FromCudaColumnar(interface_str); - } else { - this->FromCudaArray(interface_str); - } -#endif // defined(XGBOOST_USE_CUDA) - } - - void SetColumnarData(StringView interface_str); - - void SetArrayData(StringView interface_str); - void SetCSRData(char const* c_indptr, char const* c_indices, char const* c_values, - bst_feature_t n_features, bool on_host); + /** + * Device setters + */ + void SetCudaColumnar(StringView data); + void SetCudaArray(StringView data); + /** + * Host setters + */ + void SetColumnar(StringView data); + void SetArray(StringView data); + void SetCsr(char const* c_indptr, char const* c_indices, char const* c_values, + bst_feature_t n_features, bool on_host); MetaInfo& Info() override { return info_; } MetaInfo const& Info() const override { return info_; } Context const* Ctx() const override { return &ctx_; } - bool EllpackExists() const override { return false; } - bool GHistIndexExists() const override { return false; } - bool SparsePageExists() const override { return false; } + [[nodiscard]] bool EllpackExists() const override { return false; } + [[nodiscard]] bool GHistIndexExists() const override { return false; } + [[nodiscard]] bool SparsePageExists() const override { return false; } template - BatchSet NoBatch() { + static BatchSet NoBatch() { LOG(FATAL) << "Proxy DMatrix cannot return data batch."; return BatchSet(BatchIterator(nullptr)); } @@ -168,6 +158,7 @@ struct ExternalDataInfo { CHECK_GE(this->n_features, 1) << "Data must have at least 1 column."; CHECK_EQ(this->base_rowids.size(), this->n_batches + 1); + CHECK_LE(this->row_stride, this->n_features); } void SetInfo(Context const* ctx, bool sync, MetaInfo* p_info) { @@ -184,73 +175,114 @@ struct ExternalDataInfo { } }; +namespace cpu_impl { /** - * @brief Dispatch function call based on input type. + * @brief Dispatch function call based on the input type. * - * @tparam get_value Whether the funciton Fn accept an adapter batch or the adapter itself. + * @tparam get_value Whether the function Fn accepts an adapter batch or the adapter itself. + * @tparam AddPtrT The type of the adapter pointer. Use std::add_pointer_t for raw pointers. * @tparam Fn The type of the function to be dispatched. * - * @param proxy The proxy object holding the reference to the input. + * @param x An std::any object that contains a (shared) pointer to an adapter. * @param fn The function to be dispatched. * @param type_error[out] Set to true if it's not null and the input data is not recognized by * the host. * * @return The return value of the function being dispatched. 
*/ -template -decltype(auto) HostAdapterDispatch(DMatrixProxy const* proxy, Fn fn, bool* type_error = nullptr) { - CHECK(proxy->Adapter().has_value()); - if (proxy->Adapter().type() == typeid(std::shared_ptr)) { +template typename AddPtrT = std::shared_ptr, + typename Fn> +decltype(auto) DispatchAny(Context const* ctx, std::any x, Fn&& fn, bool* type_error = nullptr) { + // CSC, FileAdapter, and IteratorAdapter are not supported. + auto has_type = [&] { + if (type_error) { + *type_error = false; + } + }; + CHECK(x.has_value()); + if (x.type() == typeid(AddPtrT)) { + has_type(); if constexpr (get_value) { - auto value = std::any_cast>(proxy->Adapter())->Value(); + auto value = std::any_cast>(x)->Value(); return fn(value); } else { - auto value = std::any_cast>(proxy->Adapter()); - return fn(value); + auto value = std::any_cast>(x); + return fn(value); } - if (type_error) { - *type_error = false; - } - } else if (proxy->Adapter().type() == typeid(std::shared_ptr)) { + } else if (x.type() == typeid(AddPtrT)) { + has_type(); if constexpr (get_value) { - auto value = std::any_cast>(proxy->Adapter())->Value(); + auto value = std::any_cast>(x)->Value(); return fn(value); } else { - auto value = std::any_cast>(proxy->Adapter()); + auto value = std::any_cast>(x); return fn(value); } - if (type_error) { - *type_error = false; - } - } else if (proxy->Adapter().type() == typeid(std::shared_ptr)) { + } else if (x.type() == typeid(AddPtrT)) { + has_type(); if constexpr (get_value) { - auto value = std::any_cast>(proxy->Adapter())->Value(); + auto value = std::any_cast>(x)->Value(); return fn(value); } else { - auto value = std::any_cast>(proxy->Adapter()); + auto value = std::any_cast>(x); return fn(value); } - if (type_error) { - *type_error = false; + } else if (x.type() == typeid(AddPtrT)) { + has_type(); + auto adapter = std::any_cast>(x); + if constexpr (get_value) { + auto value = adapter->Value(); + if (adapter->HasRefCategorical()) { + auto [batch, mapping] = MakeEncColumnarBatch(ctx, adapter); + return fn(batch); + } + return fn(value); + } else { + return fn(adapter); } } else { if (type_error) { *type_error = true; } else { - LOG(FATAL) << "Unknown type: " << proxy->Adapter().type().name(); + LOG(FATAL) << "Unknown type: " << x.type().name(); } } if constexpr (get_value) { - return std::invoke_result_t>()->Value())>(); + return std::invoke_result_t>()->Value())>(); } else { - return std::invoke_result_t>())>(); + return std::invoke_result_t>())>(); } } +template +decltype(auto) DispatchAny(DMatrixProxy const* proxy, Fn&& fn, bool* type_error = nullptr) { + return DispatchAny(proxy->Ctx(), proxy->Adapter(), std::forward(fn), type_error); +} + +/** + * @brief Get categories for the current batch. + * + * @return A host view to the categories. + */ +[[nodiscard]] inline decltype(auto) BatchCats(DMatrixProxy const* proxy) { + return DispatchAny(proxy, [](auto const& adapter) -> decltype(auto) { + using AdapterT = typename std::remove_reference_t::element_type; + if constexpr (std::is_same_v) { + if (adapter->HasRefCategorical()) { + return adapter->RefCats(); + } + return adapter->Cats(); + } + return enc::HostColumnsView{}; + }); +} +} // namespace cpu_impl + /** * @brief Create a `SimpleDMatrix` instance from a `DMatrixProxy`. + * + * This is used for enabling inplace-predict fallback. 
*/ std::shared_ptr CreateDMatrixFromProxy(Context const* ctx, std::shared_ptr proxy, float missing); @@ -259,6 +291,7 @@ namespace cuda_impl { [[nodiscard]] bst_idx_t BatchSamples(DMatrixProxy const*); [[nodiscard]] bst_idx_t BatchColumns(DMatrixProxy const*); #if defined(XGBOOST_USE_CUDA) +[[nodiscard]] bool BatchCatsIsRef(DMatrixProxy const*); [[nodiscard]] enc::DeviceColumnsView BatchCats(DMatrixProxy const*); #endif // defined(XGBOOST_USE_CUDA) } // namespace cuda_impl @@ -269,7 +302,7 @@ namespace cuda_impl { [[nodiscard]] inline bst_idx_t BatchSamples(DMatrixProxy const* proxy) { bool type_error = false; auto n_samples = - HostAdapterDispatch(proxy, [](auto const& value) { return value.NumRows(); }, &type_error); + cpu_impl::DispatchAny(proxy, [](auto const& value) { return value.NumRows(); }, &type_error); if (type_error) { n_samples = cuda_impl::BatchSamples(proxy); } @@ -282,24 +315,14 @@ namespace cuda_impl { [[nodiscard]] inline bst_feature_t BatchColumns(DMatrixProxy const* proxy) { bool type_error = false; auto n_features = - HostAdapterDispatch(proxy, [](auto const& value) { return value.NumCols(); }, &type_error); + cpu_impl::DispatchAny(proxy, [](auto const& value) { return value.NumCols(); }, &type_error); if (type_error) { n_features = cuda_impl::BatchColumns(proxy); } return n_features; } -namespace cpu_impl { -// Get categories for the current batch. -[[nodiscard]] inline decltype(auto) BatchCats(DMatrixProxy const* proxy) { - return HostAdapterDispatch(proxy, [](auto const& adapter) -> decltype(auto) { - using AdapterT = typename std::remove_reference_t::element_type; - if constexpr (std::is_same_v) { - return adapter->Cats(); - } - return enc::HostColumnsView{}; - }); -} -} // namespace cpu_impl +namespace cpu_impl {} // namespace cpu_impl +[[nodiscard]] bool BatchCatsIsRef(DMatrixProxy const* proxy); } // namespace xgboost::data #endif // XGBOOST_DATA_PROXY_DMATRIX_H_ diff --git a/src/data/quantile_dmatrix.cc b/src/data/quantile_dmatrix.cc index a1429ffd3fb3..2c30784719cf 100644 --- a/src/data/quantile_dmatrix.cc +++ b/src/data/quantile_dmatrix.cc @@ -9,6 +9,7 @@ #include "../collective/communicator-inl.h" // for IsDistributed #include "../common/error_msg.h" // for InconsistentCategories #include "../common/threading_utils.h" // for ParallelFor +#include "proxy_dmatrix.h" // for DispatchAny #include "cat_container.h" // for CatContainer #include "gradient_index.h" // for GHistIndexMatrix #include "xgboost/collective/result.h" // for SafeColl @@ -84,13 +85,13 @@ void SyncFeatureType(Context const* ctx, std::vector* p_h_ft) { } void GetDataShape(Context const* ctx, DMatrixProxy* proxy, - DataIterProxy iter, float missing, + DataIterProxy* iter, float missing, ExternalDataInfo* p_info) { auto& info = *p_info; auto const is_valid = data::IsValidFunctor{missing}; auto nnz_cnt = [&]() { - return HostAdapterDispatch(proxy, [&](auto const& value) { + return DispatchAny(proxy, [&](auto const& value) { bst_idx_t n_threads = ctx->Threads(); bst_idx_t n_features = info.column_sizes.size(); linalg::Tensor column_sizes_tloc({n_threads, n_features}, DeviceOrd::CPU()); @@ -126,7 +127,8 @@ void GetDataShape(Context const* ctx, DMatrixProxy* proxy, collective::SafeColl(collective::Allreduce(ctx, &info.n_features, collective::Op::kMax)); info.column_sizes.clear(); info.column_sizes.resize(info.n_features, 0); - p_info->cats = std::make_shared(cpu_impl::BatchCats(proxy)); + p_info->cats = + std::make_shared(cpu_impl::BatchCats(proxy), BatchCatsIsRef(proxy)); } else { 
CHECK_EQ(info.n_features, BatchColumns(proxy)) << "Inconsistent number of columns."; auto cats = cpu_impl::BatchCats(proxy); @@ -138,8 +140,8 @@ void GetDataShape(Context const* ctx, DMatrixProxy* proxy, info.nnz += info.batch_nnz.back(); info.accumulated_rows += batch_size; info.n_batches++; - } while (iter.Next()); - iter.Reset(); + } while (iter->Next()); + iter->Reset(); std::partial_sum(info.base_rowids.cbegin(), info.base_rowids.cend(), info.base_rowids.begin()); } @@ -164,7 +166,7 @@ void MakeSketches(Context const* ctx, p_sketch = std::make_unique( ctx, p.max_bin, h_ft, ext_info.column_sizes, !proxy->Info().group_ptr_.empty()); } - HostAdapterDispatch(proxy, [&](auto const& batch) { + DispatchAny(proxy, [&](auto const& batch) { proxy->Info().num_nonzero_ = ext_info.batch_nnz[i]; // We don't need base row idx here as Info is from proxy and the number of rows in // it is consistent with data batch. diff --git a/src/data/quantile_dmatrix.cu b/src/data/quantile_dmatrix.cu index 4452d5e9212a..9a9963f7cc3f 100644 --- a/src/data/quantile_dmatrix.cu +++ b/src/data/quantile_dmatrix.cu @@ -10,14 +10,14 @@ #include "../collective/allreduce.h" // for Allreduce #include "../common/cuda_context.cuh" // for CUDAContext #include "../common/cuda_rt_utils.h" // for AllVisibleGPUs -#include "../common/cuda_rt_utils.h" // for xgboost_NVTX_FN_RANGE #include "../common/device_vector.cuh" // for XGBCachingDeviceAllocator #include "../common/error_msg.h" // for InconsistentCategories #include "../common/hist_util.cuh" // for AdapterDeviceSketch +#include "../common/nvtx_utils.h" // for xgboost_NVTX_FN_RANGE #include "../common/quantile.cuh" // for SketchContainer #include "cat_container.h" // for CatContainer #include "ellpack_page.cuh" // for EllpackPage -#include "proxy_dmatrix.cuh" // for Dispatch +#include "proxy_dmatrix.cuh" // for DispatchAny #include "proxy_dmatrix.h" // for DataIterProxy #include "quantile_dmatrix.h" // for GetCutsFromRef @@ -73,14 +73,15 @@ void MakeSketches(Context const* ctx, /** * Get the data shape. */ - // We use do while here as the first batch is fetched in ctor + // We use do while here as the first batch has been fetched in the ctor CHECK_LT(ctx->Ordinal(), curt::AllVisibleGPUs()); auto device = dh::GetDevice(ctx); curt::SetDevice(device.ordinal); auto cats = cuda_impl::BatchCats(proxy); if (ext_info.n_features == 0) { ext_info.n_features = data::BatchColumns(proxy); - ext_info.cats = std::make_shared(device, cats); + ext_info.cats = + std::make_shared(p_ctx, cats, ::xgboost::data::BatchCatsIsRef(proxy)); auto rc = collective::Allreduce(ctx, linalg::MakeVec(&ext_info.n_features, 1), collective::Op::kMax); SafeColl(rc); @@ -114,7 +115,7 @@ void MakeSketches(Context const* ctx, lazy_init_sketch(); // Add a new level. 
} proxy->Info().weights_.SetDevice(dh::GetDevice(ctx)); - Dispatch(proxy, [&](auto const& value) { + DispatchAny(proxy, [&](auto const& value) { common::AdapterDeviceSketch(p_ctx, value, p.max_bin, proxy->Info(), missing, sketches.back().first.get()); sketches.back().second++; @@ -128,7 +129,7 @@ void MakeSketches(Context const* ctx, dh::device_vector row_counts(batch_rows + 1, 0); common::Span row_counts_span(row_counts.data().get(), row_counts.size()); ext_info.row_stride = - std::max(ext_info.row_stride, Dispatch(proxy, [=](auto const& value) { + std::max(ext_info.row_stride, DispatchAny(proxy, [=](auto const& value) { return GetRowCounts(ctx, value, row_counts_span, dh::GetDevice(ctx), missing); })); ext_info.nnz += thrust::reduce(ctx->CUDACtx()->CTP(), row_counts.begin(), row_counts.end()); diff --git a/src/data/quantile_dmatrix.h b/src/data/quantile_dmatrix.h index 2021201f0382..5d0a58ad720b 100644 --- a/src/data/quantile_dmatrix.h +++ b/src/data/quantile_dmatrix.h @@ -91,7 +91,7 @@ void SyncFeatureType(Context const *ctx, std::vector *p_h_ft); * @brief Fetch the external data shape. */ void GetDataShape(Context const *ctx, DMatrixProxy *proxy, - DataIterProxy iter, float missing, + DataIterProxy *iter, float missing, ExternalDataInfo *p_info); /** diff --git a/src/data/simple_dmatrix.cc b/src/data/simple_dmatrix.cc index 0cdaccad4109..a249c99ac515 100644 --- a/src/data/simple_dmatrix.cc +++ b/src/data/simple_dmatrix.cc @@ -21,6 +21,7 @@ #include "cat_container.h" // for CatContainer #include "ellpack_page.h" // for EllpackPage #include "gradient_index.h" +#include "proxy_dmatrix.h" // for DispatchAny #include "xgboost/c_api.h" #include "xgboost/data.h" @@ -50,10 +51,15 @@ DMatrix* SimpleDMatrix::Slice(common::Span ridxs) { out->Info() = this->Info().Slice(&ctx, h_ridx, h_offset.back()); } out->fmat_ctx_ = this->fmat_ctx_; + + out->Info().Cats()->Copy(&fmat_ctx_, *this->Info().Cats()); return out; } DMatrix* SimpleDMatrix::SliceCol(int num_slices, int slice_id) { + if (this->Cats()->HasCategorical()) { + LOG(FATAL) << "Slicing column is not supported for DataFrames with categorical columns."; + } auto out = new SimpleDMatrix; SparsePage& out_page = *out->sparse_page_; auto const slice_size = info_.num_col_ / num_slices; @@ -226,22 +232,31 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread, DataSplitMode data_split_mode) { Context ctx; ctx.Init(Args{{"nthread", std::to_string(nthread)}}); - std::vector qids; uint64_t default_max = std::numeric_limits::max(); uint64_t last_group_id = default_max; bst_uint group_size = 0; auto& offset_vec = sparse_page_->offset.HostVector(); auto& data_vec = sparse_page_->data.HostVector(); + // batch_size is either number of rows or cols, depending on data layout uint64_t inferred_num_columns = 0; uint64_t total_batch_size = 0; - // batch_size is either number of rows or cols, depending on data layout adapter->BeforeFirst(); // Iterate over batches of input data while (adapter->Next()) { + bool type_error = false; + auto push = [&](auto const& batch) { + return sparse_page_->Push(batch, missing, ctx.Threads()); + }; + bst_idx_t batch_max_columns = + cpu_impl::DispatchAny(&ctx, adapter, push, &type_error); auto& batch = adapter->Value(); - auto batch_max_columns = sparse_page_->Push(batch, missing, ctx.Threads()); + if (type_error) { + // Not supported by the dispatch function. 
+ batch_max_columns = push(batch); + } + inferred_num_columns = std::max(batch_max_columns, inferred_num_columns); total_batch_size += batch.Size(); // Append meta information if available @@ -289,8 +304,10 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread, } if constexpr (std::is_same_v) { - if (adapter->HasCategorical()) { - info_.Cats(std::make_shared(adapter->Cats())); + if (adapter->HasRefCategorical()) { + info_.Cats(std::make_shared(adapter->RefCats(), true)); + } else if (adapter->HasCategorical()) { + info_.Cats(std::make_shared(adapter->Cats(), false)); } } @@ -310,8 +327,7 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread, offset_vec.emplace_back(offset_vec.back()); } } else { - CHECK((std::is_same_v || std::is_same_v)) - << "Expecting CSCAdapter"; + CHECK((std::is_same_v)) << "Expecting a CSC adapter."; info_.num_row_ = offset_vec.size() - 1; } } else { @@ -325,6 +341,8 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, int nthread, } info_.num_nonzero_ = data_vec.size(); + SyncCategories(&ctx, info_.Cats(), info_.num_row_ == 0); + // Sort the index for row partitioners used by variuos tree methods. if (!sparse_page_->IsIndicesSorted(ctx.Threads())) { sparse_page_->SortIndices(ctx.Threads()); @@ -351,23 +369,20 @@ void SimpleDMatrix::SaveToLocalFile(const std::string& fname) { fo->Write(sparse_page_->data.HostVector()); } -template SimpleDMatrix::SimpleDMatrix(DenseAdapter* adapter, float missing, int nthread, - DataSplitMode data_split_mode); -template SimpleDMatrix::SimpleDMatrix(ArrayAdapter* adapter, float missing, int nthread, - DataSplitMode data_split_mode); -template SimpleDMatrix::SimpleDMatrix(CSRAdapter* adapter, float missing, int nthread, - DataSplitMode data_split_mode); -template SimpleDMatrix::SimpleDMatrix(CSRArrayAdapter* adapter, float missing, int nthread, - DataSplitMode data_split_mode); -template SimpleDMatrix::SimpleDMatrix(CSCArrayAdapter* adapter, float missing, int nthread, - DataSplitMode data_split_mode); -template SimpleDMatrix::SimpleDMatrix(CSCAdapter* adapter, float missing, int nthread, - DataSplitMode data_split_mode); -template SimpleDMatrix::SimpleDMatrix(FileAdapter* adapter, float missing, int nthread, - DataSplitMode data_split_mode); -template SimpleDMatrix::SimpleDMatrix(ColumnarAdapter* adapter, float missing, int nthread, - DataSplitMode data_split_mode); -template SimpleDMatrix::SimpleDMatrix( - IteratorAdapter* adapter, - float missing, int nthread, DataSplitMode data_split_mode); +#define INSTANTIATE_SDCTOR(__ADAPTER_T) \ + template SimpleDMatrix::SimpleDMatrix(__ADAPTER_T* adapter, float missing, std::int32_t nthread, \ + DataSplitMode data_split_mode); + +INSTANTIATE_SDCTOR(DenseAdapter) +INSTANTIATE_SDCTOR(ArrayAdapter) +INSTANTIATE_SDCTOR(CSRArrayAdapter) +INSTANTIATE_SDCTOR(CSCArrayAdapter) +INSTANTIATE_SDCTOR(FileAdapter) +INSTANTIATE_SDCTOR(ColumnarAdapter) +namespace { +using IterAdapterT = IteratorAdapter; +} +INSTANTIATE_SDCTOR(IterAdapterT) + +#undef INSTANTIATE_SDCTOR } // namespace xgboost::data diff --git a/src/data/simple_dmatrix.cu b/src/data/simple_dmatrix.cu index 1436d982bc29..ede1b4b48a83 100644 --- a/src/data/simple_dmatrix.cu +++ b/src/data/simple_dmatrix.cu @@ -5,9 +5,10 @@ #include // for int32_t, int8_t #include // for make_shared -#include "../common/cuda_rt_utils.h" // for CurrentDevice +#include "../common/cuda_rt_utils.h" // for CurrentDevice, SetDevice #include "cat_container.h" // for CatContainer -#include "device_adapter.cuh" // 
for CurrentDevice +#include "device_adapter.cuh" +#include "proxy_dmatrix.cuh" // for DispatchAny #include "simple_dmatrix.cuh" #include "simple_dmatrix.h" #include "xgboost/context.h" // for Context @@ -21,12 +22,12 @@ template SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, std::int32_t nthread, DataSplitMode data_split_mode) { CHECK(data_split_mode != DataSplitMode::kCol) - << "Column-wise data split is currently not supported on the GPU."; + << "Column-wise data split is currently not supported by the GPU."; auto device = (!adapter->Device().IsCUDA() || adapter->NumRows() == 0) ? DeviceOrd::CUDA(curt::CurrentDevice()) : adapter->Device(); CHECK(device.IsCUDA()); - dh::safe_cuda(cudaSetDevice(device.ordinal)); + curt::SetDevice(device.ordinal); Context ctx; ctx.Init(Args{{"nthread", std::to_string(nthread)}, {"device", device.Name()}}); @@ -40,14 +41,17 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, std::int32_t nthr // Enforce single batch CHECK(!adapter->Next()); - info_.num_nonzero_ = - CopyToSparsePage(&ctx, adapter->Value(), device, missing, sparse_page_.get()); + cuda_impl::DispatchAny(&ctx, adapter, [&](auto const& batch) { + info_.num_nonzero_ = CopyToSparsePage(&ctx, batch, device, missing, sparse_page_.get()); + }); info_.num_col_ = adapter->NumColumns(); info_.num_row_ = adapter->NumRows(); if constexpr (std::is_same_v) { - if (adapter->HasCategorical()) { - info_.Cats(std::make_shared(adapter->Device(), adapter->Cats())); + if (adapter->HasRefCategorical()) { + info_.Cats(std::make_shared(&ctx, adapter->RefCats(), true)); + } else if (adapter->HasCategorical()) { + info_.Cats(std::make_shared(&ctx, adapter->Cats(), false)); } } this->info_.SynchronizeNumberOfColumns(&ctx, data_split_mode); diff --git a/src/data/sparse_page_dmatrix.cc b/src/data/sparse_page_dmatrix.cc index f3a26a391d9a..d6fac5096d6c 100644 --- a/src/data/sparse_page_dmatrix.cc +++ b/src/data/sparse_page_dmatrix.cc @@ -13,11 +13,11 @@ #include // for move #include // for visit -#include "batch_utils.h" // for RegenGHist -#include "cat_container.h" // for CatContainer -#include "gradient_index.h" // for GHistIndexMatrix -#include "sparse_page_source.h" // for MakeCachePrefix -#include "../common/error_msg.h" // for InconsistentCategories +#include "../common/error_msg.h" // for InconsistentCategories, CacheHostRatio +#include "batch_utils.h" // for RegenGHist +#include "cat_container.h" // for CatContainer +#include "gradient_index.h" // for GHistIndexMatrix +#include "sparse_page_source.h" // for MakeCachePrefix namespace xgboost::data { MetaInfo &SparsePageDMatrix::Info() { return info_; } @@ -34,7 +34,9 @@ SparsePageDMatrix::SparsePageDMatrix(DataIterHandle iter_handle, DMatrixHandle p missing_{config.missing}, cache_prefix_{config.cache}, on_host_{config.on_host}, + cache_host_ratio_{config.cache_host_ratio}, min_cache_page_bytes_{config.min_cache_page_bytes} { + CHECK(detail::HostRatioIsAuto(config.cache_host_ratio)) << error::CacheHostRatioNotImpl(); Context ctx; ctx.Init(Args{{"nthread", std::to_string(config.n_threads)}}); cache_prefix_ = MakeCachePrefix(cache_prefix_); @@ -45,11 +47,11 @@ SparsePageDMatrix::SparsePageDMatrix(DataIterHandle iter_handle, DMatrixHandle p auto get_cats = [](DMatrixProxy const *proxy) { if (proxy->Ctx()->IsCPU()) { - return std::make_shared(cpu_impl::BatchCats(proxy)); + return std::make_shared(cpu_impl::BatchCats(proxy), BatchCatsIsRef(proxy)); } else { - common::AssertGPUSupport(); #if defined(XGBOOST_USE_CUDA) - return 
std::make_shared(proxy->Ctx()->Device(), cuda_impl::BatchCats(proxy)); + return std::make_shared(proxy->Ctx(), cuda_impl::BatchCats(proxy), + BatchCatsIsRef(proxy)); #else common::AssertGPUSupport(); return std::make_shared(); @@ -81,8 +83,9 @@ SparsePageDMatrix::SparsePageDMatrix(DataIterHandle iter_handle, DMatrixHandle p iter.Reset(); ext_info_.SetInfo(&ctx, true, &this->info_); - fmat_ctx_ = ctx; + + SyncCategories(&ctx, info_.Cats(), info_.num_row_ == 0); } SparsePageDMatrix::~SparsePageDMatrix() { @@ -112,8 +115,8 @@ void SparsePageDMatrix::InitializeSparsePage(Context const *ctx) { // During initialization, the n_batches is 0. CHECK_EQ(this->ext_info_.n_batches, static_castext_info_.n_batches)>(0)); sparse_page_source_ = std::make_shared( - iter, proxy, this->missing_, ctx->Threads(), this->info_.num_col_, this->ext_info_.n_batches, - cache_info_.at(id)); + std::move(iter), proxy, this->missing_, ctx->Threads(), this->info_.num_col_, + this->ext_info_.n_batches, cache_info_.at(id)); } BatchSet SparsePageDMatrix::GetRowBatchesImpl(Context const *ctx) { @@ -127,7 +130,7 @@ BatchSet SparsePageDMatrix::GetRowBatches() { } BatchSet SparsePageDMatrix::GetColumnBatches(Context const *ctx) { - auto id = MakeCache(this, ".col.page", on_host_, cache_prefix_, &cache_info_); + auto id = MakeCache(this, ".col.page", false, cache_prefix_, &cache_info_); CHECK_NE(this->Info().num_col_, 0); this->InitializeSparsePage(ctx); if (!column_source_) { @@ -141,7 +144,7 @@ BatchSet SparsePageDMatrix::GetColumnBatches(Context const *ctx) { } BatchSet SparsePageDMatrix::GetSortedColumnBatches(Context const *ctx) { - auto id = MakeCache(this, ".sorted.col.page", on_host_, cache_prefix_, &cache_info_); + auto id = MakeCache(this, ".sorted.col.page", false, cache_prefix_, &cache_info_); CHECK_NE(this->Info().num_col_, 0); this->InitializeSparsePage(ctx); if (!sorted_column_source_) { @@ -160,11 +163,11 @@ BatchSet SparsePageDMatrix::GetGradientIndex(Context const *ct CHECK_GE(param.max_bin, 2); } detail::CheckEmpty(batch_param_, param); - auto id = MakeCache(this, ".gradient_index.page", on_host_, cache_prefix_, &cache_info_); + auto id = MakeCache(this, ".gradient_index.page", false, cache_prefix_, &cache_info_); if (!cache_info_.at(id)->written || detail::RegenGHist(batch_param_, param)) { this->InitializeSparsePage(ctx); cache_info_.erase(id); - id = MakeCache(this, ".gradient_index.page", on_host_, cache_prefix_, &cache_info_); + id = MakeCache(this, ".gradient_index.page", false, cache_prefix_, &cache_info_); LOG(INFO) << "Generating new Gradient Index."; // Use sorted sketch for approx. 
auto sorted_sketch = param.regen; diff --git a/src/data/sparse_page_dmatrix.cu b/src/data/sparse_page_dmatrix.cu index d6a7e1ac112d..dd3857cdab4d 100644 --- a/src/data/sparse_page_dmatrix.cu +++ b/src/data/sparse_page_dmatrix.cu @@ -53,9 +53,9 @@ BatchSet SparsePageDMatrix::GetEllpackBatches(Context const* ctx, ellpack_page_source_.emplace(nullptr); } - auto cinfo = EllpackCacheInfo{param, /*prefer_device=*/false, /*max_num_device_pages=*/0, - this->missing_}; - CalcCacheMapping(ctx, this->IsDense(), cuts, min_cache_page_bytes_, this->ext_info_, &cinfo); + auto cinfo = EllpackCacheInfo{param, this->cache_host_ratio_, this->missing_}; + CalcCacheMapping(ctx, this->IsDense(), cuts, min_cache_page_bytes_, this->ext_info_, true, + &cinfo); CHECK_EQ(cinfo.cache_mapping.size(), this->ext_info_.n_batches) << "Page concatenation is only supported by the `ExtMemQuantileDMatrix`."; std::visit( diff --git a/src/data/sparse_page_dmatrix.h b/src/data/sparse_page_dmatrix.h index b46c9bb03924..592c86b6b28f 100644 --- a/src/data/sparse_page_dmatrix.h +++ b/src/data/sparse_page_dmatrix.h @@ -74,6 +74,7 @@ class SparsePageDMatrix : public DMatrix { Context fmat_ctx_; std::string cache_prefix_; bool const on_host_; + float const cache_host_ratio_; std::int64_t const min_cache_page_bytes_; ExternalDataInfo ext_info_; diff --git a/src/data/sparse_page_source.cu b/src/data/sparse_page_source.cu index c60f0fb0569d..e480737ccd2a 100644 --- a/src/data/sparse_page_source.cu +++ b/src/data/sparse_page_source.cu @@ -1,8 +1,8 @@ /** - * Copyright 2021-2024, XGBoost contributors + * Copyright 2021-2025, XGBoost contributors */ #include "../common/device_helpers.cuh" // for CurrentDevice -#include "proxy_dmatrix.cuh" // for Dispatch, DMatrixProxy +#include "proxy_dmatrix.cuh" // for DispatchAny, DMatrixProxy #include "simple_dmatrix.cuh" // for CopyToSparsePage #include "sparse_page_source.h" #include "xgboost/data.h" // for SparsePage @@ -16,7 +16,7 @@ void DevicePush(DMatrixProxy *proxy, float missing, SparsePage *page) { CHECK(device.IsCUDA()); auto ctx = Context{}.MakeCUDA(device.ordinal); - cuda_impl::Dispatch( + cuda_impl::DispatchAny( proxy, [&](auto const &value) { CopyToSparsePage(&ctx, value, device, missing, page); }); } } // namespace xgboost::data diff --git a/src/data/sparse_page_source.h b/src/data/sparse_page_source.h index ba2574481b0b..931da303316f 100644 --- a/src/data/sparse_page_source.h +++ b/src/data/sparse_page_source.h @@ -178,6 +178,17 @@ class ExceHandler { } }; +template +std::unique_ptr DftCreateWriterImpl(StringView name, std::uint32_t iter) { + std::unique_ptr fo; + if (iter == 0) { + fo = std::make_unique(name, "wb"); + } else { + fo = std::make_unique(name, "ab"); + } + return fo; +} + /** * @brief Default implementation of the stream creater. 
*/ @@ -189,13 +200,7 @@ class DefaultFormatStreamPolicy : public F { public: std::unique_ptr CreateWriter(StringView name, std::uint32_t iter) { - std::unique_ptr fo; - if (iter == 0) { - fo = std::make_unique(name, "wb"); - } else { - fo = std::make_unique(name, "ab"); - } - return fo; + return DftCreateWriterImpl(name, iter); } std::unique_ptr CreateReader(StringView name, std::uint64_t offset, @@ -204,6 +209,23 @@ class DefaultFormatStreamPolicy : public F { } }; +template typename F> +class MemBufFileReadFormatStreamPolicy : public F { + public: + using WriterT = common::AlignedFileWriteStream; + using ReaderT = common::AlignedResourceReadStream; + + public: + std::unique_ptr CreateWriter(StringView name, std::uint32_t iter) { + return DftCreateWriterImpl(name, iter); + } + + std::unique_ptr CreateReader(StringView name, std::uint64_t offset, + std::uint64_t length) const { + return std::make_unique(std::string{name}, offset, length); + } +}; + /** * @brief Default implementatioin of the format creator. */ @@ -297,11 +319,11 @@ class SparsePageSourceImpl : public BatchIteratorImpl, public FormatStreamPol if (restart) { this->param_.prefetch_copy = true; } - ring_->at(fetch_it) = this->workers_.Submit([fetch_it, self, this] { + auto p = this->param_; + ring_->at(fetch_it) = this->workers_.Submit([fetch_it, self, p, this] { auto page = std::make_shared(); this->exce_.Run([&] { - std::unique_ptr fmt{ - self->CreatePageFormat(self->param_)}; + std::unique_ptr fmt{self->CreatePageFormat(p)}; auto name = self->cache_info_->ShardName(); auto [offset, length] = self->cache_info_->View(fetch_it); std::unique_ptr fi{ @@ -340,7 +362,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl, public FormatStreamPol timer.Stop(); if (bytes != InvalidPageSize()) { - // Not entirely accurate, the kernels doesn't have to flush the data. + // Not entirely accurate, the kernel doesn't have to flush the data. LOG(INFO) << common::HumanMemUnit(bytes) << " written in " << timer.ElapsedSeconds() << " seconds."; cache_info_->Push(bytes); @@ -437,7 +459,7 @@ class SparsePageSource : public SparsePageSourceImpl { if (!this->ReadCache()) { bool type_error{false}; CHECK(proxy_); - HostAdapterDispatch( + cpu_impl::DispatchAny( proxy_, [&](auto const& adapter_batch) { page_->Push(adapter_batch, this->missing_, this->nthreads_); @@ -459,7 +481,7 @@ class SparsePageSource : public SparsePageSourceImpl { DMatrixProxy* proxy, float missing, int nthreads, bst_feature_t n_features, bst_idx_t n_batches, std::shared_ptr cache) : SparsePageSourceImpl(missing, nthreads, n_features, cache), - iter_{iter}, + iter_{std::move(iter)}, proxy_{proxy}, n_batches_{n_batches} { if (!cache_info_->written) { diff --git a/src/encoder/ordinal.cuh b/src/encoder/ordinal.cuh index 282441d4a0d3..2fa61bf06242 100644 --- a/src/encoder/ordinal.cuh +++ b/src/encoder/ordinal.cuh @@ -98,8 +98,8 @@ struct SegmentedSearchSortedNumOp { haystack_v.feature_segments[f_idx + 1] - haystack_v.feature_segments[f_idx]); auto end_it = it + f_sorted_idx.size(); auto ret_it = thrust::lower_bound(thrust::seq, it, end_it, SearchKey(), [&](auto l, auto r) { - T l_value = l == SearchKey() ? needle : haystack[ref_sorted_idx[l]]; - T r_value = r == SearchKey() ? needle : haystack[ref_sorted_idx[r]]; + T l_value = l == SearchKey() ? needle : haystack[f_sorted_idx[l]]; + T r_value = r == SearchKey() ? 
needle : haystack[f_sorted_idx[r]]; return l_value < r_value; }); if (ret_it == it + f_sorted_idx.size()) { @@ -122,7 +122,8 @@ struct DftThrustPolicy { template using ThrustAllocator = thrust::device_allocator; - auto ThrustPolicy() const { return thrust::cuda::par_nosync; } + [[nodiscard]] auto ThrustPolicy() const { return thrust::cuda::par_nosync; } + [[nodiscard]] auto Stream() const { return cudaStreamPerThread; } }; } // namespace cuda_impl @@ -144,12 +145,15 @@ using DftDevicePolicy = Policy void SortNames(ExecPolicy const& policy, DeviceColumnsView orig_enc, Span sorted_idx) { + typename ExecPolicy::template ThrustAllocator alloc; + auto exec = thrust::cuda::par_nosync(alloc).on(policy.Stream()); + auto n_total_cats = orig_enc.n_total_cats; if (static_cast(sorted_idx.size()) != orig_enc.n_total_cats) { policy.Error("`sorted_idx` should have the same size as `n_total_cats`."); } auto d_sorted_idx = dh::ToSpan(sorted_idx); - cuda_impl::SegmentedIota(policy.ThrustPolicy(), orig_enc.feature_segments, d_sorted_idx); + cuda_impl::SegmentedIota(exec, orig_enc.feature_segments, d_sorted_idx); // using Pair = cuda::std::pair; @@ -162,9 +166,9 @@ void SortNames(ExecPolicy const& policy, DeviceColumnsView orig_enc, auto idx = d_sorted_idx[i]; return cuda::std::make_pair(static_cast(seg), idx); })); - thrust::copy(policy.ThrustPolicy(), key_it, key_it + n_total_cats, keys.begin()); + thrust::copy(exec, key_it, key_it + n_total_cats, keys.begin()); - thrust::sort(policy.ThrustPolicy(), keys.begin(), keys.end(), + thrust::sort(exec, keys.begin(), keys.end(), cuda::proclaim_return_type([=] __device__(Pair const& l, Pair const& r) { if (l.first == r.first) { // same feature auto const& col = orig_enc.columns[l.first]; @@ -193,7 +197,7 @@ void SortNames(ExecPolicy const& policy, DeviceColumnsView orig_enc, thrust::make_counting_iterator(0), cuda::proclaim_return_type( [=] __device__(std::int32_t i) { return s_keys[i].second; })); - thrust::copy(policy.ThrustPolicy(), it, it + sorted_idx.size(), dh::tbegin(sorted_idx)); + thrust::copy(exec, it, it + sorted_idx.size(), dh::tbegin(sorted_idx)); } /** @@ -212,8 +216,32 @@ template void Recode(ExecPolicy const& policy, DeviceColumnsView orig_enc, Span sorted_idx, DeviceColumnsView new_enc, Span mapping) { - auto exec = policy.ThrustPolicy(); + typename ExecPolicy::template ThrustAllocator alloc; + auto exec = thrust::cuda::par_nosync(alloc).on(policy.Stream()); detail::BasicChecks(policy, orig_enc, sorted_idx, new_enc, mapping); + /** + * Check consistency. + */ + auto check_it = thrust::make_transform_iterator( + thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) -> bool { + auto const& l_f = orig_enc.columns[i]; + auto const& r_f = new_enc.columns[i]; + if (l_f.index() != r_f.index()) { + return false; + } + auto l_is_empty = cuda::std::visit([](auto&& arg) { return arg.empty(); }, l_f); + auto r_is_empty = cuda::std::visit([](auto&& arg) { return arg.empty(); }, r_f); + return l_is_empty == r_is_empty; + }); + bool valid = thrust::reduce(exec, check_it, check_it + new_enc.Size(), true, + [=] XGBOOST_DEVICE(bool l, bool r) -> bool { return l && r; }); + if (!valid) { + policy.Error( + "Invalid new DataFrame. " + "The data type doesn't match the one used in the training dataset. " + "Both should be either numeric or categorical. 
" + "For a categorical feature, the index type must match between the training and test set."); + } /** * search the index for the new encoding @@ -222,9 +250,9 @@ void Recode(ExecPolicy const& policy, DeviceColumnsView orig_enc, exec, thrust::make_counting_iterator(0), new_enc.n_total_cats, [=] __device__(std::int32_t i) { auto f_idx = dh::SegmentId(new_enc.feature_segments, i); - std::int32_t searched_idx{-1}; + std::int32_t searched_idx{detail::NotFound()}; auto const& col = orig_enc.columns[f_idx]; - cuda::std::visit(Overloaded{[&](CatStrArrayView const& str) { + cuda::std::visit(Overloaded{[&](CatStrArrayView const&) { auto op = cuda_impl::SegmentedSearchSortedStrOp{ orig_enc, sorted_idx, new_enc, f_idx}; searched_idx = op(i); @@ -252,10 +280,9 @@ void Recode(ExecPolicy const& policy, DeviceColumnsView orig_enc, f_mapping[i - f_beg] = idx; }); - auto err_it = thrust::find_if(exec, dh::tcbegin(mapping), dh::tcend(mapping), - cuda::proclaim_return_type([=] __device__(std::int32_t v) { - return v == detail::NotFound(); - })); + auto err_it = thrust::find_if( + exec, dh::tcbegin(mapping), dh::tcend(mapping), + [=] XGBOOST_DEVICE(std::int32_t v) -> bool { return v == detail::NotFound(); }); if (err_it != dh::tcend(mapping)) { // Report missing cat. diff --git a/src/encoder/ordinal.h b/src/encoder/ordinal.h index 83269d3c913f..cb8cf8855a34 100644 --- a/src/encoder/ordinal.h +++ b/src/encoder/ordinal.h @@ -81,7 +81,8 @@ struct CatStrArrayView { * @brief All the primitive types supported by the encoder. */ using CatPrimIndexTypes = - std::tuple; + std::tuple; /** * @brief All the column types supported by the encoder. @@ -107,7 +108,8 @@ using DeviceCatIndexView = cuda_impl::TupToVarT; * Accepted policies: * * - A class with a `ThrustPolicy` method that returns a thrust execution policy, along with a - * `ThrustAllocator` template type. This is only used for the GPU implementation. + * `ThrustAllocator` template type. In addition, a `Stream` method that returns a CUDA stream. + * This is only used for the GPU implementation. * * - An error handling policy that exposes a single `Error` method, which takes a single * string parameter for error message. @@ -133,6 +135,7 @@ struct ColumnsViewImpl { [[nodiscard]] std::size_t Size() const { return columns.size(); } [[nodiscard]] bool Empty() const { return this->Size() == 0; } [[nodiscard]] auto operator[](std::size_t i) const { return columns[i]; } + [[nodiscard]] auto HasCategorical() const { return n_total_cats != 0; } }; struct DftErrorHandler { @@ -341,7 +344,24 @@ void Recode(ExecPolicy const &policy, HostColumnsView orig_enc, Span #include -#include "../common/common.h" -#include "../common/cuda_rt_utils.h" // for AllVisibleGPUs #include "../common/error_msg.h" // NoCategorical, DeprecatedFunc #include "../common/threading_utils.h" #include "../common/timer.h" @@ -36,16 +34,6 @@ struct GBLinearTrainParam : public XGBoostParameter { float tolerance; size_t max_row_perbatch; - void CheckGPUSupport() { - auto n_gpus = curt::AllVisibleGPUs(); - if (n_gpus == 0 && this->updater == "gpu_coord_descent") { - common::AssertGPUSupport(); - this->UpdateAllowUnknown(Args{{"updater", "coord_descent"}}); - LOG(WARNING) << "Loading configuration on a CPU only machine. 
Changing " - "updater to `coord_descent`."; - } - } - DMLC_DECLARE_PARAMETER(GBLinearTrainParam) { DMLC_DECLARE_FIELD(updater) .set_default("shotgun") @@ -73,26 +61,29 @@ class GBLinear : public GradientBooster { : GradientBooster{ctx}, learner_model_param_{learner_model_param}, model_{learner_model_param}, - previous_model_{learner_model_param} {} + previous_model_{learner_model_param} { + monitor_.Init(__func__); + } void Configure(const Args& cfg) override { if (model_.weight.size() == 0) { model_.Configure(cfg); } param_.UpdateAllowUnknown(cfg); - param_.CheckGPUSupport(); if (param_.updater == "gpu_coord_descent") { - LOG(WARNING) << error::DeprecatedFunc("gpu_coord_descent", "2.0.0", - R"(device="cuda", updater="coord_descent")"); + LOG(FATAL) << error::DeprecatedFunc("gpu_coord_descent", "2.0.0", + R"(device="cuda", updater="coord_descent")"); } - if (param_.updater == "coord_descent" && ctx_->IsCUDA()) { - updater_.reset(LinearUpdater::Create("gpu_coord_descent", ctx_)); - } else { - updater_.reset(LinearUpdater::Create(param_.updater, ctx_)); - } + auto name = (param_.updater == "coord_descent") + // Dispatch for coordinate descent + ? this->ctx_->DispatchDevice([] { return "coord_descent"; }, + [] { return "gpu_coord_descent"; }) + : param_.updater; + LOG(INFO) << "Using the updater:" << name; + + updater_.reset(LinearUpdater::Create(name, ctx_)); updater_->Configure(cfg); - monitor_.Init("GBLinear"); } int32_t BoostedRounds() const override { @@ -101,13 +92,6 @@ class GBLinear : public GradientBooster { bool ModelFitted() const override { return BoostedRounds() != 0; } - void Load(dmlc::Stream* fi) override { - model_.Load(fi); - } - void Save(dmlc::Stream* fo) const override { - model_.Save(fo); - } - void SaveModel(Json* p_out) const override { auto& out = *p_out; out["name"] = String{"gblinear"}; @@ -125,7 +109,6 @@ class GBLinear : public GradientBooster { void LoadConfig(Json const& in) override { CHECK_EQ(get(in["name"]), "gblinear"); FromJson(in["gblinear_train_param"], ¶m_); - param_.CheckGPUSupport(); updater_.reset(LinearUpdater::Create(param_.updater, ctx_)); this->updater_->LoadConfig(in["updater"]); } @@ -140,8 +123,13 @@ class GBLinear : public GradientBooster { this->updater_->SaveConfig(&j_updater); } - void DoBoost(DMatrix* p_fmat, linalg::Matrix* in_gpair, PredictionCacheEntry*, + void DoBoost(DMatrix* p_fmat, GradientContainer* in_gpair, PredictionCacheEntry*, ObjFunction const*) override { + if (in_gpair->HasValueGrad()) { + LOG(FATAL) + << "Multi-target with reduced gradient is not implemented for the current booster."; + } + monitor_.Start("DoBoost"); CHECK(!p_fmat->Info().HasCategorical()) << error::NoCategorical("`gblinear`"); @@ -149,7 +137,7 @@ class GBLinear : public GradientBooster { this->LazySumWeights(p_fmat); if (!this->CheckConvergence()) { - updater_->Update(in_gpair, p_fmat, &model_, sum_instance_weight_); + updater_->Update(in_gpair->Grad(), p_fmat, &model_, sum_instance_weight_); } model_.num_boosted_rounds++; monitor_.Stop("DoBoost"); @@ -249,14 +237,6 @@ class GBLinear : public GradientBooster { } } - [[nodiscard]] bool UseGPU() const override { - if (param_.updater == "gpu_coord_descent") { - return true; - } else { - return false; - } - } - protected: void PredictBatchInternal(DMatrix *p_fmat, std::vector *out_preds) { diff --git a/src/gbm/gblinear_model.cc b/src/gbm/gblinear_model.cc index 5e6f5dda9a1f..72853f544fdf 100644 --- a/src/gbm/gblinear_model.cc +++ b/src/gbm/gblinear_model.cc @@ -1,15 +1,12 @@ -/*! 
- * Copyright 2019-2022 by Contributors +/** + * Copyright 2019-2025, XGBoost Contributors */ #include #include -#include #include "xgboost/json.h" #include "gblinear_model.h" -namespace xgboost { -namespace gbm { - +namespace xgboost::gbm { void GBLinearModel::SaveModel(Json* p_out) const { auto& out = *p_out; @@ -43,7 +40,4 @@ void GBLinearModel::LoadModel(Json const& in) { this->num_boosted_rounds = 0; } } - -DMLC_REGISTER_PARAMETER(DeprecatedGBLinearModelParam); -} // namespace gbm -} // namespace xgboost +} // namespace xgboost::gbm diff --git a/src/gbm/gblinear_model.h b/src/gbm/gblinear_model.h index 91760346ca47..08ae56959461 100644 --- a/src/gbm/gblinear_model.h +++ b/src/gbm/gblinear_model.h @@ -1,5 +1,5 @@ -/*! - * Copyright 2018-2019 by Contributors +/** + * Copyright 2018-2025, XGBoost Contributors */ #pragma once #include @@ -14,40 +14,14 @@ #include "xgboost/feature_map.h" #include "xgboost/model.h" #include "xgboost/json.h" -#include "xgboost/parameter.h" namespace xgboost { class Json; namespace gbm { -// Deprecated in 1.0.0. model parameter. Only staying here for compatible binary model IO. -struct DeprecatedGBLinearModelParam : public dmlc::Parameter { - // number of feature dimension - uint32_t deprecated_num_feature; - // deprecated. use learner_model_param_->num_output_group. - int32_t deprecated_num_output_group; - // reserved field - int32_t reserved[32]; - // constructor - DeprecatedGBLinearModelParam() { - static_assert(sizeof(*this) == sizeof(int32_t) * 34, - "Model parameter size can not be changed."); - std::memset(this, 0, sizeof(DeprecatedGBLinearModelParam)); - } - - DMLC_DECLARE_PARAMETER(DeprecatedGBLinearModelParam) { - DMLC_DECLARE_FIELD(deprecated_num_feature); - DMLC_DECLARE_FIELD(deprecated_num_output_group); - } -}; - // model for linear booster class GBLinearModel : public Model { - private: - // Deprecated in 1.0.0 - DeprecatedGBLinearModelParam param_; - public: - int32_t num_boosted_rounds{0}; + std::int32_t num_boosted_rounds{0}; LearnerModelParam const* learner_model_param; public: @@ -71,17 +45,6 @@ class GBLinearModel : public Model { void SaveModel(Json *p_out) const override; void LoadModel(Json const &in) override; - // save the model to file - void Save(dmlc::Stream *fo) const { - fo->Write(¶m_, sizeof(param_)); - fo->Write(weight); - } - // load model from file - void Load(dmlc::Stream *fi) { - CHECK_EQ(fi->Read(¶m_, sizeof(param_)), sizeof(param_)); - fi->Read(&weight); - } - // model bias inline bst_float *Bias() { return &weight[learner_model_param->num_feature * diff --git a/src/gbm/gbm.cc b/src/gbm/gbm.cc index 9f88e30f59df..ecee708ecb8e 100644 --- a/src/gbm/gbm.cc +++ b/src/gbm/gbm.cc @@ -1,5 +1,5 @@ -/*! - * Copyright 2015-2022 by XGBoost Contributors +/** + * Copyright 2015-2025, XGBoost Contributors * \file gbm.cc * \brief Registry of gradient boosters. */ @@ -7,9 +7,7 @@ #include -#include #include -#include #include "xgboost/context.h" #include "xgboost/learner.h" diff --git a/src/gbm/gbtree.cc b/src/gbm/gbtree.cc index 8eb75323d00a..3239742452f1 100644 --- a/src/gbm/gbtree.cc +++ b/src/gbm/gbtree.cc @@ -1,5 +1,6 @@ /** * Copyright 2014-2025, XGBoost Contributors + * * \file gbtree.cc * \brief gradient boosted tree implementation. 
* \author Tianqi Chen @@ -61,11 +62,6 @@ std::string MapTreeMethodToUpdaters(Context const* ctx, TreeMethod tree_method) case TreeMethod::kExact: CHECK(ctx->IsCPU()) << "The `exact` tree method is not supported on GPU."; return "grow_colmaker,prune"; - case TreeMethod::kGPUHist: { - common::AssertGPUSupport(); - error::WarnDeprecatedGPUHist(); - return "grow_gpu_hist"; - } default: auto tm = static_cast>(tree_method); LOG(FATAL) << "Unknown tree_method: `" << tm << "`."; @@ -149,6 +145,21 @@ void GBTree::Configure(Args const& cfg) { } } +void GBTreeModel::InitTreesToUpdate() { + if (trees_to_update.empty()) { + for (auto& tree : trees) { + trees_to_update.push_back(std::move(tree)); + } + + trees.clear(); + param.num_trees = 0; + tree_info.HostVector().clear(); + + iteration_indptr.clear(); + iteration_indptr.push_back(0); + } +} + void GPUCopyGradient(Context const*, linalg::Matrix const*, bst_group_t, linalg::Matrix*) #if defined(XGBOOST_USE_CUDA) @@ -199,19 +210,29 @@ void GBTree::UpdateTreeLeaf(DMatrix const* p_fmat, HostDeviceVector const } } -void GBTree::DoBoost(DMatrix* p_fmat, linalg::Matrix* in_gpair, - PredictionCacheEntry* predt, ObjFunction const* obj) { +void GBTree::DoBoost(DMatrix* p_fmat, GradientContainer* in_gpair, PredictionCacheEntry* predt, + ObjFunction const* obj) { if (model_.learner_model_param->IsVectorLeaf()) { CHECK(tparam_.tree_method == TreeMethod::kHist || tparam_.tree_method == TreeMethod::kAuto) << "Only the hist tree method is supported for building multi-target trees with vector " "leaf."; - CHECK(ctx_->IsCPU()) << "GPU is not yet supported for vector leaf."; } TreesOneIter new_trees; bst_target_t const n_groups = model_.learner_model_param->OutputLength(); monitor_.Start("BoostNewTrees"); + // Define the categories. 
+ if (this->model_.Cats()->Empty() && !p_fmat->Cats()->Empty()) { + auto in_cats = p_fmat->Cats(); + this->model_.Cats()->Copy(this->ctx_, *in_cats); + this->model_.Cats()->Sort(this->ctx_); + } else { + CHECK_EQ(this->model_.Cats()->NumCatsTotal(), p_fmat->Cats()->NumCatsTotal()) + << "A new dataset with different categorical features is used for training an existing " + "model."; + } + predt->predictions.SetDevice(ctx_->Device()); auto out = linalg::MakeTensorView(ctx_, &predt->predictions, p_fmat->Info().num_row_, model_.learner_model_param->OutputLength()); @@ -222,6 +243,7 @@ void GBTree::DoBoost(DMatrix* p_fmat, linalg::Matrix* in_gpair, std::vector> node_position; if (model_.learner_model_param->IsVectorLeaf()) { + // Multi-target, vector leaf TreesOneGroup ret; BoostNewTrees(in_gpair, p_fmat, 0, &node_position, &ret); UpdateTreeLeaf(p_fmat, predt->predictions, obj, 0, node_position, &ret); @@ -232,6 +254,7 @@ void GBTree::DoBoost(DMatrix* p_fmat, linalg::Matrix* in_gpair, predt->Update(1); } } else if (model_.learner_model_param->OutputLength() == 1u) { + // Single target TreesOneGroup ret; BoostNewTrees(in_gpair, p_fmat, 0, &node_position, &ret); UpdateTreeLeaf(p_fmat, predt->predictions, obj, 0, node_position, &ret); @@ -242,13 +265,16 @@ void GBTree::DoBoost(DMatrix* p_fmat, linalg::Matrix* in_gpair, predt->Update(1); } } else { - CHECK_EQ(in_gpair->Size() % n_groups, 0U) << "must have exactly ngroup * nrow gpairs"; - linalg::Matrix tmp{{in_gpair->Shape(0), static_cast(1ul)}, - ctx_->Device()}; + // Multi-target, scalar leaf + CHECK_EQ(in_gpair->gpair.Size() % n_groups, 0U) + << "Must have exactly n_groups * n_samples gpairs."; + GradientContainer tmp; + tmp.gpair = linalg::Matrix{ + {in_gpair->gpair.Shape(0), static_cast(1ul)}, ctx_->Device()}; bool update_predict = true; for (bst_target_t gid = 0; gid < n_groups; ++gid) { node_position.clear(); - CopyGradient(ctx_, in_gpair, gid, &tmp); + CopyGradient(ctx_, &in_gpair->gpair, gid, &tmp.gpair); TreesOneGroup ret; BoostNewTrees(&tmp, p_fmat, gid, &node_position, &ret); UpdateTreeLeaf(p_fmat, predt->predictions, obj, gid, node_position, &ret); @@ -269,9 +295,7 @@ void GBTree::DoBoost(DMatrix* p_fmat, linalg::Matrix* in_gpair, this->CommitModel(std::move(new_trees)); } -void GBTree::BoostNewTrees(linalg::Matrix* gpair, DMatrix* p_fmat, int bst_group, - std::vector>* out_position, - TreesOneGroup* ret) { +std::vector GBTree::InitNewTrees(bst_target_t bst_group, TreesOneGroup* ret) { std::vector new_trees; ret->clear(); // create the trees @@ -305,20 +329,30 @@ void GBTree::BoostNewTrees(linalg::Matrix* gpair, DMatrix* p_fmat, ret->push_back(std::move(t)); } } + return new_trees; +} + +void GBTree::BoostNewTrees(GradientContainer* gpair, DMatrix* p_fmat, int bst_group, + std::vector>* out_position, + TreesOneGroup* ret) { + std::vector new_trees = this->InitNewTrees(bst_group, ret); // update the trees auto n_out = model_.learner_model_param->OutputLength() * p_fmat->Info().num_row_; StringView msg{ "Mismatching size between number of rows from input data and size of gradient vector."}; if (!model_.learner_model_param->IsVectorLeaf() && p_fmat->Info().num_row_ != 0) { - CHECK_EQ(n_out % gpair->Size(), 0) << msg; - } else { - CHECK_EQ(gpair->Size(), n_out) << msg; + CHECK_EQ(n_out % gpair->gpair.Size(), 0) << msg; + } else if (model_.learner_model_param->IsVectorLeaf()) { + // vector leaf + if (!gpair->HasValueGrad()) { + CHECK_EQ(gpair->gpair.Size(), n_out) << msg; + } } out_position->resize(new_trees.size()); - // Rescale learning 
-  // Rescale learning rate according to the size of trees
+  // Rescale learning rate according to the number of trees
   auto lr = tree_param_.learning_rate;
   tree_param_.learning_rate /= static_cast<float>(new_trees.size());
   for (auto& up : updaters_) {
@@ -345,20 +379,6 @@ void GBTree::LoadConfig(Json const& in) {
   tparam_.process_type = TreeProcessType::kDefault;
   std::int32_t const n_gpus = curt::AllVisibleGPUs();

-  auto msg = StringView{
-      R"(
-  Loading from a raw memory buffer (like pickle in Python, RDS in R) on a CPU-only
-  machine. Consider using `save_model/load_model` instead. See:
-
-    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html
-
-  for more details about differences between saving model and serializing.)"};
-
-  if (n_gpus == 0 && tparam_.tree_method == TreeMethod::kGPUHist) {
-    tparam_.UpdateAllowUnknown(Args{{"tree_method", "hist"}});
-    LOG(WARNING) << msg << " Changing `tree_method` to `hist`.";
-  }
-
   std::vector<std::string> updater_seq;
   if (IsA<Object>(in["updater"])) {
     // before 2.0
@@ -451,7 +471,9 @@ void GBTree::Slice(bst_layer_t begin, bst_layer_t end, bst_layer_t step, Gradien
   auto& out_indptr = out_model.iteration_indptr;
   TreesOneGroup& out_trees = out_model.trees;
-  std::vector<int>& out_trees_info = out_model.tree_info;
+  auto& out_tree_info = out_model.tree_info.HostVector();
+
+  auto const& in_tree_info = this->model_.tree_info.ConstHostVector();

   bst_layer_t n_layers = (end - begin) / step;
   out_indptr.resize(n_layers + 1, 0);
@@ -466,11 +488,11 @@ void GBTree::Slice(bst_layer_t begin, bst_layer_t end, bst_layer_t step, Gradien
   *out_of_bound =
       detail::SliceTrees(begin, end, step, this->model_, [&](auto in_tree_idx, auto out_l) {
-        auto new_tree = std::make_unique<RegTree>(*this->model_.trees.at(in_tree_idx));
+        std::unique_ptr<RegTree> new_tree{this->model_.trees.at(in_tree_idx)->Copy()};
         out_trees.emplace_back(std::move(new_tree));

-        bst_group_t group = this->model_.tree_info[in_tree_idx];
-        out_trees_info.push_back(group);
+        bst_group_t group = in_tree_info[in_tree_idx];
+        out_tree_info.push_back(group);
         out_model.iteration_indptr[out_l + 1]++;
       });
@@ -705,20 +727,6 @@ class Dart : public GBTree {
     }
   }

-  void Load(dmlc::Stream* fi) override {
-    GBTree::Load(fi);
-    weight_drop_.resize(model_.param.num_trees);
-    if (model_.param.num_trees != 0) {
-      fi->Read(&weight_drop_);
-    }
-  }
-  void Save(dmlc::Stream* fo) const override {
-    GBTree::Save(fo);
-    if (weight_drop_.size() != 0) {
-      fo->Write(weight_drop_);
-    }
-  }
-
   void LoadConfig(Json const& in) override {
     CHECK_EQ(get<String const>(in["name"]), "dart");
     auto const& gbtree = in["gbtree"];
@@ -735,9 +743,8 @@ class Dart : public GBTree {
   }
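`Slice` above copies whole boosting layers, and `iteration_indptr` is the bookkeeping that makes this possible: layer `l` owns the half-open tree range `[indptr[l], indptr[l+1])`. A standalone sketch of that mapping with toy data (not the XGBoost API):

#include <cassert>
#include <vector>

int main() {
  // 3 iterations, 2 trees per iteration (e.g. two classes, or a small forest).
  std::vector<int> iteration_indptr{0, 2, 4, 6};
  // Trees of layer 1 occupy [indptr[1], indptr[2]).
  assert(iteration_indptr[1] == 2 && iteration_indptr[2] == 4);
  // Slicing layers [0, 3) with step 2 collects the per-layer ranges.
  std::vector<int> picked;
  for (int l = 0; l < 3; l += 2) {
    for (int t = iteration_indptr[l]; t < iteration_indptr[l + 1]; ++t) picked.push_back(t);
  }
  assert((picked == std::vector<int>{0, 1, 4, 5}));
}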
  // An independent const function to make sure it's thread safe.
-  void PredictBatchImpl(DMatrix *p_fmat, PredictionCacheEntry *p_out_preds,
-                        bool training, unsigned layer_begin,
-                        unsigned layer_end) const {
+  void PredictBatchImpl(DMatrix* p_fmat, PredictionCacheEntry* p_out_preds, bool training,
+                        bst_layer_t layer_begin, bst_layer_t layer_end) const {
     CHECK(!this->model_.learner_model_param->IsVectorLeaf()) << "dart" << MTNotImplemented();
     auto& predictor = this->GetPredictor(training, &p_out_preds->predictions, p_fmat);
     CHECK(predictor);
@@ -756,7 +763,7 @@ class Dart : public GBTree {
     auto layer_trees = [&]() {
       return model_.param.num_parallel_tree * model_.learner_model_param->OutputLength();
     };
-
+    auto const& h_tree_info = this->model_.tree_info.ConstHostVector();
     for (bst_tree_t i = tree_begin; i < tree_end; i += 1) {
       if (training && std::binary_search(idx_drop_.cbegin(), idx_drop_.cend(), i)) {
         continue;
@@ -770,20 +777,19 @@ class Dart : public GBTree {

       // Multiple the weight to output prediction.
       auto w = this->weight_drop_.at(i);
-      auto group = model_.tree_info.at(i);
+      auto grp_idx = h_tree_info.at(i);
       CHECK_EQ(p_out_preds->predictions.Size(), predts.predictions.Size());

       size_t n_rows = p_fmat->Info().num_row_;
       if (predts.predictions.Device().IsCUDA()) {
         p_out_preds->predictions.SetDevice(predts.predictions.Device());
-        GPUDartPredictInc(p_out_preds->predictions.DeviceSpan(),
-                          predts.predictions.DeviceSpan(), w, n_rows, n_groups,
-                          group);
+        GPUDartPredictInc(p_out_preds->predictions.DeviceSpan(), predts.predictions.DeviceSpan(), w,
+                          n_rows, n_groups, grp_idx);
       } else {
         auto &h_out_predts = p_out_preds->predictions.HostVector();
-        auto &h_predts = predts.predictions.HostVector();
+        auto &h_predts = predts.predictions.ConstHostVector();
         common::ParallelFor(p_fmat->Info().num_row_, ctx_->Threads(), [&](auto ridx) {
-          const size_t offset = ridx * n_groups + group;
+          const size_t offset = ridx * n_groups + grp_idx;
           h_out_predts[offset] += (h_predts[offset] * w);
         });
       }
@@ -836,6 +842,7 @@ class Dart : public GBTree {
       CHECK(success) << msg;
     };

+    auto const& h_tree_info = this->model_.tree_info.ConstHostVector();
     // Inplace predict is not used for training, so no need to drop tree.
     for (bst_tree_t i = tree_begin; i < tree_end; ++i) {
       predict_impl(i);
@@ -858,7 +865,7 @@ class Dart : public GBTree {
       }
       // Multiple the tree weight
       auto w = this->weight_drop_.at(i);
-      auto group = model_.tree_info.at(i);
+      auto group = h_tree_info.at(i);
       CHECK_EQ(predts.predictions.Size(), p_out_preds->predictions.Size());

       size_t n_rows = p_fmat->Info().num_row_;
@@ -872,9 +879,10 @@ class Dart : public GBTree {
       auto base_score = model_.learner_model_param->BaseScore(DeviceOrd::CPU());
       auto& h_predts = predts.predictions.HostVector();
       auto& h_out_predts = p_out_preds->predictions.HostVector();
+      CHECK_EQ(base_score.Size(), n_groups);
       common::ParallelFor(n_rows, ctx_->Threads(), [&](auto ridx) {
         const size_t offset = ridx * n_groups + group;
-        h_out_predts[offset] += (h_predts[offset] - base_score(0)) * w;
+        h_out_predts[offset] += (h_predts[offset] - base_score(group)) * w;
       });
     }
   }
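The `ParallelFor` in `PredictBatchImpl` above accumulates each kept tree's margin, scaled by its DART weight, into a group-strided output buffer. A serial sketch of just that indexing, assuming a row-major `[n_rows, n_groups]` layout:

#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  std::size_t n_rows = 2, n_groups = 3;
  std::vector<double> out(n_rows * n_groups, 0.0);
  std::vector<double> tree_pred(n_rows * n_groups, 1.0);  // one tree's margin
  double w = 0.5;           // weight_drop_[i] for this tree
  std::size_t grp_idx = 1;  // the group (class) this tree belongs to
  for (std::size_t ridx = 0; ridx < n_rows; ++ridx) {
    std::size_t offset = ridx * n_groups + grp_idx;
    out[offset] += tree_pred[offset] * w;  // only column grp_idx is touched
  }
  assert(out[1] == 0.5 && out[0] == 0.0);
}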
@@ -906,7 +914,7 @@ class Dart : public GBTree {
   }

   // Select which trees to drop.
-  inline void DropTrees(bool is_training) {
+  void DropTrees(bool is_training) {
     if (!is_training) {
       // This function should be thread safe when it's not training.
       return;
     }
@@ -916,10 +924,12 @@ class Dart : public GBTree {
     std::uniform_real_distribution<> runif(0.0, 1.0);
     auto& rnd = common::GlobalRandom();
     bool skip = false;
-    if (dparam_.skip_drop > 0.0) skip = (runif(rnd) < dparam_.skip_drop);
+    if (dparam_.skip_drop > 0.0) {
+      skip = (runif(rnd) < dparam_.skip_drop);
+    }
     // sample some trees to drop
     if (!skip) {
-      if (dparam_.sample_type == 1) {
+      if (dparam_.sample_type == DartSampleType::kWeighted) {
         bst_float sum_weight = 0.0;
         for (auto elem : weight_drop_) {
           sum_weight += elem;
@@ -989,17 +999,6 @@ class Dart : public GBTree {
     return num_drop;
   }

-  // init thread buffers
-  inline void InitThreadTemp(int nthread) {
-    int prev_thread_temp_size = thread_temp_.size();
-    if (prev_thread_temp_size < nthread) {
-      thread_temp_.resize(nthread, RegTree::FVec());
-      for (int i = prev_thread_temp_size; i < nthread; ++i) {
-        thread_temp_[i].Init(model_.learner_model_param->num_feature);
-      }
-    }
-  }
-
   // --- data structure ---
   // training parameter
   DartTrainParam dparam_;
@@ -1019,7 +1018,7 @@ DMLC_REGISTER_PARAMETER(DartTrainParam);
 XGBOOST_REGISTER_GBM(GBTree, "gbtree")
     .describe("Tree booster, gradient boosted trees.")
     .set_body([](LearnerModelParam const* booster_config, Context const* ctx) {
-      auto* p = new GBTree(booster_config, ctx);
+      auto* p = new GBTree{booster_config, ctx};
       return p;
     });
 XGBOOST_REGISTER_GBM(Dart, "dart")
diff --git a/src/gbm/gbtree.cu b/src/gbm/gbtree.cu
index 8c4a960904f4..4e80a55de82d 100644
--- a/src/gbm/gbtree.cu
+++ b/src/gbm/gbtree.cu
@@ -1,5 +1,5 @@
 /**
- * Copyright 2021-2023, XGBoost Contributors
+ * Copyright 2021-2025, XGBoost Contributors
 */
 #include <thrust/iterator/counting_iterator.h>  // for make_counting_iterator
@@ -34,10 +34,10 @@ void GPUDartInplacePredictInc(common::Span<float> out_predts, common::Span<float> predts,
                               float tree_w, size_t n_rows,
                               linalg::TensorView<float const, 1> base_score, bst_group_t n_groups,
                               bst_group_t group) {
-  CHECK_EQ(base_score.Size(), 1);
+  CHECK_EQ(base_score.Size(), n_groups);
   dh::LaunchN(n_rows, [=] XGBOOST_DEVICE(size_t ridx) {
     const size_t offset = ridx * n_groups + group;
-    out_predts[offset] += (predts[offset] - base_score(0)) * tree_w;
+    out_predts[offset] += (predts[offset] - base_score(group)) * tree_w;
   });
 }
 }  // namespace xgboost::gbm
diff --git a/src/gbm/gbtree.h b/src/gbm/gbtree.h
index 1fbf0ebdaf7f..20975b610d5e 100644
--- a/src/gbm/gbtree.h
+++ b/src/gbm/gbtree.h
@@ -18,7 +18,8 @@
 #include

 #include "../common/timer.h"
-#include "../tree/param.h"  // TrainParam
+#include "../tree/param.h"      // TrainParam
+#include "../tree/tree_view.h"  // for WalkTree
 #include "gbtree_model.h"
 #include "xgboost/base.h"
 #include "xgboost/data.h"
@@ -32,8 +33,10 @@
 namespace xgboost {
 enum class TreeMethod : int {
-  kAuto = 0, kApprox = 1, kExact = 2, kHist = 3,
-  kGPUHist = 5
+  kAuto = 0,
+  kApprox = 1,
+  kExact = 2,
+  kHist = 3,
 };

 // boosting process types
@@ -41,10 +44,17 @@
 enum class TreeProcessType : int {
   kDefault = 0,
   kUpdate = 1
 };
+
+// Sampling type for dart weights.
+enum class DartSampleType : std::int32_t {
+  kUniform = 0,
+  kWeighted = 1,
+};
 }  // namespace xgboost

 DECLARE_FIELD_ENUM_CLASS(xgboost::TreeMethod);
 DECLARE_FIELD_ENUM_CLASS(xgboost::TreeProcessType);
+DECLARE_FIELD_ENUM_CLASS(xgboost::DartSampleType);

 namespace xgboost::gbm {
 /*! \brief training parameters */
@@ -71,15 +81,13 @@ struct GBTreeTrainParam : public XGBoostParameter<GBTreeTrainParam> {
         .add_enum("approx", TreeMethod::kApprox)
         .add_enum("exact", TreeMethod::kExact)
         .add_enum("hist", TreeMethod::kHist)
-        .add_enum("gpu_hist", TreeMethod::kGPUHist)
         .describe("Choice of tree construction method.");
   }
 };
-/*! \brief training parameters */
+/** @brief Dart training parameters */
 struct DartTrainParam : public XGBoostParameter<DartTrainParam> {
-  /*! \brief type of sampling algorithm */
-  int sample_type;
+  DartSampleType sample_type;
   /*! \brief type of normalization algorithm */
   int normalize_type;
   /*! \brief fraction of trees to drop during the dropout */
@@ -88,12 +96,12 @@ struct DartTrainParam : public XGBoostParameter<DartTrainParam> {
   bool one_drop;
   /*! \brief probability of skipping the dropout during an iteration */
   float skip_drop;
-  // declare parameters
+
   DMLC_DECLARE_PARAMETER(DartTrainParam) {
     DMLC_DECLARE_FIELD(sample_type)
-        .set_default(0)
-        .add_enum("uniform", 0)
-        .add_enum("weighted", 1)
+        .set_default(DartSampleType::kUniform)
+        .add_enum("uniform", DartSampleType::kUniform)
+        .add_enum("weighted", DartSampleType::kWeighted)
         .describe("Different types of sampling algorithm.");
     DMLC_DECLARE_FIELD(normalize_type)
         .set_default(0)
@@ -104,9 +112,8 @@ struct DartTrainParam : public XGBoostParameter<DartTrainParam> {
         .set_range(0.0f, 1.0f)
         .set_default(0.0f)
         .describe("Fraction of trees to drop during the dropout.");
-    DMLC_DECLARE_FIELD(one_drop)
-        .set_default(false)
-        .describe("Whether at least one tree should always be dropped during the dropout.");
+    DMLC_DECLARE_FIELD(one_drop).set_default(false).describe(
+        "Whether at least one tree should always be dropped during the dropout.");
     DMLC_DECLARE_FIELD(skip_drop)
         .set_range(0.0f, 1.0f)
         .set_default(0.0f)
        .describe(
@@ -181,18 +188,11 @@ class GBTree : public GradientBooster {
   /**
    * @brief Carry out one iteration of boosting.
    */
-  void DoBoost(DMatrix* p_fmat, linalg::Matrix<GradientPair>* in_gpair, PredictionCacheEntry* predt,
+  void DoBoost(DMatrix* p_fmat, GradientContainer* in_gpair, PredictionCacheEntry* predt,
                ObjFunction const* obj) override;

-  [[nodiscard]] bool UseGPU() const override { return tparam_.tree_method == TreeMethod::kGPUHist; }
-
   [[nodiscard]] GBTreeTrainParam const& GetTrainParam() const { return tparam_; }

-  void Load(dmlc::Stream* fi) override { model_.Load(fi); }
-  void Save(dmlc::Stream* fo) const override {
-    model_.Save(fo);
-  }
-
   void LoadConfig(Json const& in) override;
   void SaveConfig(Json* p_out) const override;
@@ -237,7 +237,7 @@ class GBTree : public GradientBooster {
     for (auto idx : trees) {
       CHECK_LE(idx, total_n_trees) << "Invalid tree index.";
       auto const& tree = *model_.trees[idx];
-      tree.WalkTree([&](bst_node_t nidx) {
+      tree::WalkTree(tree, [&](auto const& tree, bst_node_t nidx) {
         if (!tree.IsLeaf(nidx)) {
           split_counts[tree.SplitIndex(nidx)]++;
           fn(tree, nidx, tree.SplitIndex(nidx));
@@ -252,18 +252,20 @@ class GBTree : public GradientBooster {
         gain_map[split] = split_counts[split];
       });
     } else if (importance_type == "gain" || importance_type == "total_gain") {
-      if (!model_.trees.empty() && model_.trees.front()->IsMultiTarget()) {
-        LOG(FATAL) << "gain/total_gain " << MTNotImplemented();
-      }
       add_score([&](auto const& tree, bst_node_t nidx, bst_feature_t split) {
-        gain_map[split] += tree.Stat(nidx).loss_chg;
+        if constexpr (tree::IsScalarTree<decltype(tree)>()) {
+          gain_map[split] += tree.Stat(nidx).loss_chg;
+        } else {
+          LOG(FATAL) << "gain/total_gain " << MTNotImplemented();
+        }
       });
     } else if (importance_type == "cover" || importance_type == "total_cover") {
-      if (!model_.trees.empty() && model_.trees.front()->IsMultiTarget()) {
-        LOG(FATAL) << "cover/total_cover " << MTNotImplemented();
-      }
       add_score([&](auto const& tree, bst_node_t nidx, bst_feature_t split) {
+        if constexpr (tree::IsScalarTree<decltype(tree)>()) {
+          gain_map[split] += tree.Stat(nidx).sum_hess;
+        } else {
+          LOG(FATAL) << "cover/total_cover " << MTNotImplemented();
+        }
       });
     } else {
       LOG(FATAL)
@@ -287,6 +289,8 @@ class GBTree : public GradientBooster {
     }
   }

+  [[nodiscard]] CatContainer const* Cats() const override { return this->model_.Cats(); }
+
   void PredictLeaf(DMatrix* p_fmat, HostDeviceVector<bst_float>* out_preds,
                    uint32_t layer_begin, uint32_t layer_end) override {
@@ -322,10 +326,12 @@ class GBTree : public GradientBooster {
   }

  protected:
-  void BoostNewTrees(linalg::Matrix<GradientPair>* gpair, DMatrix* p_fmat, int bst_group,
+  void BoostNewTrees(GradientContainer* gpair, DMatrix* p_fmat, int bst_group,
                      std::vector<HostDeviceVector<bst_node_t>>* out_position,
                      std::vector<std::unique_ptr<RegTree>>* ret);

+  std::vector<RegTree*> InitNewTrees(bst_target_t bst_group, TreesOneGroup* ret);
+
   [[nodiscard]] std::unique_ptr<Predictor> const& GetPredictor(
       bool is_training, HostDeviceVector<float> const* out_pred = nullptr,
       DMatrix* f_dmat = nullptr) const;
diff --git a/src/gbm/gbtree_model.cc b/src/gbm/gbtree_model.cc
index 2edb456c95de..ecb6a66e9c17 100644
--- a/src/gbm/gbtree_model.cc
+++ b/src/gbm/gbtree_model.cc
@@ -1,17 +1,14 @@
 /**
- * Copyright 2019-2023, XGBoost Contributors
+ * Copyright 2019-2025, XGBoost Contributors
 */
 #include "gbtree_model.h"

-#include <algorithm>  // for transform, max_element
-#include <cstddef>    // for size_t
-#include <numeric>    // for partial_sum
-#include <ostream>    // for operator<<, basic_ostream
-#include <utility>    // for move, pair
+#include <algorithm>  // for transform, max_element
+#include <cstddef>    // for size_t
+#include <numeric>    // for partial_sum
+#include <utility>    // for move, pair

 #include "../common/threading_utils.h"  // for ParallelFor
-#include "dmlc/base.h"                  // for BeginPtr
-#include "dmlc/io.h"                    // for Stream
 #include "xgboost/context.h"            // for Context
 #include "xgboost/json.h"               // for Json, get, Integer, Array, FromJson, ToJson, Json...
 #include "xgboost/learner.h"            // for LearnerModelParam
@@ -22,7 +19,7 @@ namespace xgboost::gbm {
 namespace {
 // For creating the tree indptr from old models.
 void MakeIndptr(GBTreeModel* out_model) {
-  auto const& tree_info = out_model->tree_info;
+  auto const& tree_info = out_model->tree_info.ConstHostVector();
   if (tree_info.empty()) {
     return;
   }
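`MakeIndptr` rebuilds `iteration_indptr` for models saved before the field existed; the `// for partial_sum` and `// for max_element` include comments hint at the approach. A hedged sketch of one plausible reconstruction, assuming a fixed number of trees per iteration derived from the group count (toy data, illustrative only):

#include <algorithm>
#include <cassert>
#include <numeric>
#include <vector>

int main() {
  // Group id of each tree, in boosting order: 2 iterations x 3 groups.
  std::vector<int> tree_info{0, 1, 2, 0, 1, 2};
  int n_groups = *std::max_element(tree_info.begin(), tree_info.end()) + 1;
  int trees_per_iter = n_groups;  // num_parallel_tree == 1 in this sketch
  int n_iters = static_cast<int>(tree_info.size()) / trees_per_iter;
  // indptr[i] = number of trees accumulated before iteration i.
  std::vector<int> indptr(n_iters + 1, trees_per_iter);
  indptr[0] = 0;
  std::partial_sum(indptr.begin(), indptr.end(), indptr.begin());
  assert((indptr == std::vector<int>{0, 3, 6}));
}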
@@ -44,68 +41,12 @@ void MakeIndptr(GBTreeModel* out_model) {
 // Validate the consistency of the model.
 void Validate(GBTreeModel const& model) {
   CHECK_EQ(model.trees.size(), model.param.num_trees);
-  CHECK_EQ(model.tree_info.size(), model.param.num_trees);
+  CHECK_EQ(model.tree_info.Size(), model.param.num_trees);
   // True even if the model is empty since we should always have 0 as the first element.
   CHECK_EQ(model.iteration_indptr.back(), model.param.num_trees);
 }
 }  // namespace

-void GBTreeModel::Save(dmlc::Stream* fo) const {
-  CHECK_EQ(param.num_trees, static_cast<int32_t>(trees.size()));
-
-  if (DMLC_IO_NO_ENDIAN_SWAP) {
-    fo->Write(&param, sizeof(param));
-  } else {
-    auto x = param.ByteSwap();
-    fo->Write(&x, sizeof(x));
-  }
-  for (const auto & tree : trees) {
-    tree->Save(fo);
-  }
-  if (tree_info.size() != 0) {
-    if (DMLC_IO_NO_ENDIAN_SWAP) {
-      fo->Write(dmlc::BeginPtr(tree_info), sizeof(int32_t) * tree_info.size());
-    } else {
-      for (const auto& e : tree_info) {
-        auto x = e;
-        dmlc::ByteSwap(&x, sizeof(x), 1);
-        fo->Write(&x, sizeof(x));
-      }
-    }
-  }
-}
-
-void GBTreeModel::Load(dmlc::Stream* fi) {
-  CHECK_EQ(fi->Read(&param, sizeof(param)), sizeof(param))
-      << "GBTree: invalid model file";
-  if (!DMLC_IO_NO_ENDIAN_SWAP) {
-    param = param.ByteSwap();
-  }
-  trees.clear();
-  trees_to_update.clear();
-  for (int32_t i = 0; i < param.num_trees; ++i) {
-    std::unique_ptr<RegTree> ptr(new RegTree());
-    ptr->Load(fi);
-    trees.push_back(std::move(ptr));
-  }
-  tree_info.resize(param.num_trees);
-  if (param.num_trees != 0) {
-    if (DMLC_IO_NO_ENDIAN_SWAP) {
-      CHECK_EQ(
-          fi->Read(dmlc::BeginPtr(tree_info), sizeof(int32_t) * param.num_trees),
-          sizeof(int32_t) * param.num_trees);
-    } else {
-      for (auto& info : tree_info) {
-        CHECK_EQ(fi->Read(&info, sizeof(int32_t)), sizeof(int32_t));
-        dmlc::ByteSwap(&info, sizeof(info), 1);
-      }
-    }
-  }
-
-  MakeIndptr(this);
-  Validate(*this);
-}
-
 void GBTreeModel::SaveModel(Json* p_out) const {
   auto& out = *p_out;
   CHECK_EQ(param.num_trees, static_cast<int32_t>(trees.size()));
@@ -120,9 +61,10 @@ void GBTreeModel::SaveModel(Json* p_out) const {
     trees_json[t] = std::move(jtree);
   });

-  std::vector<Json> tree_info_json(tree_info.size());
-  for (size_t i = 0; i < tree_info.size(); ++i) {
-    tree_info_json[i] = Integer(tree_info[i]);
+  auto const& h_tree_info = tree_info.ConstHostVector();
+  std::vector<Json> tree_info_json(tree_info.Size());
+  for (size_t i = 0; i < h_tree_info.size(); ++i) {
+    tree_info_json[i] = Integer(h_tree_info[i]);
   }

   out["trees"] = Array(std::move(trees_json));
@@ -132,6 +74,8 @@ void GBTreeModel::SaveModel(Json* p_out) const {
   std::transform(iteration_indptr.cbegin(), iteration_indptr.cend(), jiteration_indptr.begin(),
                  [](bst_tree_t i) { return Integer{i}; });
   out["iteration_indptr"] = Array{std::move(jiteration_indptr)};
+
+  this->Cats()->Save(&out["cats"]);
 }

 void GBTreeModel::LoadModel(Json const& in) {
@@ -142,13 +86,14 @@ void GBTreeModel::LoadModel(Json const& in) {

   auto const& jmodel = get<Object const>(in);

-  auto const& trees_json = get<Array const>(in["trees"]);
+  auto const& trees_json = get<Array const>(jmodel.at("trees"));
   CHECK_EQ(trees_json.size(), param.num_trees);
   trees.resize(param.num_trees);

-  auto const& tree_info_json = get<Array const>(in["tree_info"]);
+  auto const& tree_info_json = get<Array const>(jmodel.at("tree_info"));
   CHECK_EQ(tree_info_json.size(), param.num_trees);
-  tree_info.resize(param.num_trees);
+  auto& h_tree_info = this->tree_info.HostVector();
+  h_tree_info.resize(param.num_trees);

   common::ParallelFor(param.num_trees, ctx_->Threads(), [&](auto t) {
     auto tree_id = get<Integer const>(trees_json[t]["id"]);
@@ -157,7 +102,7 @@ void GBTreeModel::LoadModel(Json const& in) {
   });

   for (bst_tree_t i = 0; i < param.num_trees; ++i) {
-    tree_info[i] = get<Integer const>(tree_info_json[i]);
+    h_tree_info[i] = get<Integer const>(tree_info_json[i]);
   }

   auto indptr_it = jmodel.find("iteration_indptr");
@@ -171,6 +116,12 @@ void GBTreeModel::LoadModel(Json const& in) {
     MakeIndptr(this);
   }
+
+  auto p_cats = std::make_shared<CatContainer>();
+  auto cat_it = jmodel.find("cats");
+  if (cat_it != jmodel.cend()) {
+    p_cats->Load(cat_it->second);
+  }
+  this->cats_ = std::move(p_cats);

   Validate(*this);
 }
@@ -193,4 +144,18 @@ bst_tree_t GBTreeModel::CommitModel(TreesOneIter&& new_trees) {
   Validate(*this);
   return n_new_trees;
 }
+
+void GBTreeModel::CommitModelGroup(TreesOneGroup&& new_trees, bst_target_t group_idx) {
+  auto& h_tree_info = this->tree_info.HostVector();
+  for (auto& new_tree : new_trees) {
+    trees.push_back(std::move(new_tree));
+    h_tree_info.push_back(group_idx);
+  }
+  param.num_trees += static_cast<std::int32_t>(new_trees.size());
+}
+
+common::Span<bst_group_t const> GBTreeModel::TreeGroups(DeviceOrd device) const {
+  return device.IsCPU() ? this->tree_info.ConstHostSpan()
+                        : (this->tree_info.SetDevice(device), this->tree_info.ConstDeviceSpan());
+}
 }  // namespace xgboost::gbm
diff --git a/src/gbm/gbtree_model.h b/src/gbm/gbtree_model.h
index 32fa868638bb..51f0962abf37 100644
--- a/src/gbm/gbtree_model.h
+++ b/src/gbm/gbtree_model.h
@@ -1,24 +1,23 @@
 /**
- * Copyright 2017-2023, XGBoost Contributors
- * \file gbtree_model.h
+ * Copyright 2017-2025, XGBoost Contributors
+ *
+ * @file gbtree_model.h
 */
 #ifndef XGBOOST_GBM_GBTREE_MODEL_H_
 #define XGBOOST_GBM_GBTREE_MODEL_H_

-#include
 #include
-#include
-#include
-#include
-#include
-#include
 #include
 #include
-#include
 #include
 #include

 #include "../common/threading_utils.h"
+#include "../data/cat_container.h"  // for CatContainer
+#include "xgboost/context.h"
+#include "xgboost/learner.h"
+#include "xgboost/model.h"
+#include "xgboost/tree_model.h"

 namespace xgboost {
@@ -26,42 +25,32 @@ class Json;
 namespace gbm {
 /**
- * \brief Container for all trees built (not update) for one group.
+ * @brief Container for all trees built (not update) for one group.
 */
 using TreesOneGroup = std::vector<std::unique_ptr<RegTree>>;

 /**
- * \brief Container for all trees built (not update) for one iteration.
+ * @brief Container for all trees built (not update) for one iteration.
 */
 using TreesOneIter = std::vector<TreesOneGroup>;

-/*! \brief model parameters */
+/** @brief GBTree model parameters. */
 struct GBTreeModelParam : public dmlc::Parameter<GBTreeModelParam> {
  public:
   /**
-   * \brief number of trees
+   * @brief The number of trees.
   */
-  std::int32_t num_trees;
+  std::int32_t num_trees{0};
   /**
-   * \brief Number of trees for a forest.
+   * @brief Number of trees for a single forest.
   */
-  std::int32_t num_parallel_tree;
-  /*! \brief reserved parameters */
-  int32_t reserved[38];
-
-  /*! \brief constructor */
-  GBTreeModelParam() {
-    std::memset(this, 0, sizeof(GBTreeModelParam));  // FIXME(trivialfis): Why?
-    static_assert(sizeof(GBTreeModelParam) == (4 + 2 + 2 + 32) * sizeof(int32_t),
-                  "64/32 bit compatibility issue");
-    num_parallel_tree = 1;
-  }
+  std::int32_t num_parallel_tree{1};
+
+  GBTreeModelParam() = default;

   // declare parameters, only declare those that need to be set.
   DMLC_DECLARE_PARAMETER(GBTreeModelParam) {
-    DMLC_DECLARE_FIELD(num_trees)
-        .set_lower_bound(0)
-        .set_default(0)
-        .describe("Number of features used for training and prediction.");
+    DMLC_DECLARE_FIELD(num_trees).set_lower_bound(0).set_default(0).describe(
+        "Number of trees for the entire booster model.");
     DMLC_DECLARE_FIELD(num_parallel_tree)
         .set_default(1)
         .set_lower_bound(1)
@@ -69,45 +58,20 @@ struct GBTreeModelParam : public dmlc::Parameter<GBTreeModelParam> {
             "Number of parallel trees constructed during each iteration."
             " This option is used to support boosted random forest.");
   }
-  // Swap byte order for all fields. Useful for transporting models between machines with different
-  // endianness (big endian vs little endian)
-  GBTreeModelParam ByteSwap() const {
-    GBTreeModelParam x = *this;
-    dmlc::ByteSwap(&x.num_trees, sizeof(x.num_trees), 1);
-    dmlc::ByteSwap(&x.num_parallel_tree, sizeof(x.num_parallel_tree), 1);
-    dmlc::ByteSwap(x.reserved, sizeof(x.reserved[0]), sizeof(x.reserved) / sizeof(x.reserved[0]));
-    return x;
-  }
 };

 struct GBTreeModel : public Model {
  public:
   explicit GBTreeModel(LearnerModelParam const* learner_model, Context const* ctx)
       : learner_model_param{learner_model}, ctx_{ctx} {}
-  void Configure(const Args& cfg) {
+  void Configure(Args const& cfg) {
     // initialize model parameters if not yet been initialized.
     if (trees.size() == 0) {
       param.UpdateAllowUnknown(cfg);
     }
   }
-
-  void InitTreesToUpdate() {
-    if (trees_to_update.size() == 0u) {
-      for (auto & tree : trees) {
-        trees_to_update.push_back(std::move(tree));
-      }
-      trees.clear();
-      param.num_trees = 0;
-      tree_info.clear();
-
-      iteration_indptr.clear();
-      iteration_indptr.push_back(0);
-    }
-  }
-
-  void Load(dmlc::Stream* fi);
-  void Save(dmlc::Stream* fo) const;
+  /** @brief Move existing trees into the update queue. */
+  void InitTreesToUpdate();

   void SaveModel(Json* p_out) const override;
   void LoadModel(Json const& p_out) override;
@@ -120,19 +84,13 @@ struct GBTreeModel : public Model {
     return dump;
   }
   /**
-   * \brief Add trees to the model.
+   * @brief Add trees to the model.
   *
-   * \return The number of new trees.
+   * @return The number of new trees.
   */
   bst_tree_t CommitModel(TreesOneIter&& new_trees);

-  void CommitModelGroup(std::vector<std::unique_ptr<RegTree>>&& new_trees, bst_target_t group_idx) {
-    for (auto& new_tree : new_trees) {
-      trees.push_back(std::move(new_tree));
-      tree_info.push_back(group_idx);
-    }
-    param.num_trees += static_cast<std::int32_t>(new_trees.size());
-  }
+  void CommitModelGroup(TreesOneGroup&& new_trees, bst_target_t group_idx);

   [[nodiscard]] std::int32_t BoostedRounds() const {
     if (trees.empty()) {
@@ -141,27 +99,39 @@ struct GBTreeModel : public Model {
     return static_cast<std::int32_t>(iteration_indptr.size() - 1);
   }

-  // base margin
+  /** @brief Global model properties. */
   LearnerModelParam const* learner_model_param;
-  // model parameter
+  /** @brief GBTree model parameters. */
   GBTreeModelParam param;
   /*! \brief vector of trees stored in the model */
-  std::vector<std::unique_ptr<RegTree> > trees;
+  std::vector<std::unique_ptr<RegTree>> trees;
   /*! \brief for the update process, a place to keep the initial trees */
-  std::vector<std::unique_ptr<RegTree> > trees_to_update;
+  std::vector<std::unique_ptr<RegTree>> trees_to_update;
   /**
-   * \brief Group index for trees.
+   * @brief Group index for trees.
   */
-  std::vector<int> tree_info;
+  HostDeviceVector<bst_group_t> tree_info;
   /**
-   * \brief Number of trees accumulated for each iteration.
+   * @brief Number of trees accumulated for each iteration.
   */
   std::vector<bst_tree_t> iteration_indptr{0};

+  [[nodiscard]] CatContainer const* Cats() const { return this->cats_.get(); }
+  [[nodiscard]] CatContainer* Cats() { return this->cats_.get(); }
+  [[nodiscard]] std::shared_ptr<CatContainer> CatsShared() const { return this->cats_; }
+  void Cats(std::shared_ptr<CatContainer> cats) { this->cats_ = cats; }
+
+  auto const* Ctx() const { return this->ctx_; }
+  /**
+   * @brief Getter for the tree group index.
+   */
+  common::Span<bst_group_t const> TreeGroups(DeviceOrd device) const;
+
  private:
   /**
-   * \brief Whether the stack contains multi-target tree.
+   * @brief Categories in the training data.
+   */
+  std::shared_ptr<CatContainer> cats_{std::make_shared<CatContainer>()};
   Context const* ctx_;
 };
 }  // namespace gbm
diff --git a/src/learner.cc b/src/learner.cc
index 34f395beb34b..5a10eda78832 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2014-2024, XGBoost Contributors
+ * Copyright 2014-2025, XGBoost Contributors
 * \file learner.cc
 * \brief Implementation of learning algorithm.
 * \author Tianqi Chen
@@ -11,7 +11,6 @@
 #include <dmlc/thread_local.h>  // for ThreadLocalStore

 #include <algorithm>  // for equal, max, transform, sort, find_if, all_of
-#include <array>      // for array
 #include <atomic>     // for atomic
 #include <cctype>     // for isalpha, isspace
 #include <cmath>      // for isnan, isinf
@@ -34,23 +33,22 @@
 #include "collective/aggregator.h"        // for ApplyWithLabels
 #include "collective/communicator-inl.h"  // for Allreduce, Broadcast, GetRank, IsDistributed
 #include "common/api_entry.h"             // for XGBAPIThreadLocalEntry
+#include "common/param_array.h"           // for ParamArray
 #include "common/charconv.h"              // for to_chars, to_chars_result, NumericLimits, from_...
-#include "common/common.h"                // for ToString, Split
 #include "common/error_msg.h"             // for MaxFeatureSize, WarnOldSerialization, ...
 #include "common/io.h"                    // for PeekableInStream, ReadAll, FixedSizeStream, Mem...
 #include "common/observer.h"              // for TrainingObserver
 #include "common/random.h"                // for GlobalRandom
 #include "common/timer.h"                 // for Monitor
 #include "common/version.h"               // for Version
-#include "dmlc/endian.h"                  // for ByteSwap, DMLC_IO_NO_ENDIAN_SWAP
-#include "xgboost/base.h"                 // for Args, bst_float, GradientPair, bst_feature_t, ...
+#include "xgboost/base.h"                 // for Args, GradientPair, bst_feature_t
 #include "xgboost/context.h"              // for Context
 #include "xgboost/data.h"                 // for DMatrix, MetaInfo
 #include "xgboost/gbm.h"                  // for GradientBooster
 #include "xgboost/global_config.h"        // for GlobalConfiguration, GlobalConfigThreadLocalStore
 #include "xgboost/host_device_vector.h"   // for HostDeviceVector
 #include "xgboost/json.h"                 // for Json, get, Object, String, IsA, Array, ToJson
-#include "xgboost/linalg.h"               // for Tensor, TensorView
+#include "xgboost/linalg.h"               // for Vector, VectorView
 #include "xgboost/logging.h"              // for CHECK, LOG, CHECK_EQ
 #include "xgboost/metric.h"               // for Metric
 #include "xgboost/objective.h"            // for ObjFunction
@@ -80,29 +78,25 @@ T& UsePtr(T& ptr) {  // NOLINT
 /*! \brief training parameter for regression
 *
 * Should be deprecated, but still used for being compatible with binary IO.
- * Once it's gone, `LearnerModelParam` should handle transforming `base_margin`
+ * Once it's gone, `LearnerModelParam` should handle transforming `base_score`
 * with objective by itself.
 */
 struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy> {
-  /* \brief global bias */
-  bst_float base_score;
-  /* \brief number of features */
-  bst_feature_t num_feature;
-  /* \brief number of classes, if it is multi-class classification */
-  std::int32_t num_class;
-  /*! \brief Model contain additional properties */
-  int32_t contain_extra_attrs;
-  /*! \brief Model contain eval metrics */
-  int32_t contain_eval_metrics;
-  /*! \brief the version of XGBoost. */
-  std::uint32_t major_version;
-  std::uint32_t minor_version;
+  /** @brief Global bias/intercept. */
+  common::ParamArray<float> base_score{"base_score"};
+  /** @brief number of features */
+  bst_feature_t num_feature{0};
+  /** @brief number of classes, if it is multi-class classification, 0 otherwise. */
+  std::int32_t num_class{0};
+  /** @brief the version of XGBoost. */
+  std::int32_t major_version{std::get<0>(Version::Self())};
+  std::int32_t minor_version{std::get<1>(Version::Self())};
   /**
-   * \brief Number of target variables.
+   * @brief Number of target variables.
   */
-  bst_target_t num_target;
+  bst_target_t num_target{1};
   /**
-   * \brief Whether we should calculate the base score from training data.
+   * @brief Whether we should calculate the base score from training data.
   *
   * This is a private parameter as we can't expose it as boolean due to binary model
   * format. Exposing it as integer creates inconsistency with other parameters.
@@ -111,31 +105,18 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
   * of bool for the ease of serialization.
   */
   std::int32_t boost_from_average{true};
-  /*! \brief reserved field */
-  int reserved[25];
-  /*! \brief constructor */
-  LearnerModelParamLegacy() {
-    std::memset(this, 0, sizeof(LearnerModelParamLegacy));
-    base_score = ObjFunction::DefaultBaseScore();
-    num_target = 1;
-    major_version = std::get<0>(Version::Self());
-    minor_version = std::get<1>(Version::Self());
-    boost_from_average = true;
-    static_assert(sizeof(LearnerModelParamLegacy) == 136,
-                  "Do not change the size of this struct, as it will break binary IO.");
-  }
-
-  // Skip other legacy fields.
+
+  LearnerModelParamLegacy() = default;
+
   [[nodiscard]] Json ToJson() const {
     Json obj{Object{}};
-    char floats[NumericLimits<float>::kToCharsSize];
-    auto ret = to_chars(floats, floats + NumericLimits<float>::kToCharsSize, base_score);
-    CHECK(ret.ec == std::errc{});
-    obj["base_score"] = std::string{floats, static_cast<size_t>(std::distance(floats, ret.ptr))};
+    std::stringstream ss;
+    ss << base_score;
+    obj["base_score"] = ss.str();

     char integers[NumericLimits<int64_t>::kToCharsSize];
-    ret = to_chars(integers, integers + NumericLimits<int64_t>::kToCharsSize,
-                   static_cast<int64_t>(num_feature));
+    auto ret = to_chars(integers, integers + NumericLimits<int64_t>::kToCharsSize,
+                        static_cast<int64_t>(num_feature));
     CHECK(ret.ec == std::errc());
     obj["num_feature"] = std::string{integers, static_cast<size_t>(std::distance(integers, ret.ptr))};
@@ -169,74 +150,73 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
     if (bse_it != j_param.cend()) {
       m["boost_from_average"] = get<String const>(bse_it->second);
     }
-
-    this->Init(m);

     std::string str = get<String const>(j_param.at("base_score"));
-    from_chars(str.c_str(), str.c_str() + str.size(), base_score);
+    m["base_score"] = str;
+    this->Init(m);
+    this->HandleOldFormat();
   }
-
-  [[nodiscard]] LearnerModelParamLegacy ByteSwap() const {
-    LearnerModelParamLegacy x = *this;
-    dmlc::ByteSwap(&x.base_score, sizeof(x.base_score), 1);
-    dmlc::ByteSwap(&x.num_feature, sizeof(x.num_feature), 1);
-    dmlc::ByteSwap(&x.num_class, sizeof(x.num_class), 1);
-    dmlc::ByteSwap(&x.contain_extra_attrs, sizeof(x.contain_extra_attrs), 1);
-    dmlc::ByteSwap(&x.contain_eval_metrics, sizeof(x.contain_eval_metrics), 1);
-    dmlc::ByteSwap(&x.major_version, sizeof(x.major_version), 1);
-    dmlc::ByteSwap(&x.minor_version, sizeof(x.minor_version), 1);
-    dmlc::ByteSwap(&x.num_target, sizeof(x.num_target), 1);
-    dmlc::ByteSwap(&x.boost_from_average, sizeof(x.boost_from_average), 1);
-    dmlc::ByteSwap(x.reserved, sizeof(x.reserved[0]), sizeof(x.reserved) / sizeof(x.reserved[0]));
-    return x;
-  }
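Models saved before the intercept became a vector carry a single scalar `base_score`; the new `HandleOldFormat` below broadcasts it to one entry per model output. A standalone sketch of that resize-with-fill, using a plain vector in place of `ParamArray`:

#include <cassert>
#include <vector>

int main() {
  std::vector<float> base_score{0.5f};  // scalar intercept from an old model
  std::size_t n_outputs = 3;            // OutputLength() of the new model
  if (base_score.size() == 1 && n_outputs > 1) {
    base_score.resize(n_outputs, base_score[0]);  // broadcast the scalar
  }
  assert((base_score == std::vector<float>{0.5f, 0.5f, 0.5f}));
}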
+  // Handle old model formats, before 3.1, the intercept was always a scalar.
+  void HandleOldFormat() {
+    if (this->base_score.size() == 1 && this->OutputLength() > 1) {
+      this->base_score.Resize(this->OutputLength(), this->base_score[0]);
+    }
+  }

   template <typename Container>
   Args UpdateAllowUnknown(Container const& kwargs) {
     // Detect whether user has made their own base score.
-    auto find_key = [&kwargs](char const* key) {
+    auto has_key = [&kwargs](char const* key) {
       return std::find_if(kwargs.cbegin(), kwargs.cend(),
-                          [key](auto const& kv) { return kv.first == key; });
+                          [key](auto const& kv) { return kv.first == key; }) != kwargs.cend();
     };
-    auto it = find_key("base_score");
-    if (it != kwargs.cend()) {
-      boost_from_average = false;
+    if (has_key("base_score")) {
+      this->boost_from_average = false;
     }
     return dmlc::Parameter<LearnerModelParamLegacy>::UpdateAllowUnknown(kwargs);
   }
-  // sanity check
-  void Validate(Context const* ctx) {
+  // The number of outputs of the model.
+  [[nodiscard]] bst_target_t OutputLength() const noexcept {
+    return std::max({this->num_target, static_cast<bst_target_t>(this->num_class),
+                     static_cast<bst_target_t>(1)});
+  }
+
+  // Sanity checks
+  void Validate(Context const* ctx) const {
+    this->ValidateLength();
+    CHECK(std::none_of(base_score.cbegin(), base_score.cend(),
+                       [](float v) { return std::isnan(v) || std::isinf(v); }));
+
     if (!collective::IsDistributed()) {
       return;
     }
-    std::array<std::int32_t, 6> data;
-    std::size_t pos{0};
-    std::memcpy(data.data() + pos, &base_score, sizeof(base_score));
-    pos += 1;
-    std::memcpy(data.data() + pos, &num_feature, sizeof(num_feature));
-    pos += 1;
-    std::memcpy(data.data() + pos, &num_class, sizeof(num_class));
-    pos += 1;
-    std::memcpy(data.data() + pos, &num_target, sizeof(num_target));
-    pos += 1;
-    std::memcpy(data.data() + pos, &major_version, sizeof(major_version));
-    pos += 1;
-    std::memcpy(data.data() + pos, &minor_version, sizeof(minor_version));
-
-    std::array<std::int32_t, 6> sync;
-    std::copy(data.cbegin(), data.cend(), sync.begin());
+    std::vector<char> data;
+    Json::Dump(this->ToJson(), &data, std::ios::binary);
+    std::vector<char> sync{data};
+
     auto rc = collective::Broadcast(ctx, linalg::MakeVec(sync.data(), sync.size()), 0);
     collective::SafeColl(rc);
+
     CHECK(std::equal(data.cbegin(), data.cend(), sync.cbegin()))
-        << "Different model parameter across workers.";
+        << "Different model parameter across workers:\n\t"
+        << Json::Load(StringView{data.data(), data.size()}, std::ios::binary) << "\nvs.\n\t"
+        << Json::Load(StringView{sync.data(), sync.size()}, std::ios::binary);
+  }
+
+  void ValidateLength() const {
+    CHECK_GE(this->base_score.size(), 1);
+    std::size_t n_classes = static_cast<std::size_t>(num_class),
+                n_targets = static_cast<std::size_t>(num_target);
+    if (!(base_score.size() == n_classes || base_score.size() == n_targets)) {
+      error::InvalidIntercept(n_classes, n_targets, base_score.size());
+    }
   }

   // declare parameters
   DMLC_DECLARE_PARAMETER(LearnerModelParamLegacy) {
     DMLC_DECLARE_FIELD(base_score)
-        .set_default(ObjFunction::DefaultBaseScore())
-        .describe("Global bias of the model.");
+        .describe("Global bias of the model.")
+        .set_default(common::ParamArray<float>{"base_score"});
     DMLC_DECLARE_FIELD(num_feature)
         .set_default(0)
         .describe(
@@ -254,12 +234,13 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
         .describe("Whether we should calculate the base score from training data.");
   }
 };
+}  // namespace xgboost

+namespace xgboost {
 LearnerModelParam::LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t,
                                      MultiStrategy multi_strategy)
     : num_feature{user_param.num_feature},
-      num_output_group{
-          std::max(static_cast<bst_target_t>(user_param.num_class), user_param.num_target)},
+      num_output_group{user_param.OutputLength()},
       task{t},
       multi_strategy{multi_strategy} {
   if (user_param.num_class > 1 && user_param.num_target > 1) {
@@ -269,22 +250,22 @@ LearnerModelParam::LearnerModelParam(LearnerModelParamLegacy const& user_param,
 }

 LearnerModelParam::LearnerModelParam(Context const* ctx, LearnerModelParamLegacy const& user_param,
-                                     linalg::Tensor<float, 1> base_margin, ObjInfo t,
+                                     linalg::Vector<float> base_score, ObjInfo t,
                                      MultiStrategy multi_strategy)
     : LearnerModelParam{user_param, t, multi_strategy} {
-  std::swap(base_score_, base_margin);
+  std::swap(base_score_, base_score);
   // Make sure read access everywhere for thread-safe prediction.
   std::as_const(base_score_).HostView();
-  if (ctx->IsCUDA()) {
+  if (!ctx->IsCPU()) {
     std::as_const(base_score_).View(ctx->Device());
   }
   CHECK(std::as_const(base_score_).Data()->HostCanRead());
 }

-linalg::TensorView<float const, 1> LearnerModelParam::BaseScore(DeviceOrd device) const {
+linalg::VectorView<float const> LearnerModelParam::BaseScore(DeviceOrd device) const {
   // multi-class is not yet supported.
-  CHECK_EQ(base_score_.Size(), 1) << ModelNotFitted();
-  if (!device.IsCUDA()) {
+  CHECK_GE(base_score_.Size(), 1) << ModelNotFitted();
+  if (device.IsCPU()) {
     // Make sure that we won't run into race condition.
     CHECK(base_score_.Data()->HostCanRead());
     return base_score_.HostView();
@@ -296,7 +277,7 @@ linalg::VectorView<float const> LearnerModelParam::BaseScore(DeviceOrd device)
   return v;
 }

-linalg::TensorView<float const, 1> LearnerModelParam::BaseScore(Context const* ctx) const {
+linalg::VectorView<float const> LearnerModelParam::BaseScore(Context const* ctx) const {
   return this->BaseScore(ctx->Device());
 }
@@ -354,7 +335,143 @@ DMLC_REGISTER_PARAMETER(LearnerTrainParam);

 using LearnerAPIThreadLocalStore =
     dmlc::ThreadLocalStore<std::map<Learner const*, XGBAPIThreadLocalEntry>>;

-class LearnerConfiguration : public Learner {
+namespace {
+/**
+ * @brief Handler for the `n_targets` property and the intercept.
+ */
+class Intercept : public Learner {
+  using CacheT = common::GetValueT<decltype(std::declval<PredictionContainer>().Container())>;
+
+ protected:
+  /**
+   * @brief User-provided model parameter.
+   *
+   * This parameter is the most difficult one in XGBoost. It stores basic properties of
+   * the booster model and is saved as part of the booster. We need to configure it
+   * automatically from input training data while taking user-provided parameters into
+   * account.
+   *
+   * It's difficult because XGBoost has an interface that exposes many states. For
+   * instance, we need to have a valid model after configuration, without seeing the
+   * training data. This exposes a partially initialized model that's semi-valid.
+   */
+  LearnerModelParamLegacy mparam_;
+  /**
+   * @brief Internal model parameter.
+   */
+  LearnerModelParam learner_model_param_;
+
+ private:
+  void InitEstimation(MetaInfo const& info, linalg::Vector<float>* base_score) {
+    base_score->SetDevice(this->Ctx()->Device());
+    base_score->Reshape(this->mparam_.OutputLength());
+    collective::ApplyWithLabels(this->Ctx(), info, base_score->Data(),
+                                [&] { UsePtr(obj_)->InitEstimation(info, base_score); });
+  }
+
+  [[nodiscard]] bool NeedFit() const {
+    return this->mparam_.boost_from_average && !UsePtr(gbm_)->ModelFitted();
+  }
+
+  // Create the internal model parameter from user inputs, this requires the user input to
+  // be initialized first.
+  //
+  // Don't apply the link function if the base_score is a dummy value.
+  //
+  // This function should be called for every `Configure` call to make sure the base_score
+  // is stored in the right place.
+  void InitModelParam(LearnerTrainParam const& tparam, bool apply_link) {
+    auto const& in = this->mparam_.base_score;
+    auto task = UsePtr(this->obj_)->Task();
+    linalg::Vector<float> base_score{in.cbegin(), in.cend(), {in.size()}, this->ctx_.Device()};
+    if (apply_link) {
+      UsePtr(this->obj_)->ProbToMargin(&base_score);
+    }
+
+    learner_model_param_ =
+        LearnerModelParam{Ctx(), mparam_, std::move(base_score), task, tparam.multi_strategy};
+  }
+
+  /**
+   * Get the number of targets from the cache using the objective function.
+   */
+  void GetNumTargets(CacheT const& cache) {
+    CHECK(this->obj_);
+    bst_target_t n_targets = 1;
+    for (auto const& d : cache) {
+      if (n_targets == 1) {
+        n_targets = this->obj_->Targets(d.first.ptr->Info());
+      } else {
+        auto t = this->obj_->Targets(d.first.ptr->Info());
+        CHECK(n_targets == t || 1 == t) << "Inconsistent labels.";
+      }
+    }
+
+    if (mparam_.num_target > 1) {
+      CHECK(n_targets == 1 || n_targets == mparam_.num_target)
+          << "Inconsistent configuration of the `num_target`. Configuration result from input "
+          << "data:" << n_targets << ", configuration from parameters:" << mparam_.num_target;
+    } else {
+      mparam_.num_target = n_targets;
+    }
+  }
+
+ protected:
+  void CheckModelInitialized() const {
+    CHECK(learner_model_param_.Initialized()) << ModelNotFitted();
+    CHECK_NE(learner_model_param_.BaseScore(this->Ctx()).Size(), 0) << ModelNotFitted();
+  }
+
+  void InitModelUserParam(LearnerTrainParam const& tparam, CacheT const& cache) {
+    this->GetNumTargets(cache);
+
+    if (this->NeedFit()) {
+      // Initialize with a sensible default value to get prediction/model io going.
+      this->mparam_.base_score.Resize(this->mparam_.OutputLength(),
+                                      ObjFunction::DefaultBaseScore());
+      this->InitModelParam(tparam, false);
+      // This should not be altered, we will estimate it later.
+      CHECK(this->NeedFit());
+    } else if (this->gbm_->ModelFitted()) {
+      this->mparam_.ValidateLength();
+      // Init with a valid (configured) mparam
+      this->InitModelParam(tparam, true);
+    } else {
+      // user-provided
+      this->mparam_.HandleOldFormat();
+      this->InitModelParam(tparam, true);
+    }
+  }
+
+  /**
+   * @brief Calculate the `base_score` based on input data.
+   *
+   * @param p_fmat The training DMatrix used to estimate the base score.
+   */
+  void FitIntercept(LearnerTrainParam const& tparam, DMatrix const* p_fmat) {
+    // Estimate the intercept if this is the first iteration.
+    if (this->NeedFit()) {
+      // The DMatrix can be null if a method other than training is called.
+      if (p_fmat) {
+        auto const& info = p_fmat->Info();
+        info.Validate(Ctx()->Device());
+        // We estimate it from the input data.
+        linalg::Vector<float> base_score;
+        this->InitEstimation(info, &base_score);
+
+        mparam_.base_score = base_score.Data()->ConstHostVector();
+      }
+      this->InitModelParam(tparam, true);
+      // Check whether the base score is valid.
+      mparam_.Validate(&ctx_);
+    }
+
+    this->CheckModelInitialized();
+  }
+};
+}  // namespace
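`FitIntercept` above estimates `base_score` from the labels on the first boosting round, then maps it through the objective's link function. A sketch under the squared-error assumption, where the estimate is the label mean and the link is the identity (a log-link objective would take `log` of the mean instead; names here are illustrative, not the XGBoost API):

#include <cassert>
#include <cmath>
#include <numeric>
#include <vector>

int main() {
  std::vector<double> labels{1.0, 2.0, 3.0, 6.0};
  // InitEstimation under squared error: the mean label.
  double base_score = std::accumulate(labels.begin(), labels.end(), 0.0) / labels.size();
  // Identity link for squared error; ProbToMargin would apply e.g. log() for Poisson.
  double base_margin = base_score;
  assert(std::abs(base_margin - 3.0) < 1e-12);
}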
+class LearnerConfiguration : public Intercept {
 private:
  std::mutex config_lock_;
@@ -372,71 +489,12 @@ class LearnerConfiguration : public Learner {
  std::vector<std::string> feature_types_;
  common::Monitor monitor_;

-  LearnerModelParamLegacy mparam_;
-  LearnerModelParam learner_model_param_;
  LearnerTrainParam tparam_;
  // Initial prediction.
  PredictionContainer prediction_container_;

  std::vector<std::string> metric_names_;

-  void ConfigureModelParamWithoutBaseScore() {
-    // Convert mparam to learner_model_param
-    this->ConfigureTargets();
-
-    auto task = UsePtr(obj_)->Task();
-    linalg::Tensor<float, 1> base_score({1}, Ctx()->Device());
-    auto h_base_score = base_score.HostView();
-
-    // transform to margin
-    h_base_score(0) = obj_->ProbToMargin(mparam_.base_score);
-    CHECK(tparam_.GetInitialised());
-    // move it to model param, which is shared with all other components.
-    learner_model_param_ =
-        LearnerModelParam(Ctx(), mparam_, std::move(base_score), task, tparam_.multi_strategy);
-    CHECK(learner_model_param_.Initialized());
-    CHECK_NE(learner_model_param_.BaseScore(Ctx()).Size(), 0);
-  }
-  /**
-   * \brief Calculate the `base_score` based on input data.
-   *
-   * \param p_fmat The training DMatrix used to estimate the base score.
-   */
-  void InitBaseScore(DMatrix const* p_fmat) {
-    // Before 1.0.0, we save `base_score` into binary as a transformed value by objective.
-    // After 1.0.0 we save the value provided by user and keep it immutable instead. To
-    // keep the stability, we initialize it in binary LoadModel instead of configuration.
-    // Under what condition should we omit the transformation:
-    //
-    // - base_score is loaded from old binary model.
-    //
-    // What are the other possible conditions:
-    //
-    // - model loaded from new binary or JSON.
-    // - model is created from scratch.
-    // - model is configured second time due to change of parameter
-    if (!learner_model_param_.Initialized()) {
-      this->ConfigureModelParamWithoutBaseScore();
-    }
-    if (mparam_.boost_from_average && !UsePtr(gbm_)->ModelFitted()) {
-      if (p_fmat) {
-        auto const& info = p_fmat->Info();
-        info.Validate(Ctx()->Device());
-        // We estimate it from input data.
-        linalg::Tensor<float, 1> base_score;
-        this->InitEstimation(info, &base_score);
-        CHECK_EQ(base_score.Size(), 1);
-        mparam_.base_score = base_score(0);
-        CHECK(!std::isnan(mparam_.base_score));
-      }
-      // Update the shared model parameter
-      this->ConfigureModelParamWithoutBaseScore();
-      mparam_.Validate(&ctx_);
-    }
-    CHECK(!std::isnan(mparam_.base_score));
-    CHECK(!std::isinf(mparam_.base_score));
-  }
-
 public:
  explicit LearnerConfiguration(std::vector<std::shared_ptr<DMatrix>> cache)
      : need_configuration_{true} {
@@ -484,9 +542,8 @@ class LearnerConfiguration : public Intercept {
    learner_model_param_.task = obj_->Task();  // required by gbm configuration.
    this->ConfigureGBM(old_tparam, args);
-    ctx_.ConfigureGpuId(this->gbm_->UseGPU());

-    this->ConfigureModelParamWithoutBaseScore();
+    this->InitModelUserParam(this->tparam_, this->prediction_container_.Container());

    this->ConfigureMetrics(args);
@@ -499,11 +556,6 @@ class LearnerConfiguration : public Intercept {
    monitor_.Stop("Configure");
  }

-  void CheckModelInitialized() const {
-    CHECK(learner_model_param_.Initialized()) << ModelNotFitted();
-    CHECK_NE(learner_model_param_.BaseScore(this->Ctx()).Size(), 0) << ModelNotFitted();
-  }
-
  void LoadConfig(Json const& in) override {
    // If configuration is loaded, ensure that the model came from the same version
    CHECK(IsA<Object>(in));
@@ -555,8 +607,6 @@ class LearnerConfiguration : public Intercept {
    }
    FromJson(learner_parameters.at("generic_param"), &ctx_);
-    // make sure the GPU ID is valid in new environment before start running configure.
-    ctx_.ConfigureGpuId(false);
    this->need_configuration_ = true;
  }
@@ -613,7 +663,6 @@ class LearnerConfiguration : public Intercept {

  void SetAttr(const std::string& key, const std::string& value) override {
    attributes_[key] = value;
-    mparam_.contain_extra_attrs = 1;
  }

  bool GetAttr(const std::string& key, std::string* out) const override {
@@ -646,6 +695,10 @@ class LearnerConfiguration : public Intercept {
    auto& ft = *p_ft;
    ft = this->feature_types_;
  }
+  [[nodiscard]] CatContainer const* Cats() const override {
+    this->CheckModelInitialized();
+    return this->gbm_->Cats();
+  }

  std::vector<std::string> GetAttrNames() const override {
    std::vector<std::string> out;
@@ -709,7 +762,6 @@ class LearnerConfiguration : public Intercept {
    // FIXME(trivialfis): Make eval_metric a training parameter.
    keys.emplace_back(kEvalMetric);
    keys.emplace_back("num_output_group");
-    keys.emplace_back("gpu_id");  // deprecated param.

    std::sort(keys.begin(), keys.end());
@@ -809,7 +861,6 @@ class LearnerConfiguration : public Intercept {
    auto DupCheck = [&name](std::unique_ptr<Metric> const& m) { return m->Name() != name; };
    if (std::all_of(metrics_.begin(), metrics_.end(), DupCheck)) {
      metrics_.emplace_back(std::unique_ptr<Metric>(Metric::Create(name, &ctx_)));
-      mparam_.contain_eval_metrics = 1;
    }
  }
@@ -823,7 +874,7 @@ class LearnerConfiguration : public Intercept {
   */
  void ConfigureTargets() {
    CHECK(this->obj_);
-    auto const& cache = prediction_container_.Container();
+    auto const& cache = this->prediction_container_.Container();
    bst_target_t n_targets = 1;
    for (auto const& d : cache) {
      if (n_targets == 1) {
@@ -836,15 +887,16 @@ class LearnerConfiguration : public Intercept {

    if (mparam_.num_target > 1) {
      CHECK(n_targets == 1 || n_targets == mparam_.num_target)
-          << "Inconsistent configuration of num_target. Configuration result from input data:"
-          << n_targets << ", configuration from parameter:" << mparam_.num_target;
+          << "Inconsistent configuration of the `num_target`. Configuration result from input "
+          << "data:" << n_targets << ", configuration from parameters:" << mparam_.num_target;
    } else {
      mparam_.num_target = n_targets;
    }
  }

-  void InitEstimation(MetaInfo const& info, linalg::Tensor<float, 1>* base_score) {
-    base_score->Reshape(1);
+  void InitEstimation(MetaInfo const& info, linalg::Vector<float>* base_score) {
+    base_score->SetDevice(this->Ctx()->Device());
+    base_score->Reshape(this->mparam_.OutputLength());
    collective::ApplyWithLabels(this->Ctx(), info, base_score->Data(),
                                [&] { UsePtr(obj_)->InitEstimation(info, base_score); });
  }
@@ -853,11 +905,6 @@ class LearnerConfiguration : public Intercept {
 std::string const LearnerConfiguration::kEvalMetric {"eval_metric"};  // NOLINT

 class LearnerIO : public LearnerConfiguration {
- private:
-  // Used to identify the offset of JSON string when
-  // Will be removed once JSON takes over. Right now we still loads some RDS files from R.
-  std::string const serialisation_header_ { u8"CONFIG-offset:" };
-
 protected:
  void ClearCaches() { this->prediction_container_ = PredictionContainer{}; }
@@ -870,7 +917,7 @@ class LearnerIO : public LearnerConfiguration {
    if (std::get<0>(version) == 1 && std::get<1>(version) < 6) {
      LOG(WARNING)
          << "Found JSON model saved before XGBoost 1.6, please save the model using current "
-             "version again. The support for old JSON model will be discontinued in XGBoost 2.3.";
+             "version again. The support for old JSON model will be discontinued in XGBoost 3.2.";
    }

    auto const& learner = get<Object const>(in["learner"]);
@@ -952,192 +999,6 @@ class LearnerIO : public LearnerConfiguration {
    }
  }

-  // About to be deprecated by JSON format
-  void LoadModel(dmlc::Stream* fi) override {
-    ctx_.UpdateAllowUnknown(Args{});
-    tparam_.Init(std::vector<std::pair<std::string, std::string>>{});
-    // TODO(tqchen) mark deprecation of old format.
-    common::PeekableInStream fp(fi);
-
-    // backward compatible header check.
-    std::string header;
-    header.resize(4);
-    if (fp.PeekRead(&header[0], 4) == 4) {
-      CHECK_NE(header, "bs64")
-          << "Base64 format is no longer supported in brick.";
-      if (header == "binf") {
-        CHECK_EQ(fp.Read(&header[0], 4), 4U);
-      }
-    }
-
-    // FIXME(jiamingy): Move this out of learner after the old binary model is remove.
-    auto first_non_space = [&](std::string::const_iterator beg, std::string::const_iterator end) {
-      for (auto i = beg; i != end; ++i) {
-        if (!std::isspace(*i)) {
-          return i;
-        }
-      }
-      return end;
-    };
-
-    if (header[0] == '{') {  // Dispatch to JSON
-      auto buffer = common::ReadAll(fi, &fp);
-      Json model;
-      auto it = first_non_space(buffer.cbegin() + 1, buffer.cend());
-      if (it != buffer.cend() && *it == '"') {
-        model = Json::Load(StringView{buffer});
-      } else if (it != buffer.cend() && std::isalpha(*it)) {
-        model = Json::Load(StringView{buffer}, std::ios::binary);
-      } else {
-        LOG(FATAL) << "Invalid model format";
-      }
-      this->LoadModel(model);
-      return;
-    }
-
-    // use the peekable reader.
-    fi = &fp;
-    // read parameter
-    CHECK_EQ(fi->Read(&mparam_, sizeof(mparam_)), sizeof(mparam_))
-        << "BoostLearner: wrong model format";
-    if (!DMLC_IO_NO_ENDIAN_SWAP) {
-      mparam_ = mparam_.ByteSwap();
-    }
-    CHECK(fi->Read(&tparam_.objective)) << "BoostLearner: wrong model format";
-    CHECK(fi->Read(&tparam_.booster)) << "BoostLearner: wrong model format";
-
-    obj_.reset(ObjFunction::Create(tparam_.objective, &ctx_));
-    gbm_.reset(GradientBooster::Create(tparam_.booster, &ctx_, &learner_model_param_));
-    gbm_->Load(fi);
-    if (mparam_.contain_extra_attrs != 0) {
-      std::vector<std::pair<std::string, std::string>> attr;
-      fi->Read(&attr);
-      attributes_ = std::map<std::string, std::string>(attr.begin(), attr.end());
-    }
-    bool warn_old_model { false };
-    if (attributes_.find("count_poisson_max_delta_step") != attributes_.cend()) {
-      // Loading model from < 1.0.0, objective is not saved.
-      cfg_["max_delta_step"] = attributes_.at("count_poisson_max_delta_step");
-      attributes_.erase("count_poisson_max_delta_step");
-      warn_old_model = true;
-    } else {
-      warn_old_model = false;
-    }
-
-    if (mparam_.major_version < 1) {
-      // Before 1.0.0, base_score is saved as a transformed value, and there's no version
-      // attribute (saved a 0) in the saved model.
-      std::string multi{"multi:"};
-      if (!std::equal(multi.cbegin(), multi.cend(), tparam_.objective.cbegin())) {
-        HostDeviceVector<float> t;
-        t.HostVector().resize(1);
-        t.HostVector().at(0) = mparam_.base_score;
-        this->obj_->PredTransform(&t);
-        auto base_score = t.HostVector().at(0);
-        mparam_.base_score = base_score;
-      }
-      warn_old_model = true;
-    }
-
-    learner_model_param_ =
-        LearnerModelParam(&ctx_, mparam_,
-                          linalg::Tensor<float, 1>{{std::isnan(mparam_.base_score)
-                                                        ? std::numeric_limits<float>::quiet_NaN()
-                                                        : obj_->ProbToMargin(mparam_.base_score)},
-                                                   {1},
-                                                   DeviceOrd::CPU()},
-                          obj_->Task(), tparam_.multi_strategy);
-
-    if (attributes_.find("objective") != attributes_.cend()) {
-      auto obj_str = attributes_.at("objective");
-      auto j_obj = Json::Load({obj_str.c_str(), obj_str.size()});
-      obj_->LoadConfig(j_obj);
-      attributes_.erase("objective");
-    } else {
-      warn_old_model = true;
-    }
-    if (attributes_.find("metrics") != attributes_.cend()) {
-      auto metrics_str = attributes_.at("metrics");
-      std::vector<std::string> names { common::Split(metrics_str, ';') };
-      attributes_.erase("metrics");
-      for (auto const& n : names) {
-        this->SetParam(kEvalMetric, n);
-      }
-    }
-
-    if (warn_old_model) {
-      LOG(WARNING) << "Loading model from XGBoost < 1.0.0, consider saving it "
-                      "again for improved compatibility";
-    }
-
-    // Renew the version.
-    mparam_.major_version = std::get<0>(Version::Self());
-    mparam_.minor_version = std::get<1>(Version::Self());
-
-    cfg_["num_feature"] = std::to_string(mparam_.num_feature);
-
-    auto n = tparam_.__DICT__();
-    cfg_.insert(n.cbegin(), n.cend());
-
-    this->need_configuration_ = true;
-    this->ClearCaches();
-  }
-
-  // Save model into binary format. The code is about to be deprecated by more robust
-  // JSON serialization format.
-  void SaveModel(dmlc::Stream* fo) const override {
-    this->CheckModelInitialized();
-    CHECK(!this->learner_model_param_.IsVectorLeaf())
-        << "Please use JSON/UBJ format for model serialization with multi-output models.";
-
-    LearnerModelParamLegacy mparam = mparam_;  // make a copy to potentially modify
-    std::vector<std::pair<std::string, std::string>> extra_attr;
-    mparam.contain_extra_attrs = 1;
-
-    if (!this->feature_names_.empty() || !this->feature_types_.empty()) {
-      LOG(WARNING) << "feature names and feature types are being disregarded, use JSON/UBJSON "
-                      "format instead.";
-    }
-
-    {
-      // Similar to JSON model IO, we save the objective.
-      Json j_obj { Object() };
-      obj_->SaveConfig(&j_obj);
-      std::string obj_doc;
-      Json::Dump(j_obj, &obj_doc);
-      extra_attr.emplace_back("objective", obj_doc);
-    }
-    // As of 1.0.0, JVM Package and R Package uses Save/Load model for serialization.
-    // Remove this part once they are ported to use actual serialization methods.
-    if (mparam.contain_eval_metrics != 0) {
-      std::stringstream os;
-      for (auto& ev : metrics_) {
-        os << ev->Name() << ";";
-      }
-      extra_attr.emplace_back("metrics", os.str());
-    }
-
-    std::string header {"binf"};
-    fo->Write(header.data(), 4);
-    if (DMLC_IO_NO_ENDIAN_SWAP) {
-      fo->Write(&mparam, sizeof(LearnerModelParamLegacy));
-    } else {
-      LearnerModelParamLegacy x = mparam.ByteSwap();
-      fo->Write(&x, sizeof(LearnerModelParamLegacy));
-    }
-    fo->Write(tparam_.objective);
-    fo->Write(tparam_.booster);
-    gbm_->Save(fo);
-    if (mparam.contain_extra_attrs != 0) {
-      std::map<std::string, std::string> attr(attributes_);
-      for (const auto& kv : extra_attr) {
-        attr[kv.first] = kv.second;
-      }
-      fo->Write(std::vector<std::pair<std::string, std::string>>(
-          attr.begin(), attr.end()));
-    }
-  }
-
  void Save(dmlc::Stream* fo) const override {
    this->CheckModelInitialized();
@@ -1158,47 +1019,23 @@ class LearnerIO : public LearnerConfiguration {
    common::PeekableInStream fp(fi);
    char header[2];
    fp.PeekRead(header, 2);
-    if (header[0] == '{') {
-      auto buffer = common::ReadAll(fi, &fp);
-      Json memory_snapshot;
-      if (header[1] == '"') {
-        memory_snapshot = Json::Load(StringView{buffer});
-        error::WarnOldSerialization();
-      } else if (std::isalpha(header[1])) {
-        memory_snapshot = Json::Load(StringView{buffer}, std::ios::binary);
-      } else {
-        LOG(FATAL) << "Invalid serialization file.";
-      }
-      if (IsA<Null>(memory_snapshot["Model"])) {
-        // R has xgb.load that doesn't distinguish whether configuration is saved.
-        // We should migrate to use `xgb.load.raw` instead.
-        this->LoadModel(memory_snapshot);
-      } else {
-        this->LoadModel(memory_snapshot["Model"]);
-        this->LoadConfig(memory_snapshot["Config"]);
-      }
+    StringView msg = "Invalid serialization file.";
+    CHECK_EQ(header[0], '{') << msg;
+
+    auto buffer = common::ReadAll(fi, &fp);
+    Json memory_snapshot;
+    CHECK(std::isalpha(header[1])) << msg;
+    if (header[1] == '"') {
+      memory_snapshot = Json::Load(StringView{buffer});
+      error::WarnOldSerialization();
+    } else if (std::isalpha(header[1])) {
+      memory_snapshot = Json::Load(StringView{buffer}, std::ios::binary);
    } else {
-      std::string header;
-      header.resize(serialisation_header_.size());
-      CHECK_EQ(fp.Read(&header[0], header.size()), serialisation_header_.size());
-      // Avoid printing the content in loaded header, which might be random binary code.
-      CHECK(header == serialisation_header_) << error::OldSerialization();
-      int64_t sz {-1};
-      CHECK_EQ(fp.Read(&sz, sizeof(sz)), sizeof(sz));
-      if (!DMLC_IO_NO_ENDIAN_SWAP) {
-        dmlc::ByteSwap(&sz, sizeof(sz), 1);
-      }
-      CHECK_GT(sz, 0);
-      size_t json_offset = static_cast<size_t>(sz);
-      std::string buffer;
-      common::FixedSizeStream{&fp}.Take(&buffer);
-
-      common::MemoryFixSizeBuffer binary_buf(&buffer[0], json_offset);
-      this->LoadModel(&binary_buf);
-
-      auto config = Json::Load({buffer.c_str() + json_offset, buffer.size() - json_offset});
-      this->LoadConfig(config);
+      LOG(FATAL) << "Invalid serialization file.";
    }
+
+    this->LoadModel(memory_snapshot["Model"]);
+    this->LoadConfig(memory_snapshot["Config"]);
  }
};
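The dispatch in `Load` above identifies the serialization format by peeking two bytes: a leading '{' confirms a JSON-family payload, then a '"' second byte indicates textual JSON (the deprecated path) while an alphabetic byte indicates binary UBJSON, whose first type marker follows the opening brace. A standalone sketch of the sniffing with toy buffers (hypothetical helper, not the library API):

#include <cassert>
#include <cctype>
#include <string>

enum class Format { kJsonText, kUbjson, kInvalid };

Format Sniff(std::string const& buf) {
  if (buf.size() < 2 || buf[0] != '{') return Format::kInvalid;
  if (buf[1] == '"') return Format::kJsonText;  // first key as quoted text
  if (std::isalpha(static_cast<unsigned char>(buf[1]))) return Format::kUbjson;
  return Format::kInvalid;
}

int main() {
  assert(Sniff(R"({"Model":{}})") == Format::kJsonText);
  assert(Sniff("{U") == Format::kUbjson);    // UBJSON type marker follows '{'
  assert(Sniff("binf") == Format::kInvalid);  // old binary header is rejected now
}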
- CHECK(header == serialisation_header_) << error::OldSerialization(); - int64_t sz {-1}; - CHECK_EQ(fp.Read(&sz, sizeof(sz)), sizeof(sz)); - if (!DMLC_IO_NO_ENDIAN_SWAP) { - dmlc::ByteSwap(&sz, sizeof(sz), 1); - } - CHECK_GT(sz, 0); - size_t json_offset = static_cast(sz); - std::string buffer; - common::FixedSizeStream{&fp}.Take(&buffer); - - common::MemoryFixSizeBuffer binary_buf(&buffer[0], json_offset); - this->LoadModel(&binary_buf); - - auto config = Json::Load({buffer.c_str() + json_offset, buffer.size() - json_offset}); - this->LoadConfig(config); + LOG(FATAL) << "Invalid serialization file."; } + + this->LoadModel(memory_snapshot["Model"]); + this->LoadConfig(memory_snapshot["Config"]); } }; @@ -1289,7 +1126,7 @@ class LearnerImpl : public LearnerIO { monitor_.Start("UpdateOneIter"); TrainingObserver::Instance().Update(iter); this->Configure(); - this->InitBaseScore(train.get()); + this->FitIntercept(this->tparam_, train.get()); if (ctx_.seed_per_iteration) { common::GlobalRandom().seed(ctx_.seed * kRandSeedMagic + iter); @@ -1305,17 +1142,17 @@ class LearnerImpl : public LearnerIO { monitor_.Stop("PredictRaw"); monitor_.Start("GetGradient"); - GetGradient(predt->predictions, train->Info(), iter, &gpair_); + GetGradient(predt->predictions, train->Info(), iter, &gpair_.gpair); monitor_.Stop("GetGradient"); - TrainingObserver::Instance().Observe(*gpair_.Data(), "Gradients"); + TrainingObserver::Instance().Observe(gpair_.Grad()->Data(), "Gradients"); gbm_->DoBoost(train.get(), &gpair_, predt.get(), obj_.get()); monitor_.Stop("UpdateOneIter"); } - void BoostOneIter(int iter, std::shared_ptr train, - linalg::Matrix* in_gpair) override { - monitor_.Start("BoostOneIter"); + void BoostOneIter(std::int32_t iter, std::shared_ptr train, + GradientContainer* in_gpair) override { + this->monitor_.Start(__func__); this->Configure(); if (ctx_.seed_per_iteration) { @@ -1323,13 +1160,17 @@ class LearnerImpl : public LearnerIO { } this->ValidateDMatrix(train.get(), true); - - CHECK_EQ(this->learner_model_param_.OutputLength(), in_gpair->Shape(1)) - << "The number of columns in gradient should be equal to the number of targets/classes in " - "the model."; + if (in_gpair->HasValueGrad()) { + CHECK_EQ(this->learner_model_param_.OutputLength(), in_gpair->NumTargets()) + << "Value gradient should have the same number of targets as the overall model."; + } else { + CHECK_EQ(this->learner_model_param_.OutputLength(), in_gpair->NumSplitTargets()) + << "The number of columns in gradient should be equal to the number of " + "targets/classes in the model."; + } auto predt = prediction_container_.Cache(train, ctx_.Device()); - gbm_->DoBoost(train.get(), in_gpair, predt.get(), obj_.get()); - monitor_.Stop("BoostOneIter"); + this->gbm_->DoBoost(train.get(), in_gpair, predt.get(), obj_.get()); + this->monitor_.Stop(__func__); } std::string EvalOneIter(int iter, @@ -1372,15 +1213,15 @@ class LearnerImpl : public LearnerIO { } void Predict(std::shared_ptr data, bool output_margin, - HostDeviceVector* out_preds, bst_layer_t layer_begin, - bst_layer_t layer_end, bool training, bool pred_leaf, bool pred_contribs, - bool approx_contribs, bool pred_interactions) override { + HostDeviceVector* out_preds, bst_layer_t layer_begin, bst_layer_t layer_end, + bool training, bool pred_leaf, bool pred_contribs, bool approx_contribs, + bool pred_interactions) override { int multiple_predictions = static_cast(pred_leaf) + static_cast(pred_interactions) + static_cast(pred_contribs); this->Configure(); if (training) { - 
this->InitBaseScore(nullptr); + this->FitIntercept(this->tparam_, nullptr); } this->CheckModelInitialized(); @@ -1491,8 +1332,8 @@ class LearnerImpl : public LearnerIO { } private: - void GetGradient(HostDeviceVector const& preds, MetaInfo const& info, - std::int32_t iter, linalg::Matrix* out_gpair) { + void GetGradient(HostDeviceVector const& preds, MetaInfo const& info, std::int32_t iter, + linalg::Matrix* out_gpair) { out_gpair->Reshape(info.num_row_, this->learner_model_param_.OutputLength()); collective::ApplyWithLabels(&ctx_, info, out_gpair->Data(), [&] { obj_->GetGradient(preds, info, iter, out_gpair); }); @@ -1501,7 +1342,7 @@ class LearnerImpl : public LearnerIO { /*! \brief random number transformation seed. */ static int32_t constexpr kRandSeedMagic = 127; // gradient pairs - linalg::Matrix gpair_; + GradientContainer gpair_; /*! \brief Temporary storage to prediction. Useful for storing data transformed by * objective function */ PredictionContainer output_predictions_; diff --git a/src/linear/coordinate_common.h b/src/linear/coordinate_common.h index f08856bd1bfc..60959f122d00 100644 --- a/src/linear/coordinate_common.h +++ b/src/linear/coordinate_common.h @@ -1,20 +1,21 @@ /** - * Copyright 2018-2023 by XGBoost Contributors + * Copyright 2018-2025, XGBoost Contributors * \author Rory Mitchell */ #pragma once #include +#include // for fpclassify +#include #include #include #include -#include -#include "xgboost/data.h" -#include "xgboost/parameter.h" -#include "./param.h" -#include "../gbm/gblinear_model.h" #include "../common/random.h" #include "../common/threading_utils.h" +#include "../gbm/gblinear_model.h" +#include "./param.h" +#include "xgboost/data.h" +#include "xgboost/parameter.h" namespace xgboost { namespace linear { @@ -64,7 +65,11 @@ inline double CoordinateDelta(double sum_grad, double sum_hess, double w, * \return The weight update. 
*/ inline double CoordinateDeltaBias(double sum_grad, double sum_hess) { - return -sum_grad / sum_hess; + auto b = -sum_grad / sum_hess; + if (std::isnan(b) || std::isinf(b)) { + b = 0; + } + return b; } /** diff --git a/src/linear/updater_coordinate.cc b/src/linear/updater_coordinate.cc index 0d61d7c7cb00..709d1fc8772d 100644 --- a/src/linear/updater_coordinate.cc +++ b/src/linear/updater_coordinate.cc @@ -1,5 +1,5 @@ /** - * Copyright 2018-2023 by XGBoost Contributors + * Copyright 2018-2025, XGBoost Contributors * \author Rory Mitchell */ @@ -49,13 +49,13 @@ class CoordinateUpdater : public LinearUpdater { double sum_instance_weight) override { auto gpair = in_gpair->Data(); tparam_.DenormalizePenalties(sum_instance_weight); - const int ngroup = model->learner_model_param->num_output_group; + auto ngroup = model->learner_model_param->num_output_group; // update bias - for (int group_idx = 0; group_idx < ngroup; ++group_idx) { + for (decltype(ngroup) group_idx = 0; group_idx < ngroup; ++group_idx) { auto grad = GetBiasGradientParallel(group_idx, ngroup, gpair->ConstHostVector(), p_fmat, ctx_->Threads()); - auto dbias = static_cast(tparam_.learning_rate * - CoordinateDeltaBias(grad.first, grad.second)); + auto dbias = + static_cast(tparam_.learning_rate * CoordinateDeltaBias(grad.first, grad.second)); model->Bias()[group_idx] += dbias; UpdateBiasResidualParallel(ctx_, group_idx, ngroup, dbias, &gpair->HostVector(), p_fmat); } @@ -63,7 +63,7 @@ class CoordinateUpdater : public LinearUpdater { selector_->Setup(ctx_, *model, gpair->ConstHostVector(), p_fmat, tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm, cparam_.top_k); // update weights - for (int group_idx = 0; group_idx < ngroup; ++group_idx) { + for (decltype(ngroup) group_idx = 0; group_idx < ngroup; ++group_idx) { for (unsigned i = 0U; i < model->learner_model_param->num_feature; i++) { int fidx = selector_->NextFeature(ctx_, i, *model, group_idx, gpair->ConstHostVector(), p_fmat, diff --git a/src/metric/auc.cu b/src/metric/auc.cu index f5ad30ffd39d..a5f45d6ba76d 100644 --- a/src/metric/auc.cu +++ b/src/metric/auc.cu @@ -6,7 +6,8 @@ #include #include -#include // NOLINT +#include // for pair +#include // for equal_to #include #include #include @@ -372,15 +373,9 @@ double GPUMultiClassAUCOVR(Context const *ctx, MetaInfo const &info, dh::TemporaryArray unique_class_ptr(d_class_ptr.size()); auto d_unique_class_ptr = dh::ToSpan(unique_class_ptr); auto n_uniques = dh::SegmentedUniqueByKey( - ctx->CUDACtx()->TP(), - dh::tbegin(d_class_ptr), - dh::tend(d_class_ptr), - uni_key, - uni_key + d_sorted_idx.size(), - dh::tbegin(d_unique_idx), - d_unique_class_ptr.data(), - dh::tbegin(d_unique_idx), - thrust::equal_to>{}); + ctx->CUDACtx()->TP(), dh::tbegin(d_class_ptr), dh::tend(d_class_ptr), uni_key, + uni_key + d_sorted_idx.size(), dh::tbegin(d_unique_idx), d_unique_class_ptr.data(), + dh::tbegin(d_unique_idx), std::equal_to>{}); d_unique_idx = d_unique_idx.subspan(0, n_uniques); auto get_class_id = [=] XGBOOST_DEVICE(size_t idx) { return idx / n_samples; }; @@ -746,15 +741,9 @@ std::pair GPURankingPRAUCImpl(Context const *ctx, dh::TemporaryArray unique_class_ptr(d_group_ptr.size()); auto d_unique_class_ptr = dh::ToSpan(unique_class_ptr); auto n_uniques = dh::SegmentedUniqueByKey( - ctx->CUDACtx()->TP(), - dh::tbegin(d_group_ptr), - dh::tend(d_group_ptr), - uni_key, - uni_key + d_sorted_idx.size(), - dh::tbegin(d_unique_idx), - d_unique_class_ptr.data(), - dh::tbegin(d_unique_idx), - thrust::equal_to>{}); + ctx->CUDACtx()->TP(), 
dh::tbegin(d_group_ptr), dh::tend(d_group_ptr), uni_key, + uni_key + d_sorted_idx.size(), dh::tbegin(d_unique_idx), d_unique_class_ptr.data(), + dh::tbegin(d_unique_idx), std::equal_to>{}); d_unique_idx = d_unique_idx.subspan(0, n_uniques); auto get_group_id = [=] XGBOOST_DEVICE(size_t idx) { @@ -861,8 +850,8 @@ std::pair GPURankingPRAUC(Context const *ctx, return thrust::make_pair(y * w, (1.0 - y) * w); }); thrust::reduce_by_key(ctx->CUDACtx()->CTP(), key_it, key_it + predts.size(), val_it, - thrust::make_discard_iterator(), totals.begin(), thrust::equal_to{}, - PairPlus{}); + thrust::make_discard_iterator(), totals.begin(), std::equal_to{}, + PairPlus{}); // NOLINT /** * Calculate AUC diff --git a/src/metric/elementwise_metric.cu b/src/metric/elementwise_metric.cu index b66558db670b..98329153bd70 100644 --- a/src/metric/elementwise_metric.cu +++ b/src/metric/elementwise_metric.cu @@ -78,15 +78,10 @@ PackedReduceResult Reduce(Context const* ctx, MetaInfo const& info, Fn&& loss, // - sqrt(avg_t0) + sqrt(avg_t1) + ... sqrt(avg_tm) // distributed auto size = info.labels.Size() * num_preds; - auto const kBlockSize = 2048; - auto n_blocks = size / kBlockSize + 1; - - common::ParallelFor(n_blocks, n_threads, [&](auto block_idx) { - const size_t begin = block_idx * kBlockSize; - const size_t end = std::min(size, begin + kBlockSize); - + std::size_t constexpr kBlockSize = 2048; + common::ParallelFor1d(size, n_threads, [&](auto&& block) { double sum_score = 0, sum_weight = 0; - for (std::size_t i = begin; i < end; ++i) { + for (std::size_t i = block.begin(), n = block.end(); i < n; ++i) { auto [sample_id, target_id] = linalg::UnravelIndex(i, labels.Shape()); auto [v, wt] = loss(i, sample_id, target_id); @@ -98,6 +93,7 @@ PackedReduceResult Reduce(Context const* ctx, MetaInfo const& info, Fn&& loss, score_tloc[t_idx] += sum_score; weight_tloc[t_idx] += sum_weight; }); + double residue_sum = std::accumulate(score_tloc.cbegin(), score_tloc.cend(), 0.0); double weights_sum = std::accumulate(weight_tloc.cbegin(), weight_tloc.cend(), 0.0); result = PackedReduceResult{residue_sum, weights_sum}; @@ -182,7 +178,7 @@ struct EvalRowLogLoss { }; class PseudoErrorLoss : public MetricNoCache { - PesudoHuberParam param_; + PseudoHuberParam param_; public: const char* Name() const override { return "mphe"; } diff --git a/src/metric/rank_metric.cc b/src/metric/rank_metric.cc index d0247c14b3d2..d8f69513c24b 100644 --- a/src/metric/rank_metric.cc +++ b/src/metric/rank_metric.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020-2023 by XGBoost contributors + * Copyright 2020-2025, XGBoost contributors */ #include "rank_metric.h" @@ -10,7 +10,6 @@ #include // for array #include // for log, sqrt #include // for less, greater -#include // for numeric_limits #include // for operator!=, _Rb_tree_const_iterator #include // for allocator, unique_ptr, shared_ptr, __shared_... 
#include // for accumulate @@ -22,7 +21,6 @@ #include "../collective/aggregator.h" // for ApplyWithLabels #include "../common/algorithm.h" // for ArgSort, Sort #include "../common/linalg_op.h" // for cbegin, cend -#include "../common/math.h" // for CmpFirst #include "../common/optional_weight.h" // for OptionalWeights, MakeOptionalWeights #include "dmlc/common.h" // for OMPException #include "metric_common.h" // for MetricNoCache, GPUMetric, PackedReduceResult @@ -250,10 +248,6 @@ class EvalRankWithCache : public Metric { } param_.UpdateAllowUnknown(Args{}); } - void Configure(Args const&) override { - // do not configure, otherwise the ndcg param will be forced into the same as the one in - // objective. - } void LoadConfig(Json const& in) override { if (IsA(in)) { return; @@ -331,7 +325,7 @@ class EvalPrecision : public EvalRankWithCache { auto h_label = info.labels.HostView().Slice(linalg::All(), 0); auto rank_idx = p_cache->SortedIdx(ctx_, predt.ConstHostSpan()); - auto weight = common::MakeOptionalWeights(ctx_, info.weights_); + auto weight = common::MakeOptionalWeights(ctx_->Device(), info.weights_); auto pre = p_cache->Pre(ctx_); common::ParallelFor(p_cache->Groups(), ctx_->Threads(), [&](auto g) { @@ -365,6 +359,18 @@ class EvalNDCG : public EvalRankWithCache { public: using EvalRankWithCache::EvalRankWithCache; + void Configure(Args const& args) override { + // do not configure, otherwise the ndcg param like top-k will be forced into the same + // as the one in objective. The metric has its own syntax for parameter. + for (auto const& [key, value] : args) { + // Make a special case for the exp gain parameter, which is not exposed in the + // metric configuration syntax. + if (key == "ndcg_exp_gain") { + this->param_.UpdateAllowUnknown(Args{{key, value}}); + } + } + } + double Eval(HostDeviceVector const& preds, MetaInfo const& info, std::shared_ptr p_cache) override { if (ctx_->IsCUDA()) { @@ -383,7 +389,7 @@ class EvalNDCG : public EvalRankWithCache { auto h_label = info.labels.HostView(); auto h_predt = linalg::MakeTensorView(ctx_, &preds, preds.Size()); - auto weights = common::MakeOptionalWeights(ctx_, info.weights_); + auto weights = common::MakeOptionalWeights(ctx_->Device(), info.weights_); common::ParallelFor(n_groups, ctx_->Threads(), [&](auto g) { auto g_predt = h_predt.Slice(linalg::Range(group_ptr[g], group_ptr[g + 1])); @@ -459,7 +465,7 @@ class EvalMAPScore : public EvalRankWithCache { }); auto sw = 0.0; - auto weight = common::MakeOptionalWeights(ctx_, info.weights_); + auto weight = common::MakeOptionalWeights(ctx_->Device(), info.weights_); if (!weight.Empty()) { CHECK_EQ(weight.weights.size(), p_cache->Groups()); } diff --git a/src/metric/rank_metric.cu b/src/metric/rank_metric.cu index d43125dcbaf7..b3e41a5a5b53 100644 --- a/src/metric/rank_metric.cu +++ b/src/metric/rank_metric.cu @@ -38,7 +38,7 @@ PackedReduceResult PreScore(Context const *ctx, MetaInfo const &info, predt.SetDevice(ctx->Device()); auto d_rank_idx = p_cache->SortedIdx(ctx, predt.ConstDeviceSpan()); auto topk = p_cache->Param().TopK(); - auto d_weight = common::MakeOptionalWeights(ctx, info.weights_); + auto d_weight = common::MakeOptionalWeights(ctx->Device(), info.weights_); auto it = dh::MakeTransformIterator( thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) { @@ -62,11 +62,12 @@ PackedReduceResult PreScore(Context const *ctx, MetaInfo const &info, thrust::fill_n(cuctx->CTP(), pre.data(), pre.size(), 0.0); std::size_t bytes; - cub::DeviceSegmentedReduce::Sum(nullptr, 
bytes, it, pre.data(), p_cache->Groups(), d_gptr.data(), - d_gptr.data() + 1, cuctx->Stream()); + dh::safe_cuda(cub::DeviceSegmentedReduce::Sum(nullptr, bytes, it, pre.data(), p_cache->Groups(), + d_gptr.data(), d_gptr.data() + 1, cuctx->Stream())); dh::TemporaryArray temp(bytes); - cub::DeviceSegmentedReduce::Sum(temp.data().get(), bytes, it, pre.data(), p_cache->Groups(), - d_gptr.data(), d_gptr.data() + 1, cuctx->Stream()); + dh::safe_cuda(cub::DeviceSegmentedReduce::Sum(temp.data().get(), bytes, it, pre.data(), + p_cache->Groups(), d_gptr.data(), d_gptr.data() + 1, + cuctx->Stream())); auto w_it = dh::MakeTransformIterator(thrust::make_counting_iterator(0ul), @@ -85,7 +86,7 @@ PackedReduceResult NDCGScore(Context const *ctx, MetaInfo const &info, CHECK(p_cache); auto const &p = p_cache->Param(); - auto d_weight = common::MakeOptionalWeights(ctx, info.weights_); + auto d_weight = common::MakeOptionalWeights(ctx->Device(), info.weights_); if (!d_weight.Empty()) { CHECK_EQ(d_weight.weights.size(), p_cache->Groups()); } @@ -166,16 +167,18 @@ PackedReduceResult MAPScore(Context const *ctx, MetaInfo const &info, }); std::size_t bytes; - cub::DeviceSegmentedReduce::Sum(nullptr, bytes, val_it, map.data(), p_cache->Groups(), - d_group_ptr.data(), d_group_ptr.data() + 1, cuctx->Stream()); + dh::safe_cuda(cub::DeviceSegmentedReduce::Sum(nullptr, bytes, val_it, map.data(), + p_cache->Groups(), d_group_ptr.data(), + d_group_ptr.data() + 1, cuctx->Stream())); dh::TemporaryArray temp(bytes); - cub::DeviceSegmentedReduce::Sum(temp.data().get(), bytes, val_it, map.data(), p_cache->Groups(), - d_group_ptr.data(), d_group_ptr.data() + 1, cuctx->Stream()); + dh::safe_cuda(cub::DeviceSegmentedReduce::Sum(temp.data().get(), bytes, val_it, map.data(), + p_cache->Groups(), d_group_ptr.data(), + d_group_ptr.data() + 1, cuctx->Stream())); } PackedReduceResult result{0.0, 0.0}; { - auto d_weight = common::MakeOptionalWeights(ctx, info.weights_); + auto d_weight = common::MakeOptionalWeights(ctx->Device(), info.weights_); if (!d_weight.Empty()) { CHECK_EQ(d_weight.weights.size(), p_cache->Groups()); } diff --git a/src/objective/adaptive.cc b/src/objective/adaptive.cc index b38bbb98eb0d..3f8fe4f2a17e 100644 --- a/src/objective/adaptive.cc +++ b/src/objective/adaptive.cc @@ -15,6 +15,7 @@ #include "../common/threading_utils.h" // ParallelFor #include "../common/transform_iterator.h" // MakeIndexTransformIter #include "../tree/sample_position.h" // for SamplePosition +#include "../tree/tree_view.h" // for WalkTree #include "xgboost/base.h" // bst_node_t #include "xgboost/context.h" // Context #include "xgboost/data.h" // MetaInfo @@ -48,8 +49,8 @@ void EncodeTreeLeafHost(Context const* ctx, RegTree const& tree, CHECK_LE(begin_pos, sorted_pos.size()); std::vector leaf; - tree.WalkTree([&](bst_node_t nidx) { - if (tree[nidx].IsLeaf()) { + tree::WalkTree(tree, [&](auto const& tree, bst_node_t nidx) { + if (tree.IsLeaf(nidx)) { leaf.push_back(nidx); } return true; @@ -79,7 +80,7 @@ void EncodeTreeLeafHost(Context const* ctx, RegTree const& tree, } void UpdateTreeLeafHost(Context const* ctx, std::vector const& position, - std::int32_t group_idx, MetaInfo const& info, float learning_rate, + bst_target_t group_idx, MetaInfo const& info, float learning_rate, HostDeviceVector const& predt, float alpha, RegTree* p_tree) { auto& tree = *p_tree; @@ -162,7 +163,7 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector const& posit } #if !defined(XGBOOST_USE_CUDA) -void UpdateTreeLeafDevice(Context const*, common::Span, 
std::int32_t, +void UpdateTreeLeafDevice(Context const*, common::Span, bst_target_t, MetaInfo const&, float, HostDeviceVector const&, float, RegTree*) { common::AssertGPUSupport(); } diff --git a/src/objective/adaptive.cu b/src/objective/adaptive.cu index 7f2a9175d91d..81ebbcb6b9a5 100644 --- a/src/objective/adaptive.cu +++ b/src/objective/adaptive.cu @@ -1,5 +1,5 @@ /** - * Copyright 2022-2024, XGBoost Contributors + * Copyright 2022-2025, XGBoost Contributors */ #include @@ -8,9 +8,11 @@ #include "../collective/aggregator.h" #include "../common/cuda_context.cuh" // CUDAContext +#include "../common/cuda_stream.h" // for Event, Stream #include "../common/device_helpers.cuh" #include "../common/stats.cuh" #include "../tree/sample_position.h" // for SamplePosition +#include "../tree/tree_view.h" // for WalkTree #include "adaptive.h" #include "xgboost/context.h" @@ -39,8 +41,8 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span pos sorted_position.cbegin(); if (beg_pos == sorted_position.size()) { auto& leaf = p_nidx->HostVector(); - tree.WalkTree([&](bst_node_t nidx) { - if (tree[nidx].IsLeaf()) { + tree::WalkTree(tree, [&](auto const& tree, bst_node_t nidx) { + if (tree.IsLeaf(nidx)) { leaf.push_back(nidx); } return true; @@ -69,10 +71,10 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span pos dh::PinnedMemory pinned_pool; auto pinned = pinned_pool.GetSpan(sizeof(size_t) + sizeof(bst_node_t)); - dh::CUDAStream copy_stream; + curt::Stream copy_stream; size_t* h_num_runs = reinterpret_cast(pinned.subspan(0, sizeof(size_t)).data()); - dh::CUDAEvent e; + curt::Event e; e.Record(cuctx->Stream()); copy_stream.View().Wait(e); // flag for whether there's ignored position @@ -121,8 +123,8 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span pos nidx.Resize(*h_num_runs); std::vector leaves; - tree.WalkTree([&](bst_node_t nidx) { - if (tree[nidx].IsLeaf()) { + tree::WalkTree(tree, [&](auto const& tree, bst_node_t nidx) { + if (tree.IsLeaf(nidx)) { leaves.push_back(nidx); } return true; @@ -142,7 +144,7 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span pos } void UpdateTreeLeafDevice(Context const* ctx, common::Span position, - std::int32_t group_idx, MetaInfo const& info, float learning_rate, + bst_target_t group_idx, MetaInfo const& info, float learning_rate, HostDeviceVector const& predt, float alpha, RegTree* p_tree) { dh::safe_cuda(cudaSetDevice(ctx->Ordinal())); dh::device_vector ridx; diff --git a/src/objective/adaptive.h b/src/objective/adaptive.h index 1a7aef0516d1..5f0b1c8ad11c 100644 --- a/src/objective/adaptive.h +++ b/src/objective/adaptive.h @@ -87,16 +87,16 @@ inline std::size_t IdxY(MetaInfo const& info, bst_group_t group_idx) { } void UpdateTreeLeafDevice(Context const* ctx, common::Span position, - std::int32_t group_idx, MetaInfo const& info, float learning_rate, + bst_target_t group_idx, MetaInfo const& info, float learning_rate, HostDeviceVector const& predt, float alpha, RegTree* p_tree); void UpdateTreeLeafHost(Context const* ctx, std::vector const& position, - std::int32_t group_idx, MetaInfo const& info, float learning_rate, + bst_target_t group_idx, MetaInfo const& info, float learning_rate, HostDeviceVector const& predt, float alpha, RegTree* p_tree); } // namespace detail inline void UpdateTreeLeaf(Context const* ctx, HostDeviceVector const& position, - std::int32_t group_idx, MetaInfo const& info, float learning_rate, + bst_target_t group_idx, MetaInfo const& info, float learning_rate, HostDeviceVector const& predt, float alpha, 
RegTree* p_tree) { if (ctx->IsCUDA()) { position.SetDevice(ctx->Device()); diff --git a/src/objective/aft_obj.cu b/src/objective/aft_obj.cu index 3ad9ca847db7..f535fa0aecae 100644 --- a/src/objective/aft_obj.cu +++ b/src/objective/aft_obj.cu @@ -1,24 +1,21 @@ /** - * Copyright 2019-2023, XGBoost Contributors + * Copyright 2019-2025, XGBoost Contributors * \file aft_obj.cu * \brief Definition of AFT loss for survival analysis. * \author Avinash Barnwal, Hyunsu Cho and Toby Hocking */ -#include -#include -#include -#include +#include // for log +#include // for size_t +#include "../common/linalg_op.h" // for ElementWiseKernel +#include "../common/survival_util.h" +#include "../common/transform.h" #include "xgboost/host_device_vector.h" #include "xgboost/json.h" -#include "xgboost/parameter.h" -#include "xgboost/span.h" #include "xgboost/logging.h" #include "xgboost/objective.h" - -#include "../common/transform.h" -#include "../common/survival_util.h" +#include "xgboost/span.h" using AFTParam = xgboost::common::AFTParam; using ProbabilityDistributionType = xgboost::common::ProbabilityDistributionType; @@ -116,8 +113,11 @@ class AFTObj : public ObjFunction { // do nothing here, since the AFT metric expects untransformed prediction score } - bst_float ProbToMargin(bst_float base_score) const override { - return std::log(base_score); + void ProbToMargin(linalg::Vector* base_score) const override { + auto intercept = base_score->View(this->ctx_->Device()); + linalg::ElementWiseKernel(ctx_, intercept, [=] XGBOOST_DEVICE(std::size_t i) mutable { + intercept(i) = std::log(intercept(i)); + }); } const char* DefaultEvalMetric() const override { diff --git a/src/objective/hinge.cu b/src/objective/hinge.cu index a850df09ea06..285f65c6f4f5 100644 --- a/src/objective/hinge.cu +++ b/src/objective/hinge.cu @@ -1,5 +1,5 @@ /** - * Copyright 2018-2023, XGBoost Contributors + * Copyright 2018-2025, XGBoost Contributors * \file hinge.cc * \brief Provides an implementation of the hinge loss function * \author Henry Gouk @@ -8,14 +8,8 @@ #include // for size_t #include // for int32_t -#include "../common/common.h" // for Range -#if defined(XGBOOST_USE_CUDA) -#include "../common/linalg_op.cuh" -#endif -#if defined(XGBOOST_USE_SYCL) -#include "../../plugin/sycl/common/linalg_op.h" -#endif -#include "../common/linalg_op.h" +#include "../common/common.h" // for Range +#include "../common/linalg_op.h" // for ElementWiseKernel #include "../common/optional_weight.h" // for OptionalWeights #include "../common/transform.h" // for Transform #include "init_estimation.h" // for FitIntercept diff --git a/src/objective/init_estimation.cc b/src/objective/init_estimation.cc index 259e5ceee30a..f94d2f8ba286 100644 --- a/src/objective/init_estimation.cc +++ b/src/objective/init_estimation.cc @@ -32,11 +32,7 @@ void FitIntercept::InitEstimation(MetaInfo const& info, linalg::Vector* b new_obj->GetGradient(dummy_predt, info, 0, &gpair); bst_target_t n_targets = this->Targets(info); - linalg::Vector leaf_weight; - tree::FitStump(this->ctx_, info, gpair, n_targets, &leaf_weight); - // Workaround, we don't support multi-target due to binary model serialization for - // base margin. 
- common::Mean(this->ctx_, leaf_weight, base_score); + tree::FitStump(this->ctx_, info, gpair, n_targets, base_score); this->PredTransform(base_score->Data()); } @@ -45,13 +41,12 @@ void FitInterceptGlmLike::InitEstimation(MetaInfo const& info, if (this->Task().task == ObjInfo::kRegression) { CheckInitInputs(info); } - linalg::Vector out; if (info.weights_.Empty()) { - common::SampleMean(this->ctx_, info.IsColumnSplit(), info.labels, &out); + common::SampleMean(this->ctx_, info.IsColumnSplit(), info.labels, base_score); } else { - common::WeightedSampleMean(this->ctx_, info.IsColumnSplit(), info.labels, info.weights_, &out); + common::WeightedSampleMean(this->ctx_, info.IsColumnSplit(), info.labels, info.weights_, + base_score); } - common::Mean(this->ctx_, out, base_score); - CHECK_EQ(base_score->Size(), 1); + CHECK_GE(base_score->Size(), 1); } } // namespace xgboost::obj diff --git a/src/objective/lambdarank_obj.cc b/src/objective/lambdarank_obj.cc index b19f72e1d46f..c1857bf73d46 100644 --- a/src/objective/lambdarank_obj.cc +++ b/src/objective/lambdarank_obj.cc @@ -16,10 +16,9 @@ #include // for apply, make_tuple #include // for is_floating_point #include // for pair, swap -#include // for vector #include "../common/error_msg.h" // for GroupWeight, LabelScoreSize -#include "../common/linalg_op.h" // for begin, cbegin, cend +#include "../common/linalg_op.h" // for begin, cbegin, cend, SaveVector #include "../common/optional_weight.h" // for MakeOptionalWeights, OptionalWeights #include "../common/ranking_utils.h" // for RankingCache, LambdaRankParam, MAPCache, NDCGC... #include "../common/threading_utils.h" // for ParallelFor, Sched @@ -225,10 +224,23 @@ class LambdaRankObj : public FitIntercept { }; MakePairs(ctx_, iter, p_cache_, g, g_label, g_rank, loop); - if (sum_lambda > 0.0 && param_.lambdarank_normalization) { - double norm = std::log2(1.0 + sum_lambda) / sum_lambda; - std::transform(g_gpair.Values().data(), g_gpair.Values().data() + g_gpair.Size(), - g_gpair.Values().data(), [norm](GradientPair const& g) { return g * norm; }); + if (param_.lambdarank_normalization) { + double norm = 1.0; + if (param_.IsMean()) { + // Normalize using the number of pairs for mean. + auto n_pairs = this->p_cache_->Param().NumPair(); + auto scale = 1.0 / static_cast(n_pairs); + norm = scale; + } else { + // Normalize using gradient for top-k. 
+ if (sum_lambda > 0.0) { + norm = std::log2(1.0 + sum_lambda) / sum_lambda; + } + } + if (norm != 1.0) { + std::transform(linalg::begin(g_gpair), linalg::end(g_gpair), linalg::begin(g_gpair), + [norm](GradientPair const& g) { return g * norm; }); + } } auto w_norm = p_cache_->WeightNorm(); @@ -244,18 +256,11 @@ class LambdaRankObj : public FitIntercept { out["name"] = String(Loss::Name()); out["lambdarank_param"] = ToJson(param_); - auto save_bias = [](linalg::Vector const& in, Json out) { - auto& out_array = get(out); - out_array.resize(in.Size()); - auto h_in = in.HostView(); - std::copy(linalg::cbegin(h_in), linalg::cend(h_in), out_array.begin()); - }; - if (param_.lambdarank_unbiased) { out["ti+"] = F32Array(); - save_bias(ti_plus_, out["ti+"]); + linalg::SaveVector(ti_plus_, &out["ti+"]); out["tj-"] = F32Array(); - save_bias(tj_minus_, out["tj-"]); + linalg::SaveVector(tj_minus_, &out["tj-"]); } } void LoadConfig(Json const& in) override { @@ -265,24 +270,8 @@ class LambdaRankObj : public FitIntercept { } if (param_.lambdarank_unbiased) { - auto load_bias = [](Json in, linalg::Vector* out) { - if (IsA(in)) { - // JSON - auto const& array = get(in); - out->Reshape(array.size()); - auto h_out = out->HostView(); - std::copy(array.cbegin(), array.cend(), linalg::begin(h_out)); - } else { - // UBJSON - auto const& array = get(in); - out->Reshape(array.size()); - auto h_out = out->HostView(); - std::transform(array.cbegin(), array.cend(), linalg::begin(h_out), - [](Json const& v) { return get(v); }); - } - }; - load_bias(in["ti+"], &ti_plus_); - load_bias(in["tj-"], &tj_minus_); + linalg::LoadVector(in["ti+"], &ti_plus_); + linalg::LoadVector(in["tj-"], &tj_minus_); } } @@ -370,16 +359,17 @@ class LambdaRankNDCG : public LambdaRankObj { return; } + auto device = ctx_->Device().IsSycl() ? DeviceOrd::CPU() : ctx_->Device(); bst_group_t n_groups = p_cache_->Groups(); auto gptr = p_cache_->DataGroupPtr(ctx_); - out_gpair->SetDevice(ctx_->Device()); + out_gpair->SetDevice(device); out_gpair->Reshape(info.num_row_, 1); auto h_gpair = out_gpair->HostView(); auto h_predt = predt.ConstHostSpan(); auto h_label = info.labels.HostView(); - auto h_weight = common::MakeOptionalWeights(ctx_, info.weights_); + auto h_weight = common::MakeOptionalWeights(device, info.weights_); auto make_range = [&](bst_group_t g) { return linalg::Range(gptr[g], gptr[g + 1]); }; @@ -497,14 +487,15 @@ class LambdaRankMAP : public LambdaRankObj { bst_group_t n_groups = p_cache_->Groups(); CHECK_EQ(info.labels.Shape(1), 1) << "multi-target for learning to rank is not yet supported."; - out_gpair->SetDevice(ctx_->Device()); + auto device = ctx_->Device().IsSycl() ? 
DeviceOrd::CPU() : ctx_->Device(); + out_gpair->SetDevice(device); out_gpair->Reshape(info.num_row_, this->Targets(info)); auto h_gpair = out_gpair->HostView(); auto h_label = info.labels.HostView().Slice(linalg::All(), 0); auto h_predt = predt.ConstHostSpan(); auto rank_idx = p_cache_->SortedIdx(ctx_, h_predt); - auto h_weight = common::MakeOptionalWeights(ctx_, info.weights_); + auto h_weight = common::MakeOptionalWeights(device, info.weights_); auto make_range = [&](bst_group_t g) { return linalg::Range(gptr[g], gptr[g + 1]); @@ -601,7 +592,7 @@ class LambdaRankPairwise : public LambdaRankObjHostView(); auto h_label = info.labels.HostView().Slice(linalg::All(), 0); auto h_predt = predt.ConstHostSpan(); - auto h_weight = common::MakeOptionalWeights(ctx_, info.weights_); + auto h_weight = common::MakeOptionalWeights(ctx_->Device(), info.weights_); auto make_range = [&](bst_group_t g) { return linalg::Range(gptr[g], gptr[g + 1]); diff --git a/src/objective/lambdarank_obj.cu b/src/objective/lambdarank_obj.cu index eae067a56649..64cca2fdfafe 100644 --- a/src/objective/lambdarank_obj.cu +++ b/src/objective/lambdarank_obj.cu @@ -4,19 +4,18 @@ * \brief CUDA implementation of lambdarank. */ #include // for DMLC_REGISTRY_FILE_TAG - #include // for fill_n #include // for for_each_n #include // for make_counting_iterator #include // for make_zip_iterator #include // for make_tuple, tuple, tie, get -#include // for min -#include // for assert -#include // for abs, log2, isinf -#include // for size_t -#include // for int32_t -#include // for shared_ptr +#include // for min +#include // for assert +#include // for abs, log2, isinf +#include // for size_t +#include // for int32_t +#include // for shared_ptr #include #include "../common/algorithm.cuh" // for SegmentedArgSort @@ -33,7 +32,7 @@ #include "xgboost/host_device_vector.h" // for HostDeviceVector #include "xgboost/linalg.h" // for VectorView, Range, Vector #include "xgboost/logging.h" -#include "xgboost/span.h" // for Span +#include "xgboost/span.h" // for Span namespace xgboost::obj { DMLC_REGISTRY_FILE_TAG(lambdarank_obj_cu); @@ -64,11 +63,11 @@ void MinBias(Context const* ctx, std::shared_ptr p_cache, return std::abs(t_plus(i)); }); std::size_t bytes; - cub::DeviceSegmentedReduce::Min(nullptr, bytes, val_it, d_min.data(), 2, key_it, key_it + 1, - cuctx->Stream()); + dh::safe_cuda(cub::DeviceSegmentedReduce::Min(nullptr, bytes, val_it, d_min.data(), 2, key_it, + key_it + 1, cuctx->Stream())); dh::TemporaryArray temp(bytes); - cub::DeviceSegmentedReduce::Min(temp.data().get(), bytes, val_it, d_min.data(), 2, key_it, - key_it + 1, cuctx->Stream()); + dh::safe_cuda(cub::DeviceSegmentedReduce::Min(temp.data().get(), bytes, val_it, d_min.data(), 2, + key_it, key_it + 1, cuctx->Stream())); } /** @@ -84,7 +83,7 @@ struct GetGradOp { MakePairsOp make_pair; Delta delta; - bool need_update; + bool const need_update; auto __device__ operator()(std::size_t idx) -> GradCostNorm { auto const& args = make_pair.args; @@ -110,7 +109,9 @@ struct GetGradOp { double cost{0}; - auto delta_op = [&](auto const&... args) { return delta(args..., g); }; + auto delta_op = [&](auto const&... 
args) { + return delta(args..., g); + }; GradientPair pg = LambdaGrad(g_label, g_predt, g_rank, rank_high, rank_low, delta_op, args.ti_plus, args.tj_minus, &cost); @@ -120,7 +121,6 @@ struct GetGradOp { if (need_update) { // second run, update the gradient - auto ng = Repulse(pg); auto gr = args.d_roundings(g); @@ -155,6 +155,7 @@ struct GetGradOp { } } } + return thrust::make_tuple(GradientPair{std::abs(pg.GetGrad()), std::abs(pg.GetHess())}, std::abs(cost), -2.0 * static_cast(pg.GetGrad())); } @@ -217,20 +218,20 @@ void CalcGrad(Context const* ctx, MetaInfo const& info, std::shared_ptr(l), thrust::get<1>(r)); double sum_lambda = thrust::get<2>(l) + thrust::get<2>(r); - return thrust::make_tuple(GradientPair{std::abs(grad), std::abs(hess)}, cost, sum_lambda); + return thrust::make_tuple(GradientPair{grad, hess}, cost, sum_lambda); }; auto init = thrust::make_tuple(GradientPair{0.0f, 0.0f}, 0.0, 0.0); common::Span d_max_lambdas = p_cache->MaxLambdas(ctx, n_groups); CHECK_EQ(n_groups * sizeof(GradCostNorm), d_max_lambdas.size_bytes()); - + // Reduce by group. std::size_t bytes; - cub::DeviceSegmentedReduce::Reduce(nullptr, bytes, val_it, d_max_lambdas.data(), n_groups, - d_threads_group_ptr.data(), d_threads_group_ptr.data() + 1, - reduction_op, init, ctx->CUDACtx()->Stream()); + dh::safe_cuda(cub::DeviceSegmentedReduce::Reduce( + nullptr, bytes, val_it, d_max_lambdas.data(), n_groups, d_threads_group_ptr.data(), + d_threads_group_ptr.data() + 1, reduction_op, init, ctx->CUDACtx()->Stream())); dh::TemporaryArray temp(bytes); - cub::DeviceSegmentedReduce::Reduce( + dh::safe_cuda(cub::DeviceSegmentedReduce::Reduce( temp.data().get(), bytes, val_it, d_max_lambdas.data(), n_groups, d_threads_group_ptr.data(), - d_threads_group_ptr.data() + 1, reduction_op, init, ctx->CUDACtx()->Stream()); + d_threads_group_ptr.data() + 1, reduction_op, init, ctx->CUDACtx()->Stream())); dh::TemporaryArray min_bias(2); auto d_min_bias = dh::ToSpan(min_bias); @@ -267,24 +268,37 @@ void CalcGrad(Context const* ctx, MetaInfo const& info, std::shared_ptrDevice(), info.weights_); auto w_norm = p_cache->WeightNorm(); - auto norm = p_cache->Param().lambdarank_normalization; + auto need_norm = p_cache->Param().lambdarank_normalization; + auto n_pairs = p_cache->Param().NumPair(); + bool is_mean = p_cache->Param().IsMean(); + CHECK_EQ(is_mean, !has_truncation); thrust::for_each_n(ctx->CUDACtx()->CTP(), thrust::make_counting_iterator(0ul), d_gpair.Size(), [=] XGBOOST_DEVICE(std::size_t i) mutable { auto g = dh::SegmentId(d_gptr, i); - auto sum_lambda = thrust::get<2>(d_max_lambdas[g]); - // Normalization - if (sum_lambda > 0.0 && norm) { - double norm = std::log2(1.0 + sum_lambda) / sum_lambda; + if (need_norm) { + double norm = 1.0; + if (has_truncation) { + // Normalize using gradient for top-k. + auto sum_lambda = thrust::get<2>(d_max_lambdas[g]); + if (sum_lambda > 0.0) { + norm = std::log2(1.0 + sum_lambda) / sum_lambda; + } + } else { + // Normalize using the number of pairs for mean. + double scale = 1.0 / static_cast(n_pairs); + norm = scale; + } d_gpair(i, 0) *= norm; } + d_gpair(i, 0) *= (d_weights[g] * w_norm); }); } /** - * \brief Handles boilerplate code like getting device span. + * @brief Handles boilerplate code like getting device spans. 
*/ template void Launch(Context const* ctx, std::int32_t iter, HostDeviceVector const& preds, @@ -304,7 +318,6 @@ void Launch(Context const* ctx, std::int32_t iter, HostDeviceVector const out_gpair->Reshape(preds.Size(), 1); CHECK(p_cache); - auto d_rounding = p_cache->CUDARounding(ctx); auto d_cost_rounding = p_cache->CUDACostRounding(ctx); @@ -577,11 +590,13 @@ void LambdaRankUpdatePositionBias(Context const* ctx, linalg::VectorViewCUDACtx()->Stream()); + dh::safe_cuda(cub::DeviceSegmentedReduce::Reduce(nullptr, bytes, val_it, out_it, k, key_it, + key_it + 1, ReduceOp{}, init, + ctx->CUDACtx()->Stream())); dh::TemporaryArray temp(bytes); - cub::DeviceSegmentedReduce::Reduce(temp.data().get(), bytes, val_it, out_it, k, key_it, - key_it + 1, ReduceOp{}, init, ctx->CUDACtx()->Stream()); + dh::safe_cuda(cub::DeviceSegmentedReduce::Reduce(temp.data().get(), bytes, val_it, out_it, k, + key_it, key_it + 1, ReduceOp{}, init, + ctx->CUDACtx()->Stream())); thrust::for_each_n(ctx->CUDACtx()->CTP(), thrust::make_counting_iterator(0ul), li.Size(), [=] XGBOOST_DEVICE(std::size_t i) mutable { diff --git a/src/objective/lambdarank_obj.cuh b/src/objective/lambdarank_obj.cuh index e1a78f905434..8e94befa2462 100644 --- a/src/objective/lambdarank_obj.cuh +++ b/src/objective/lambdarank_obj.cuh @@ -136,9 +136,10 @@ struct MakePairsOp { // The index pointing to the first element of the next bucket std::size_t right_bound = n_data - n_rights; - thrust::minstd_rand rng(args.iter); + std::uint32_t seed = args.iter * (static_cast(args.d_group_ptr.size()) - 1) + g; + thrust::minstd_rand rng(seed); auto pair_idx = i; - rng.discard(sample_pair_idx * n_data + g + pair_idx); // fixme + rng.discard(idx - args.d_threads_group_ptr[g]); // idx within group thrust::uniform_int_distribution dist(0, n_lefts + n_rights - 1); auto ridx = dist(rng); SPAN_CHECK(ridx < n_lefts + n_rights); diff --git a/src/objective/lambdarank_obj.h b/src/objective/lambdarank_obj.h index 113fce832492..bb8484ce0317 100644 --- a/src/objective/lambdarank_obj.h +++ b/src/objective/lambdarank_obj.h @@ -227,15 +227,16 @@ void MakePairs(Context const* ctx, std::int32_t iter, ltr::position_t cnt = group_ptr[g + 1] - group_ptr[g]; if (cache->Param().HasTruncation()) { - for (std::size_t i = 0; i < std::min(cnt, cache->Param().NumPair()); ++i) { + for (std::size_t i = 0, n = std::min(cnt, cache->Param().NumPair()); i < n; ++i) { for (std::size_t j = i + 1; j < cnt; ++j) { op(i, j); } } } else { CHECK_EQ(g_rank.size(), g_label.Size()); - std::minstd_rand rnd(iter); - rnd.discard(g); // fixme(jiamingy): honor the global seed + + std::uint32_t seed = (iter + 1) * (static_cast(group_ptr.size()) - 1) + g; + std::minstd_rand rnd(seed); // sort label according to the rank list auto it = common::MakeIndexTransformIter( [&g_rank, &g_label](std::size_t idx) { return g_label(g_rank[idx]); }); @@ -244,7 +245,6 @@ void MakePairs(Context const* ctx, std::int32_t iter, // permutation iterator to get the original label auto rev_it = common::MakeIndexTransformIter( [&](std::size_t idx) { return g_label(g_rank[y_sorted_idx[idx]]); }); - for (std::size_t i = 0; i < cnt;) { std::size_t j = i + 1; // find the bucket boundary diff --git a/src/objective/multiclass_obj.cu b/src/objective/multiclass_obj.cu index 1a3df38841bd..118a2bf71a79 100644 --- a/src/objective/multiclass_obj.cu +++ b/src/objective/multiclass_obj.cu @@ -1,156 +1,177 @@ /** - * Copyright 2015-2023, XGBoost Contributors + * Copyright 2015-2025, XGBoost Contributors * \file multi_class.cc * \brief Definition of 
multi-class classification objectives. * \author Tianqi Chen */ #include -#include -#include +#include // for assert #include -#include -#include "xgboost/parameter.h" +#include "../collective/aggregator.h" // for GlobalSum +#include "../common/common.h" // for AssertGPUSupport +#include "../common/linalg_op.h" +#include "../common/math.h" +#include "../common/optional_weight.h" // for MakeOptionalWeights +#include "../common/transform.h" #include "xgboost/data.h" +#include "xgboost/json.h" #include "xgboost/logging.h" #include "xgboost/objective.h" -#include "xgboost/json.h" -#include "../common/common.h" -#include "../common/math.h" -#include "../common/transform.h" +#if defined(XGBOOST_USE_CUDA) -#include "multiclass_param.h" +#include "../common/algorithm.cuh" // for AllOf +#include "../common/cuda_context.cuh" // for CUDAContext -namespace xgboost { -namespace obj { +#endif // defined(XGBOOST_USE_CUDA) + +#include "multiclass_param.h" +namespace xgboost::obj { #if defined(XGBOOST_USE_CUDA) DMLC_REGISTRY_FILE_TAG(multiclass_obj_gpu); #endif // defined(XGBOOST_USE_CUDA) +namespace { +void ValidateLabel(Context const* ctx, MetaInfo const& info, std::int64_t n_classes) { + auto label = info.labels.View(ctx->Device()); + CHECK_LE(label.Shape(1), 1) << "multi-class-multi-label is not yet supported."; + auto check = [=] XGBOOST_DEVICE(float y) -> bool { + return y >= 0 && y < n_classes && std::floor(y) == y; + }; + auto valid = ctx->DispatchDevice( + [&] { return std::all_of(linalg::cbegin(label), linalg::cend(label), check); }, + [&] { +#if defined(XGBOOST_USE_CUDA) + return common::AllOf(ctx->CUDACtx()->CTP(), linalg::tcbegin(label), linalg::tcend(label), + check); +#else + common::AssertGPUSupport(); + return false; +#endif // defined(XGBOOST_USE_CUDA) + }, + [&] { +#if defined(XGBOOST_USE_SYCL) + return sycl::linalg::Validate(ctx->Device(), label, check); +#else + common::AssertSYCLSupport(); + return false; +#endif // defined(XGBOOST_USE_SYCL) + }); + CHECK(valid) + << "SoftmaxMultiClassObj: label must be discrete values in the range of [0, num_class)."; +} +} // namespace + class SoftmaxMultiClassObj : public ObjFunction { public: - explicit SoftmaxMultiClassObj(bool output_prob) - : output_prob_(output_prob) {} + explicit SoftmaxMultiClassObj(bool output_prob) : output_prob_(output_prob) {} - void Configure(Args const& args) override { - param_.UpdateAllowUnknown(args); - } + void Configure(Args const& args) override { param_.UpdateAllowUnknown(args); } ObjInfo Task() const override { return ObjInfo::kClassification; } - void GetGradient(const HostDeviceVector& preds, const MetaInfo& info, std::int32_t, + void GetGradient(HostDeviceVector const& preds, const MetaInfo& info, std::int32_t iter, linalg::Matrix* out_gpair) override { if (info.labels.Size() == 0) { return; } - CHECK(preds.Size() == (static_cast(param_.num_class) * info.labels.Size())) + std::int64_t n_classes = param_.num_class; + CHECK(preds.Size() == (static_cast(n_classes) * info.labels.Size())) << "SoftmaxMultiClassObj: label size and pred size does not match.\n" - << "label.Size() * num_class: " - << info.labels.Size() * static_cast(param_.num_class) << "\n" + << "label.Size() * num_class: " << info.labels.Size() * n_classes << "\n" << "num_class: " << param_.num_class << "\n" << "preds.Size(): " << preds.Size(); - const int nclass = param_.num_class; - const auto ndata = static_cast(preds.Size() / nclass); + if (iter == 0) { + ValidateLabel(this->ctx_, info, n_classes); + } - auto device = ctx_->Device(); - 
out_gpair->SetDevice(device); - info.labels.SetDevice(device); - info.weights_.SetDevice(device); - preds.SetDevice(device); + const auto n_samples = preds.Size() / n_classes; + CHECK_EQ(n_samples, info.num_row_); - label_correct_.Resize(1); - label_correct_.SetDevice(device); + // fallback to cpu if current device doesn't supports fp64 + auto device = ctx_->DeviceFP64(); + auto labels = info.labels.View(device); - out_gpair->Reshape(info.num_row_, static_cast(nclass)); - label_correct_.Fill(1); + out_gpair->SetDevice(device); + out_gpair->Reshape(info.num_row_, n_classes); + auto gpair = out_gpair->View(device); - const bool is_null_weight = info.weights_.Size() == 0; - if (!is_null_weight) { - CHECK_EQ(info.weights_.Size(), ndata) + if (!info.weights_.Empty()) { + CHECK_EQ(info.weights_.Size(), n_samples) << "Number of weights should be equal to number of data points."; } + info.weights_.SetDevice(device); + auto weights = common::MakeOptionalWeights(this->ctx_->Device(), info.weights_); - common::Transform<>::Init( - [=] XGBOOST_DEVICE(size_t idx, - common::Span gpair, - common::Span labels, - common::Span preds, - common::Span weights, - common::Span _label_correct) { - common::Span point = preds.subspan(idx * nclass, nclass); - - // Part of Softmax function - bst_float wmax = std::numeric_limits::min(); - for (auto const i : point) { wmax = fmaxf(i, wmax); } - double wsum = 0.0f; - for (auto const i : point) { wsum += expf(i - wmax); } - auto label = labels[idx]; - if (label < 0 || label >= nclass) { - _label_correct[0] = 0; - label = 0; - } - bst_float wt = is_null_weight ? 1.0f : weights[idx]; - for (int k = 0; k < nclass; ++k) { - // Computation duplicated to avoid creating a cache. - bst_float p = expf(point[k] - wmax) / static_cast(wsum); - const float eps = 1e-16f; - const bst_float h = fmax(2.0f * p * (1.0f - p) * wt, eps); - p = label == k ? p - 1.0f : p; - gpair[idx * nclass + k] = GradientPair(p * wt, h); - } - }, common::Range{0, ndata}, ctx_->Threads(), device) - .Eval(out_gpair->Data(), info.labels.Data(), &preds, &info.weights_, &label_correct_); - - std::vector& label_correct_h = label_correct_.HostVector(); - for (auto const flag : label_correct_h) { - if (flag != 1) { - LOG(FATAL) << "SoftmaxMultiClassObj: label must be in [0, num_class)."; + preds.SetDevice(device); + auto predt = linalg::MakeTensorView(this->ctx_, &preds, n_samples, n_classes); + CHECK_EQ(labels.Shape(1), 1); + auto y1d = labels.Slice(linalg::All(), 0); + CHECK_EQ(y1d.Shape(0), info.num_row_); + linalg::ElementWiseKernel(this->ctx_, y1d, [=] XGBOOST_DEVICE(std::size_t idx) mutable { + auto point = predt.Slice(idx, linalg::All()); + assert(point.Size() == static_cast(n_classes)); + + // Part of the common::Softmax function + float wmax = std::numeric_limits::min(); + for (std::size_t k = 0, m = point.Size(); k < m; ++k) { + wmax = fmaxf(point(k), wmax); } - } + double wsum = 0.0f; + for (std::size_t k = 0, m = point.Size(); k < m; ++k) { + wsum += expf(point(k) - wmax); + } + auto label = y1d(idx); + + float wt = weights[idx]; + for (decltype(n_classes) k = 0; k < n_classes; ++k) { + // Computation duplicated to avoid creating a cache. + float p = expf(point(k) - wmax) / static_cast(wsum); + constexpr float kEps = 1e-16f; + float h = fmax(2.0f * p * (1.0f - p) * wt, kEps); + p = label == k ? 
p - 1.0f : p; + gpair(idx, k) = GradientPair{p * wt, h}; + } + }); } - void PredTransform(HostDeviceVector* io_preds) const override { + + void PredTransform(HostDeviceVector* io_preds) const override { this->Transform(io_preds, output_prob_); } - void EvalTransform(HostDeviceVector* io_preds) override { + void EvalTransform(HostDeviceVector* io_preds) override { this->Transform(io_preds, true); } - const char* DefaultEvalMetric() const override { - return "mlogloss"; - } + const char* DefaultEvalMetric() const override { return "mlogloss"; } - inline void Transform(HostDeviceVector *io_preds, bool prob) const { - const int nclass = param_.num_class; - const auto ndata = static_cast(io_preds->Size() / nclass); + void Transform(HostDeviceVector* io_preds, bool prob) const { + const int n_classes = param_.num_class; + const auto n_samples = static_cast(io_preds->Size() / n_classes); auto device = io_preds->Device(); if (prob) { common::Transform<>::Init( - [=] XGBOOST_DEVICE(size_t _idx, common::Span _preds) { - common::Span point = - _preds.subspan(_idx * nclass, nclass); + [=] XGBOOST_DEVICE(size_t _idx, common::Span _preds) { + common::Span point = _preds.subspan(_idx * n_classes, n_classes); common::Softmax(point.begin(), point.end()); }, - common::Range{0, ndata}, this->ctx_->Threads(), device) + common::Range{0, n_samples}, this->ctx_->Threads(), device) .Eval(io_preds); } else { io_preds->SetDevice(device); - HostDeviceVector max_preds; + HostDeviceVector max_preds; max_preds.SetDevice(device); - max_preds.Resize(ndata); + max_preds.Resize(n_samples); common::Transform<>::Init( - [=] XGBOOST_DEVICE(size_t _idx, common::Span _preds, - common::Span _max_preds) { - common::Span point = - _preds.subspan(_idx * nclass, nclass); - _max_preds[_idx] = - common::FindMaxIndex(point.cbegin(), point.cend()) - - point.cbegin(); + [=] XGBOOST_DEVICE(size_t _idx, common::Span _preds, + common::Span _max_preds) { + common::Span point = _preds.subspan(_idx * n_classes, n_classes); + _max_preds[_idx] = common::FindMaxIndex(point.cbegin(), point.cend()) - point.cbegin(); }, - common::Range{0, ndata}, this->ctx_->Threads(), device) + common::Range{0, n_samples}, this->ctx_->Threads(), device) .Eval(io_preds, &max_preds); io_preds->Resize(max_preds.Size()); io_preds->Copy(max_preds); @@ -167,29 +188,44 @@ class SoftmaxMultiClassObj : public ObjFunction { out["softmax_multiclass_param"] = ToJson(param_); } - void LoadConfig(Json const& in) override { - FromJson(in["softmax_multiclass_param"], ¶m_); + void LoadConfig(Json const& in) override { FromJson(in["softmax_multiclass_param"], ¶m_); } + + void InitEstimation(MetaInfo const& info, linalg::Vector* base_score) const override { + std::int64_t n_classes = this->param_.num_class; + ValidateLabel(this->ctx_, info, n_classes); + + *base_score = linalg::Zeros(this->ctx_, n_classes); + + std::size_t n = info.labels.Size(); + + auto labels = info.labels.View(ctx_->Device()); + auto weights = common::MakeOptionalWeights(this->ctx_->Device(), info.weights_); + auto intercept = base_score->View(ctx_->Device()); + CHECK_EQ(intercept.Size(), n_classes); + CHECK_EQ(n, info.num_row_); + linalg::SmallHistogram(ctx_, labels, weights, intercept); + auto sum_weight = common::SumOptionalWeights(this->ctx_, weights, n); + auto status = collective::GlobalSum(this->ctx_, info, intercept, &sum_weight); + collective::SafeColl(status); + CHECK_GE(sum_weight, kRtEps); + linalg::VecScaDiv(this->ctx_, intercept, sum_weight); } private: // output probability - bool output_prob_; + 
   bool const output_prob_;
   // parameter
   SoftmaxMultiClassParam param_;
-  // Cache for max_preds
-  HostDeviceVector<int> label_correct_;
 };
 
 // register the objective functions
 DMLC_REGISTER_PARAMETER(SoftmaxMultiClassParam);
 
 XGBOOST_REGISTER_OBJECTIVE(SoftmaxMultiClass, "multi:softmax")
-.describe("Softmax for multi-class classification, output class index.")
-.set_body([]() { return new SoftmaxMultiClassObj(false); });
+    .describe("Softmax for multi-class classification, output class index.")
+    .set_body([]() { return new SoftmaxMultiClassObj(false); });
 
 XGBOOST_REGISTER_OBJECTIVE(SoftprobMultiClass, "multi:softprob")
-.describe("Softmax for multi-class classification, output probability distribution.")
-.set_body([]() { return new SoftmaxMultiClassObj(true); });
-
-}  // namespace obj
-}  // namespace xgboost
+    .describe("Softmax for multi-class classification, output probability distribution.")
+    .set_body([]() { return new SoftmaxMultiClassObj(true); });
+}  // namespace xgboost::obj
diff --git a/src/objective/multiclass_param.h b/src/objective/multiclass_param.h
index d1dea15fd0d4..b25f38f29732 100644
--- a/src/objective/multiclass_param.h
+++ b/src/objective/multiclass_param.h
@@ -1,25 +1,21 @@
-/*!
- * Copyright 2015-2023 by Contributors
- * \file multiclass_param.h
- * \brief Definition of multi-class classification parameters.
+/**
+ * Copyright 2015-2025, XGBoost Contributors
+ *
+ * @brief Definition of multi-class classification parameters.
  */
 #ifndef XGBOOST_OBJECTIVE_MULTICLASS_PARAM_H_
 #define XGBOOST_OBJECTIVE_MULTICLASS_PARAM_H_
 
 #include "xgboost/parameter.h"
 
-namespace xgboost {
-namespace obj {
-
+namespace xgboost::obj {
 struct SoftmaxMultiClassParam : public XGBoostParameter<SoftmaxMultiClassParam> {
-  int num_class;
+  int num_class{1};
   // declare parameters
   DMLC_DECLARE_PARAMETER(SoftmaxMultiClassParam) {
-    DMLC_DECLARE_FIELD(num_class).set_lower_bound(1)
-        .describe("Number of output class in the multi-class classification.");
+    DMLC_DECLARE_FIELD(num_class).set_lower_bound(1).describe(
+        "Number of output class in the multi-class classification.");
   }
 };
-
-}  // namespace obj
-}  // namespace xgboost
+}  // namespace xgboost::obj
 #endif  // XGBOOST_OBJECTIVE_MULTICLASS_PARAM_H_
diff --git a/src/objective/objective.cc b/src/objective/objective.cc
index cc54d14c23cc..8731394dfc25 100644
--- a/src/objective/objective.cc
+++ b/src/objective/objective.cc
@@ -1,15 +1,14 @@
-/*!
- * Copyright 2015-2022 by Contributors
- * \file objective.cc
- * \brief Registry of all objective functions.
+/**
+ * Copyright 2015-2025, XGBoost Contributors
+ *
+ * @brief Registry of all objective functions.
  */
 #include
 #include
 #include
-#include
-
-#include "xgboost/host_device_vector.h"
+#include <sstream>  // for stringstream
+#include <string>   // for string
 
 namespace dmlc {
 DMLC_REGISTRY_ENABLE(::xgboost::ObjFunctionReg);
@@ -33,10 +32,10 @@ ObjFunction* ObjFunction::Create(const std::string& name, Context const* ctx) {
   return pobj;
 }
 
-void ObjFunction::InitEstimation(MetaInfo const&, linalg::Tensor<float, 1>* base_score) const {
+void ObjFunction::InitEstimation(MetaInfo const& info, linalg::Vector<float>* base_score) const {
   CHECK(base_score);
-  base_score->Reshape(1);
-  (*base_score)(0) = DefaultBaseScore();
+  auto n_targets = this->Targets(info);
+  *base_score = linalg::Constant(this->ctx_, DefaultBaseScore(), n_targets);
 }
 }  // namespace xgboost
diff --git a/src/objective/quantile_obj.cu b/src/objective/quantile_obj.cu
index c88b3e836c71..dbe25e72d735 100644
--- a/src/objective/quantile_obj.cu
+++ b/src/objective/quantile_obj.cu
@@ -1,5 +1,5 @@
 /**
- * Copyright 2023-2024, XGBoost contributors
+ * Copyright 2023-2025, XGBoost contributors
  */
 #include <array>    // std::array
 #include <cstddef>  // std::size_t
@@ -10,7 +10,6 @@
 #include "../common/quantile_loss_utils.h"  // QuantileLossParam
 #include "../common/stats.h"                // Quantile,WeightedQuantile
 #include "adaptive.h"                       // UpdateTreeLeaf
-#include "dmlc/parameter.h"                 // DMLC_DECLARE_PARAMETER
 #include "init_estimation.h"                // CheckInitInputs
 #include "xgboost/base.h"                   // GradientPair,XGBOOST_DEVICE,bst_target_t
 #include "xgboost/data.h"                   // MetaInfo
@@ -18,19 +17,13 @@
 #include "xgboost/json.h"       // Json,String,ToJson,FromJson
 #include "xgboost/linalg.h"     // Tensor,MakeTensorView,MakeVec
 #include "xgboost/objective.h"  // ObjFunction
-#include "xgboost/parameter.h"  // XGBoostParameter
 
 #if defined(XGBOOST_USE_CUDA)
-#include "../common/linalg_op.cuh"  // ElementWiseKernel
 #include "../common/stats.cuh"      // SegmentedQuantile
 #endif  // defined(XGBOOST_USE_CUDA)
 
-#if defined(XGBOOST_USE_SYCL)
-#include "../../plugin/sycl/common/linalg_op.h"  // ElementWiseKernel
-#endif
-
 namespace xgboost::obj {
 class QuantileRegression : public ObjFunction {
   common::QuantileLossParam param_;
@@ -107,7 +100,6 @@ class QuantileRegression : public ObjFunction {
     base_score->SetDevice(ctx_->Device());
     base_score->Reshape(n_targets);
 
-    double sw{0};
     if (ctx_->IsCUDA()) {
 #if defined(XGBOOST_USE_CUDA)
       alpha_.SetDevice(ctx_->Device());
@@ -127,7 +119,6 @@
       if (info.weights_.Empty()) {
         common::SegmentedQuantile(ctx_, d_alpha.data(), seg_it, seg_it + d_alpha.size() + 1,
                                   val_it, val_it + n, base_score->Data());
-        sw = info.num_row_;
       } else {
         info.weights_.SetDevice(ctx_->Device());
         auto d_weights = info.weights_.ConstDeviceSpan();
@@ -139,8 +130,6 @@
         common::SegmentedWeightedQuantile(ctx_, d_alpha.data(), seg_it, seg_it + d_alpha.size() + 1,
                                           val_it, val_it + n, weight_it, weight_it + n,
                                           base_score->Data());
-        sw = dh::Reduce(ctx_->CUDACtx()->CTP(), dh::tcbegin(d_weights), dh::tcend(d_weights), 0.0,
-                        thrust::plus<>{});
       }
 #else
       common::AssertGPUSupport();
@@ -148,11 +137,6 @@
     } else {
       auto quantiles = base_score->HostView();
       auto h_weights = info.weights_.ConstHostVector();
-      if (info.weights_.Empty()) {
-        sw = info.num_row_;
-      } else {
-        sw = std::accumulate(std::cbegin(h_weights), std::cend(h_weights), 0.0);
-      }
       for (bst_target_t t{0}; t < n_targets; ++t) {
         auto alpha = param_.quantile_alpha[t];
         auto h_labels = info.labels.HostView();
@@ -167,25 +151,18 @@ class QuantileRegression : public ObjFunction {
       }
     }
 
-    // For multiple quantiles, we should extend the base score to a vector instead of
-    // computing the average. For now, this is a workaround.
-    linalg::Vector<float> temp;
-    common::Mean(ctx_, *base_score, &temp);
-    double meanq = temp(0) * sw;
-
-    std::array<double, 2> dat{meanq, sw};
-    auto rc = collective::GlobalSum(ctx_, info, linalg::MakeVec(dat.data(), dat.size()));
-    collective::SafeColl(rc);
-
-    std::tie(meanq, sw) = std::tuple_cat(dat);
-    meanq /= (sw + kRtEps);
-    base_score->Reshape(1);
-    base_score->Data()->Fill(meanq);
+    // Global mean. There's no strong preference on whether weighted mean should be used
+    // with weighted quantiles. The proper way to do this might be using an approximated
+    // quantile algorithm with stream inputs, but it's also much more expensive.
+    auto intercept = base_score->View(this->ctx_->Device());
+    collective::SafeColl(collective::GlobalSum(ctx_, info, intercept));
+    double n_workers = info.IsColumnSplit() ? 1.0 : collective::GetWorldSize();
+    linalg::VecScaDiv(ctx_, intercept, n_workers);
   }
 
   void UpdateTreeLeaf(HostDeviceVector<bst_node_t> const& position, MetaInfo const& info,
                       float learning_rate, HostDeviceVector<float> const& prediction,
-                      std::int32_t group_idx, RegTree* p_tree) const override {
+                      bst_target_t group_idx, RegTree* p_tree) const override {
     auto alpha = param_.quantile_alpha[group_idx];
     ::xgboost::obj::UpdateTreeLeaf(ctx_, position, group_idx, info, learning_rate, prediction,
                                    alpha, p_tree);
diff --git a/src/objective/regression_loss.h b/src/objective/regression_loss.h
index d2710d35a65e..c65d57d6e946 100644
--- a/src/objective/regression_loss.h
+++ b/src/objective/regression_loss.h
@@ -1,16 +1,13 @@
 /**
- * Copyright 2017-2023 by XGBoost contributors
+ * Copyright 2017-2025, XGBoost contributors
  */
 #ifndef XGBOOST_OBJECTIVE_REGRESSION_LOSS_H_
 #define XGBOOST_OBJECTIVE_REGRESSION_LOSS_H_
 
-#include
-
 #include
 
 #include "../common/math.h"
-#include "xgboost/data.h"  // MetaInfo
-#include "xgboost/logging.h"
+#include "xgboost/string_view.h"
 #include "xgboost/task.h"  // ObjInfo
 
 namespace xgboost::obj {
@@ -22,7 +19,11 @@ struct LinearSquareLoss {
     return predt - label;
   }
   XGBOOST_DEVICE static bst_float SecondOrderGradient(bst_float, bst_float) { return 1.0f; }
-  static bst_float ProbToMargin(bst_float base_score) { return base_score; }
+
+  XGBOOST_DEVICE static float ProbToMargin(float base_score) { return base_score; }
+  constexpr static StringView InterceptErrorMsg() { return ""; }
+  XGBOOST_DEVICE static bool CheckIntercept(float) { return true; }
+
   static const char* LabelErrorMsg() { return ""; }
   static const char* DefaultEvalMetric() { return "rmse"; }
 
@@ -43,7 +44,11 @@ struct SquaredLogError {
     res = fmaxf(res, 1e-6f);
     return res;
   }
-  static bst_float ProbToMargin(bst_float base_score) { return base_score; }
+
+  XGBOOST_DEVICE static float ProbToMargin(float base_score) { return base_score; }
+  constexpr static StringView InterceptErrorMsg() { return ""; }
+  XGBOOST_DEVICE static bool CheckIntercept(float) { return true; }
+
   static const char* LabelErrorMsg() {
     return "label must be greater than -1 for rmsle so that log(label + 1) can be valid.";
   }
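Editor's note: the losses above now expose intercept validation as device-friendly static hooks instead of a host-side CHECK. Below is a minimal CPU-only sketch of how the three hooks compose; `LogisticLossSketch` and `ProbToMarginSketch` are illustrative names, not part of this patch, which wires the same steps through the `ProbToMarginImpl` helper added further down in regression_obj.cu.

#include <cassert>
#include <cmath>
#include <vector>

// Stand-ins for the static hooks this patch adds to each loss type.
struct LogisticLossSketch {
  static bool CheckIntercept(float base_score) {
    return base_score > 0.0f && base_score < 1.0f;
  }
  static float ProbToMargin(float base_score) {
    // The logit: the raw margin whose sigmoid equals base_score.
    return -std::log(1.0f / base_score - 1.0f);
  }
};

// CPU-only analogue of ProbToMarginImpl: validate every intercept first,
// then transform the vector in place.
template <typename Loss>
void ProbToMarginSketch(std::vector<float>* base_score) {
  for (float v : *base_score) {
    assert(Loss::CheckIntercept(v) && "base_score must be in (0,1)");
  }
  for (float& v : *base_score) {
    v = Loss::ProbToMargin(v);
  }
}

int main() {
  std::vector<float> intercepts{0.5f, 0.9f};
  ProbToMarginSketch<LogisticLossSketch>(&intercepts);
  // intercepts[0] == 0, since -log(1/0.5 - 1) = 0 and sigmoid(0) = 0.5.
}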
@@ -65,11 +70,17 @@ struct LogisticRegression {
     const float eps = 1e-16f;
     return fmaxf(predt * (1.0f - predt), eps);
   }
-  static bst_float ProbToMargin(bst_float base_score) {
-    CHECK(base_score > 0.0f && base_score < 1.0f)
-        << "base_score must be in (0,1) for logistic loss, got: " << base_score;
+
+  XGBOOST_DEVICE static float ProbToMargin(float base_score) {
     return -logf(1.0f / base_score - 1.0f);
   }
+  constexpr static StringView InterceptErrorMsg() {
+    return "base_score must be in (0,1) for the logistic loss.";
+  }
+  XGBOOST_DEVICE static bool CheckIntercept(float base_score) {
+    return base_score > 0.0f && base_score < 1.0f;
+  }
+
   static const char* LabelErrorMsg() { return "label must be in [0,1] for logistic regression"; }
   static const char* DefaultEvalMetric() { return "rmse"; }
 
@@ -97,7 +108,11 @@ struct LogisticRaw : public LogisticRegression {
     predt = common::Sigmoid(predt);
     return fmaxf(predt * (1.0f - predt), eps);
   }
-  static bst_float ProbToMargin(bst_float base_score) { return base_score; }
+
+  XGBOOST_DEVICE static float ProbToMargin(float base_score) { return base_score; }
+  constexpr static StringView InterceptErrorMsg() { return ""; }
+  XGBOOST_DEVICE static bool CheckIntercept(float) { return true; }
+
   static const char* DefaultEvalMetric() { return "logloss"; }
 
   static const char* Name() { return "binary:logitraw"; }
@@ -109,7 +124,13 @@ class GammaDeviance {
  public:
   XGBOOST_DEVICE static float PredTransform(float x) { return std::exp(x); }
+  XGBOOST_DEVICE static float ProbToMargin(float x) { return std::log(x); }
+  constexpr static StringView InterceptErrorMsg() {
+    return "`base_score` must be greater than 0 for gamma regression";
+  }
+  XGBOOST_DEVICE static bool CheckIntercept(float base_score) { return base_score > 0; }
+
   XGBOOST_DEVICE static float FirstOrderGradient(float p, float y) { return 1.0f - y / p; }
diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu
index b5e57199f969..122002f04a6b 100644
--- a/src/objective/regression_obj.cu
+++ b/src/objective/regression_obj.cu
@@ -1,28 +1,29 @@
 /**
- * Copyright 2015-2024, XGBoost Contributors
+ * Copyright 2015-2025, XGBoost Contributors
  * \file regression_obj.cu
  * \brief Definition of single-value regression and classification objectives.
  * \author Tianqi Chen, Kailong Chen
  */
 #include
-#include
+#include <algorithm>  // for all_of
 #include
-#include <cstdint>  // std::int32_t
-#include
-#include
+#include <cstdint>  // for int32_t
+#include <vector>   // for vector
 
 #include "../common/common.h"
-#include "../common/linalg_op.h"
-#include "../common/numeric.h"          // Reduce
-#include "../common/optional_weight.h"  // OptionalWeights
+#include "../common/linalg_op.h"        // for ElementWiseKernel
+#include "../common/numeric.h"          // for Reduce
+#include "../common/optional_weight.h"  // for OptionalWeights
 #include "../common/pseudo_huber.h"
 #include "../common/stats.h"
 #include "../common/threading_utils.h"
 #include "../common/transform.h"
+#include "../common/utils.h"  // for NoOp
 #include "./regression_loss.h"
 #include "adaptive.h"
 #include "init_estimation.h"  // FitIntercept
+#include "regression_param.h"
 #include "xgboost/base.h"
 #include "xgboost/context.h"  // Context
 #include "xgboost/data.h"     // MetaInfo
@@ -35,31 +36,91 @@
 #include "xgboost/span.h"
 #include "xgboost/tree_model.h"  // RegTree
 
-#include "regression_param.h"
-
 #if defined(XGBOOST_USE_CUDA)
-#include "../common/cuda_context.cuh"  // for CUDAContext
-#include "../common/device_helpers.cuh"
-#include "../common/linalg_op.cuh"
+#include "../common/algorithm.cuh"       // for AllOf
+#include "../common/cuda_context.cuh"    // for CUDAContext
+#include "../common/device_helpers.cuh"  // for MakeIndexTransformIter
 #endif  // defined(XGBOOST_USE_CUDA)
 
-#if defined(XGBOOST_USE_SYCL)
-#include "../../plugin/sycl/common/linalg_op.h"
-#endif
-
 namespace xgboost::obj {
 namespace {
-void CheckRegInputs(MetaInfo const& info, HostDeviceVector<float> const& preds) {
   CheckInitInputs(info);
   CHECK_EQ(info.labels.Size(), preds.Size()) << "Invalid shape of labels.";
 }
-}  // anonymous namespace
+
+template <typename Loss>
+void ValidateLabel(Context const* ctx, MetaInfo const& info) {
+  auto label = info.labels.View(ctx->Device());
+  auto valid = ctx->DispatchDevice(
+      [&] {
+        return std::all_of(linalg::cbegin(label), linalg::cend(label),
+                           [](float y) -> bool { return Loss::CheckLabel(y); });
+      },
+      [&] {
 #if defined(XGBOOST_USE_CUDA)
-DMLC_REGISTRY_FILE_TAG(regression_obj_gpu);
+        auto it = dh::MakeIndexTransformIter([=] XGBOOST_DEVICE(std::size_t i) -> float {
+          auto [m, n] = linalg::UnravelIndex(i, label.Shape());
+          return label(m, n);
+        });
+        return common::AllOf(ctx->CUDACtx()->CTP(), it, it + label.Size(),
+                             [] XGBOOST_DEVICE(float y) { return Loss::CheckLabel(y); });
+#else
+        common::AssertGPUSupport();
+        return false;
 #endif  // defined(XGBOOST_USE_CUDA)
+      },
+      [&] {
+#if defined(XGBOOST_USE_SYCL)
+        return sycl::linalg::Validate(ctx->Device(), label,
+                                      [](float y) -> bool { return Loss::CheckLabel(y); });
+#else
+        common::AssertSYCLSupport();
+        return false;
+#endif  // defined(XGBOOST_USE_SYCL)
+      });
+  if (!valid) {
+    LOG(FATAL) << Loss::LabelErrorMsg();
+  }
+  if (!info.weights_.Empty()) {
+    CHECK_EQ(info.weights_.Size(), info.num_row_)
+        << "Number of weights should be equal to the number of data points.";
+  }
+}
+
+template <typename Fn, typename Chk = common::NoOp<bool>, typename Err = common::NoOp<StringView>>
+void ProbToMarginImpl(Context const* ctx, linalg::Vector<float>* base_score, Fn&& fn,
+                      Chk check = common::NoOp{true}, Err error = common::NoOp{{}}) {
+  auto intercept = base_score->View(ctx->Device());
+  bool is_valid = ctx->DispatchDevice(
+      [&] { return std::all_of(linalg::cbegin(intercept), linalg::cend(intercept), check); },
+      [&] {
+#if defined(XGBOOST_USE_CUDA)
+        return common::AllOf(ctx->CUDACtx()->CTP(), linalg::tcbegin(intercept),
+                             linalg::tcend(intercept), check);
+#else
+        common::AssertGPUSupport();
+        return false;
+#endif  // defined(XGBOOST_USE_CUDA)
+      },
+      [&] {
+#if defined(XGBOOST_USE_SYCL)
+        return sycl::linalg::Validate(ctx->Device(), intercept, check);
+#else
+        common::AssertSYCLSupport();
+        return false;
+#endif  // defined(XGBOOST_USE_SYCL)
+      });
+  CHECK(is_valid) << error();
+  linalg::ElementWiseKernel(ctx, intercept, [=] XGBOOST_DEVICE(std::size_t i) mutable {
+    intercept(i) = fn(intercept(i));
+  });
+}
+}  // anonymous namespace
+
+#if defined(XGBOOST_USE_CUDA)
+DMLC_REGISTRY_FILE_TAG(regression_obj_gpu);
+#endif  // defined(XGBOOST_USE_CUDA)
 
 template <typename Loss>
 class RegLossObj : public FitInterceptGlmLike {
@@ -67,46 +128,10 @@ class RegLossObj : public FitInterceptGlmLike {
   HostDeviceVector<float> additional_input_;
 
  public:
-  void ValidateLabel(MetaInfo const& info) {
-    auto label = info.labels.View(ctx_->Device());
-    auto valid = ctx_->DispatchDevice(
-        [&] {
-          return std::all_of(linalg::cbegin(label), linalg::cend(label),
-                             [](float y) -> bool { return Loss::CheckLabel(y); });
-        },
-        [&] {
-#if defined(XGBOOST_USE_CUDA)
-          auto cuctx = ctx_->CUDACtx();
-          auto it = dh::MakeTransformIterator<bool>(
-              thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) -> bool {
-                auto [m, n] = linalg::UnravelIndex(i, label.Shape());
-                return Loss::CheckLabel(label(m, n));
-              });
-          return dh::Reduce(cuctx->CTP(), it, it + label.Size(), true, thrust::logical_and<>{});
-#else
-          common::AssertGPUSupport();
-          return false;
-#endif  // defined(XGBOOST_USE_CUDA)
-        },
-        [&] {
-#if defined(XGBOOST_USE_SYCL)
-          return sycl::linalg::Validate(ctx_->Device(), label,
-                                        [](float y) -> bool { return Loss::CheckLabel(y); });
-#else
-          common::AssertSYCLSupport();
-          return false;
-#endif  // defined(XGBOOST_USE_SYCL)
-        });
-    if (!valid) {
-      LOG(FATAL) << Loss::LabelErrorMsg();
-    }
-  }
   // 0 - scale_pos_weight, 1 - is_null_weight
-  RegLossObj(): additional_input_(2) {}
+  RegLossObj() : additional_input_(2) {}
 
-  void Configure(const std::vector<std::pair<std::string, std::string>>& args) override {
-    param_.UpdateAllowUnknown(args);
-  }
+  void Configure(Args const& args) override { param_.UpdateAllowUnknown(args); }
 
   [[nodiscard]] ObjInfo Task() const override { return Loss::Info(); }
 
@@ -115,11 +140,11 @@ class RegLossObj : public FitInterceptGlmLike {
     return std::max(static_cast<size_t>(1), info.labels.Shape(1));
   }
 
-  void GetGradient(const HostDeviceVector<float>& preds, const MetaInfo& info,
-                   std::int32_t iter, linalg::Matrix<GradientPair>* out_gpair) override {
+  void GetGradient(const HostDeviceVector<float>& preds, const MetaInfo& info, std::int32_t iter,
+                   linalg::Matrix<GradientPair>* out_gpair) override {
     CheckRegInputs(info, preds);
     if (iter == 0) {
-      ValidateLabel(info);
+      ValidateLabel<Loss>(this->ctx_, info);
     }
 
     size_t const ndata = preds.Size();
@@ -197,8 +222,10 @@ class RegLossObj : public FitInterceptGlmLike {
     }
   }
 
-  [[nodiscard]] float ProbToMargin(float base_score) const override {
-    return Loss::ProbToMargin(base_score);
+  void ProbToMargin(linalg::Vector<float>* base_score) const override {
+    ProbToMarginImpl(
+        this->ctx_, base_score, [] XGBOOST_DEVICE(float v) { return Loss::ProbToMargin(v); },
+        [] XGBOOST_DEVICE(float v) { return Loss::CheckIntercept(v); }, Loss::InterceptErrorMsg);
   }
 
   void SaveConfig(Json* p_out) const override {
@@ -208,7 +235,11 @@ class RegLossObj : public FitInterceptGlmLike {
   }
 
   void LoadConfig(Json const& in) override {
-    FromJson(in["reg_loss_param"], &param_);
+    auto obj = get<Object const>(in);
+    auto it = obj.find("reg_loss_param");
+    if (it != obj.cend()) {
+      FromJson(it->second, &param_);
+    }
   }
 
  protected:
@@ -222,10 +253,6 @@ XGBOOST_REGISTER_OBJECTIVE(SquaredLossRegression, LinearSquareLoss::Name())
     .describe("Regression with squared error.")
     .set_body([]() { return new RegLossObj<LinearSquareLoss>(); });
 
-XGBOOST_REGISTER_OBJECTIVE(SquareLogError, SquaredLogError::Name())
-.describe("Regression with root mean squared logarithmic error.")
-.set_body([]() { return new RegLossObj<SquaredLogError>(); });
-
 XGBOOST_REGISTER_OBJECTIVE(LogisticRegression, LogisticRegression::Name())
     .describe("Logistic regression for probability regression task.")
     .set_body([]() { return new RegLossObj<LogisticRegression>(); });
@@ -251,8 +278,57 @@ XGBOOST_REGISTER_OBJECTIVE(LinearRegression, "reg:linear")
       return new RegLossObj<LinearSquareLoss>();
     });
 // End deprecated
 
+class SquaredLogErrorRegression : public FitIntercept {
+ public:
+  static auto Name() { return SquaredLogError::Name(); }
+
+  void Configure(Args const&) override {}
+  [[nodiscard]] ObjInfo Task() const override { return ObjInfo::kRegression; }
+  [[nodiscard]] bst_target_t Targets(MetaInfo const& info) const override {
+    return std::max(static_cast<std::size_t>(1), info.labels.Shape(1));
+  }
+  void GetGradient(HostDeviceVector<float> const& preds, const MetaInfo& info,
+                   std::int32_t iter, linalg::Matrix<GradientPair>* out_gpair) override {
+    if (iter == 0) {
+      ValidateLabel<SquaredLogError>(this->ctx_, info);
+    }
+    auto labels = info.labels.View(ctx_->Device());
+
+    out_gpair->SetDevice(ctx_->Device());
+    out_gpair->Reshape(info.num_row_, this->Targets(info));
+    auto gpair = out_gpair->View(ctx_->Device());
+
+    preds.SetDevice(ctx_->Device());
+    auto predt = linalg::MakeTensorView(ctx_, &preds, info.num_row_, this->Targets(info));
+
+    info.weights_.SetDevice(ctx_->Device());
+    common::OptionalWeights weight{ctx_->IsCPU() ? info.weights_.ConstHostSpan()
+                                                 : info.weights_.ConstDeviceSpan()};
+    linalg::ElementWiseKernel(this->ctx_, labels,
+                              [=] XGBOOST_DEVICE(std::size_t i, std::size_t j) mutable {
+                                auto p = predt(i, j);
+                                auto y = labels(i, j);
+                                auto w = weight[i];
+                                auto grad = SquaredLogError::FirstOrderGradient(p, y);
+                                auto hess = SquaredLogError::SecondOrderGradient(p, y);
+                                gpair(i, j) = {grad * w, hess * w};
+                              });
+  }
+  [[nodiscard]] const char* DefaultEvalMetric() const override { return "rmsle"; }
+
+  void SaveConfig(Json* p_out) const override {
+    auto& out = *p_out;
+    out["name"] = String(Name());
+  }
+  void LoadConfig(Json const&) override {}
+};
+
+XGBOOST_REGISTER_OBJECTIVE(SquaredLogErrorRegression, SquaredLogErrorRegression::Name())
+    .describe("Root mean squared log error.")
+    .set_body([]() { return new SquaredLogErrorRegression(); });
+
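Editor's note: the gradient pair computed by the new objective above reduces to a short loop per target. A standalone sketch follows, assuming the standard definitions of `SquaredLogError::FirstOrderGradient`/`SecondOrderGradient` from regression_loss.h; the helper name and flat vectors are illustrative, not part of the patch.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

struct GradientPairSketch { float grad, hess; };

// Plain-loop analogue of the ElementWiseKernel above, flattened to a single
// target. The clamp on the hessian mirrors SquaredLogError::SecondOrderGradient.
void SquaredLogErrorGradient(std::vector<float> const& y,   // labels, must be > -1
                             std::vector<float> const& p,   // predictions
                             std::vector<float> const& w,   // sample weights (1.0f if unweighted)
                             std::vector<GradientPairSketch>* out) {
  out->resize(y.size());
  for (std::size_t i = 0; i < y.size(); ++i) {
    // Derivatives of 0.5 * (log1p(p) - log1p(y))^2 with respect to p.
    float grad = (std::log1p(p[i]) - std::log1p(y[i])) / (p[i] + 1.0f);
    float hess = std::max(
        (-std::log1p(p[i]) + std::log1p(y[i]) + 1.0f) / ((p[i] + 1.0f) * (p[i] + 1.0f)),
        1e-6f);
    (*out)[i] = {grad * w[i], hess * w[i]};  // weighted, as in GetGradient
  }
}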
 class PseudoHuberRegression : public FitIntercept {
-  PesudoHuberParam param_;
+  PseudoHuberParam param_;
 
  public:
   void Configure(Args const& args) override { param_.UpdateAllowUnknown(args); }
@@ -336,9 +412,7 @@ struct PoissonRegressionParam : public XGBoostParameter<PoissonRegressionParam>
 class PoissonRegression : public FitInterceptGlmLike {
  public:
   // declare functions
-  void Configure(const std::vector<std::pair<std::string, std::string>>& args) override {
-    param_.UpdateAllowUnknown(args);
-  }
+  void Configure(Args const& args) override { param_.UpdateAllowUnknown(args); }
 
   [[nodiscard]] ObjInfo Task() const override { return ObjInfo::kRegression; }
@@ -397,8 +471,8 @@ class PoissonRegression : public FitInterceptGlmLike {
   void EvalTransform(HostDeviceVector<bst_float> *io_preds) override {
     PredTransform(io_preds);
   }
-  [[nodiscard]] float ProbToMargin(bst_float base_score) const override {
-    return std::log(base_score);
+  void ProbToMargin(linalg::Vector<float>* base_score) const override {
+    ProbToMarginImpl(this->ctx_, base_score, [] XGBOOST_DEVICE(float v) { return std::log(v); });
   }
 
   [[nodiscard]] const char* DefaultEvalMetric() const override {
     return "poisson-nloglik";
@@ -504,8 +578,8 @@ class CoxRegression : public FitIntercept {
   void EvalTransform(HostDeviceVector<bst_float> *io_preds) override {
     PredTransform(io_preds);
   }
-  [[nodiscard]] float ProbToMargin(bst_float base_score) const override {
-    return std::log(base_score);
+  void ProbToMargin(linalg::Vector<float>* base_score) const override {
+    ProbToMarginImpl(this->ctx_, base_score, [] XGBOOST_DEVICE(float v) { return std::log(v); });
   }
   [[nodiscard]] const char* DefaultEvalMetric() const override {
     return "cox-nloglik";
@@ -537,7 +611,7 @@ struct TweedieRegressionParam : public XGBoostParameter<TweedieRegressionParam>
 class TweedieRegression : public FitInterceptGlmLike {
  public:
   // declare functions
-  void Configure(const std::vector<std::pair<std::string, std::string>>& args) override {
+  void Configure(Args const& args) override {
     param_.UpdateAllowUnknown(args);
     std::ostringstream os;
     os << "tweedie-nloglik@" << param_.tweedie_variance_power;
@@ -604,9 +678,8 @@ class TweedieRegression : public FitInterceptGlmLike {
                      io_preds->Device())
         .Eval(io_preds);
   }
-
-  [[nodiscard]] float ProbToMargin(bst_float base_score) const override {
-    return std::log(base_score);
+  void ProbToMargin(linalg::Vector<float>* base_score) const override {
+    ProbToMarginImpl(this->ctx_, base_score, [] XGBOOST_DEVICE(float v) { return std::log(v); });
   }
 
   [[nodiscard]] const char* DefaultEvalMetric() const override {
@@ -670,51 +743,43 @@ class MeanAbsoluteError : public ObjFunction {
     });
   }
 
-  void InitEstimation(MetaInfo const& info, linalg::Tensor<float, 1>* base_margin) const override {
+  void InitEstimation(MetaInfo const& info, linalg::Tensor<float, 1>* base_score) const override {
     CheckInitInputs(info);
-    base_margin->Reshape(this->Targets(info));
+    base_score->Reshape(this->Targets(info));
 
-    double w{0.0};
+    double sum_weight{0.0};
     if (info.weights_.Empty()) {
-      w = static_cast<double>(info.num_row_);
+      sum_weight = static_cast<double>(info.num_row_);
     } else {
-      w = common::Reduce(ctx_, info.weights_);
+      sum_weight = common::Reduce(ctx_, info.weights_);
     }
 
     if (info.num_row_ == 0) {
-      auto out = base_margin->HostView();
-      out(0) = 0;
+      auto out = base_score->HostView();
+      std::fill(linalg::begin(out), linalg::end(out), 0.0f);
     } else {
-      linalg::Vector<float> temp;
-      common::Median(ctx_, info.labels, info.weights_, &temp);
-      common::Mean(ctx_, temp, base_margin);
+      common::Median(ctx_, info.labels, info.weights_, base_score);
     }
-    CHECK_EQ(base_margin->Size(), 1);
-    auto out = base_margin->HostView();
+
+    auto intercept = base_score->View(this->ctx_->Device());
     // weighted avg
-    std::transform(linalg::cbegin(out), linalg::cend(out), linalg::begin(out),
-                   [w](float v) { return v * w; });
-
-    auto rc = collective::Success() << [&] {
-      return collective::GlobalSum(ctx_, info, out);
-    } << [&] {
-      return collective::GlobalSum(ctx_, info, linalg::MakeVec(&w, 1));
-    };
+    linalg::VecScaMul(this->ctx_, intercept, sum_weight);
+    auto rc = collective::GlobalSum(ctx_, info, intercept, &sum_weight);
     collective::SafeColl(rc);
 
-    if (common::CloseTo(w, 0.0)) {
+    if (common::CloseTo(sum_weight, 0.0)) {
       // Mostly for handling empty dataset test.
       LOG(WARNING) << "Sum of weights is close to 0.0, skipping base score estimation.";
-      out(0) = ObjFunction::DefaultBaseScore();
+      *base_score = linalg::Zeros<float>(ctx_, base_score->Shape(0));
       return;
     }
-    std::transform(linalg::cbegin(out), linalg::cend(out), linalg::begin(out),
-                   [w](float v) { return v / w; });
+
+    linalg::VecScaDiv(this->ctx_, intercept, sum_weight);
   }
 
   void UpdateTreeLeaf(HostDeviceVector<bst_node_t> const& position, MetaInfo const& info,
                       float learning_rate, HostDeviceVector<float> const& prediction,
-                      std::int32_t group_idx, RegTree* p_tree) const override {
+                      bst_target_t group_idx, RegTree* p_tree) const override {
     ::xgboost::obj::UpdateTreeLeaf(ctx_, position, group_idx, info, learning_rate, prediction,
                                    0.5, p_tree);
   }
diff --git a/src/predictor/array_tree_layout.h b/src/predictor/array_tree_layout.h
new file mode 100644
index 000000000000..0332565b0c2c
--- /dev/null
+++ b/src/predictor/array_tree_layout.h
@@ -0,0 +1,227 @@
+/**
+ * Copyright 2021-2025, XGBoost Contributors
+ * \file array_tree_layout.h
+ * \brief Implementation of array tree layout -- a powerful inference optimization method.
+ */
+#ifndef XGBOOST_PREDICTOR_ARRAY_TREE_LAYOUT_H_
+#define XGBOOST_PREDICTOR_ARRAY_TREE_LAYOUT_H_
+
+#include <array>
+#include <limits>
+#include <type_traits>  // for conditional_t
+
+#include "../common/categorical.h"  // for IsCat
+#include "../tree/tree_view.h"      // for ScalarTreeView, MultiTargetTreeView
+#include "xgboost/tree_model.h"     // for RegTree
+
+namespace xgboost::predictor {
+
+/**
+ * @brief The class holds the array-based representation of the top levels of a single tree.
+ *
+ * @tparam has_categorical if the tree has categorical features
+ *
+ * @tparam any_missing if the class is able to process missing values
+ *
+ * @tparam kNumDeepLevels number of tree levels being unrolled into array-based structure
+ */
+template <bool has_categorical, bool any_missing, int kNumDeepLevels, typename TreeView>
+class ArrayTreeLayout {
+ private:
+  /* Number of nodes in the array based representation of the top levels of the tree
+   */
+  constexpr static size_t kNodesCount = (1u << kNumDeepLevels) - 1;
+
+  struct Empty {};
+  using DefaultLeftType =
+      typename std::conditional_t<any_missing, std::array<bool, kNodesCount>, Empty>;
+  using IsCatType =
+      typename std::conditional_t<has_categorical, std::array<bool, kNodesCount>, Empty>;
+  using CatSegmentType =
+      typename std::conditional_t<has_categorical,
+                                  std::array<common::Span<std::uint32_t const>, kNodesCount>,
+                                  Empty>;
+
+  DefaultLeftType default_left_;
+  IsCatType is_cat_;
+  CatSegmentType cat_segment_;
+
+  std::array<bst_feature_t, kNodesCount> split_index_;
+  std::array<float, kNodesCount> split_cond_;
+  /* The nodes at tree levels 0, 1, ..., kNumDeepLevels - 1 are unrolled into an array-based
+   * structure. If the tree has additional levels, this array stores the node indices of the
+   * sub-trees at level kNumDeepLevels. This is necessary to continue processing nodes that
+   * are not eligible for array-based unrolling. The number of sub-trees packed into this
+   * array is equal to the number of nodes at tree level kNumDeepLevels, which is calculated
+   * as (1u << kNumDeepLevels) == kNodesCount + 1.
+   */
+  // Mapping from array node index to the RegTree node index.
+  std::array<bst_node_t, kNodesCount + 1> nidx_in_tree_;
+
+  /**
+   * @brief Traverse the top levels of the original tree and fill internal arrays
+   *
+   * @tparam depth the tree level being processed
+   *
+   * @param tree the original tree
+   * @param cats matrix of categorical splits
+   * @param nidx_array node idx in the array layout
+   * @param nidx node idx in the original tree
+   */
+  template <int depth>
+  void Populate(TreeView const& tree, RegTree::CategoricalSplitMatrix const& cats,
+                bst_node_t nidx_array = 0, bst_node_t nidx = 0) {
+    if constexpr (depth == kNumDeepLevels + 1) {
+      return;
+    } else if constexpr (depth == kNumDeepLevels) {
+      /* We store the node index in the original tree to ensure continued processing
+       * for nodes that are not eligible for array layout optimization.
+       */
+      nidx_in_tree_[nidx_array - kNodesCount] = nidx;
+    } else {
+      if (tree.IsLeaf(nidx)) {
+        split_index_[nidx_array] = 0;
+
+        /*
+         * If the tree is not fully populated, we can reduce transfer costs.
+         * The values for the unpopulated parts of the tree are set to ensure
+         * that any move will always proceed in the "right" direction.
+         * This is achieved by exploiting the fact that comparisons with NaN always result
+         * in false.
+         */
+        if constexpr (any_missing) default_left_[nidx_array] = 0;
+        if constexpr (has_categorical) is_cat_[nidx_array] = 0;
+        split_cond_[nidx_array] = std::numeric_limits<float>::quiet_NaN();
+
+        Populate<depth + 1>(tree, cats, 2 * nidx_array + 2, nidx);
+      } else {
+        if constexpr (any_missing) default_left_[nidx_array] = tree.DefaultLeft(nidx);
+        if constexpr (has_categorical) {
+          is_cat_[nidx_array] = common::IsCat(cats.split_type, nidx);
+          if (is_cat_[nidx_array]) {
+            cat_segment_[nidx_array] = cats.categories.subspan(cats.node_ptr[nidx].beg,
+                                                               cats.node_ptr[nidx].size);
+          }
+        }
+
+        split_index_[nidx_array] = tree.SplitIndex(nidx);
+        split_cond_[nidx_array] = tree.SplitCond(nidx);
+
+        /*
+         * LeftChild is used to determine if a node is a leaf, so it is always a valid value.
+         * However, RightChild can be invalid in some exotic cases.
+         * A tree with an invalid RightChild can still be correctly processed using classical
+         * methods if the split conditions are correct.
+         * However, in an array layout, an invalid RightChild, even if unreachable, can lead
+         * to memory corruption. A check should be added to prevent this.
+         */
+        Populate<depth + 1>(tree, cats, 2 * nidx_array + 1, tree.LeftChild(nidx));
+        bst_node_t right_child = tree.RightChild(nidx);
+        if (right_child != RegTree::kInvalidNodeId) {
+          Populate<depth + 1>(tree, cats, 2 * nidx_array + 2, right_child);
+        }
+      }
+    }
+  }
+
+  bool GetDecision(float fvalue, bst_node_t nidx) const {
+    if constexpr (has_categorical) {
+      if (is_cat_[nidx]) {
+        return common::Decision(cat_segment_[nidx], fvalue);
+      } else {
+        return fvalue < split_cond_[nidx];
+      }
+    } else {
+      return fvalue < split_cond_[nidx];
+    }
+  }
+
+ public:
+  /* Ad-hoc value.
+   * Increasing it doesn't lead to a perf gain, since the bottleneck is now the gather
+   * instructions.
+   */
+  constexpr static int kMaxNumDeepLevels = 6;
+  static_assert(kNumDeepLevels <= kMaxNumDeepLevels);
+
+  ArrayTreeLayout(TreeView const& tree, RegTree::CategoricalSplitMatrix const &cats) {
+    Populate<0>(tree, cats);
+  }
+
+  const auto& SplitIndex() const {
+    return split_index_;
+  }
+
+  const auto& SplitCond() const {
+    return split_cond_;
+  }
+
+  const auto& DefaultLeft() const {
+    return default_left_;
+  }
+
+  const auto& NidxInTree() const {
+    return nidx_in_tree_;
+  }
+
+  /**
+   * @brief Traverse the top levels of the tree for the entire block_size.
+   *
+   * In the array layout, it is organized to guarantee that if a node at the current level
+   * has index nidx, then the node index for the left child at the next level is always
+   * 2*nidx, and the node index for the right child at the next level is always 2*nidx+1.
+   * This greatly improves data locality.
+   *
+   * @param fvec_tloc buffer holding the feature values
+   * @param block_size size of the current block (1 < block_size <= 64)
+   * @param p_nidx Pointer to the vector of node indexes in the original tree with size
+   *               equal to the block size. (One node per sample). The value corresponds
+   *               to the first level after kNumDeepLevels
+   */
+  void Process(common::Span<RegTree::FVec const> fvec_tloc, std::size_t const block_size,
+               bst_node_t* p_nidx) {
+    for (int depth = 0; depth < kNumDeepLevels; ++depth) {
+      std::size_t first_node = (1u << depth) - 1;
+
+      for (std::size_t i = 0; i < block_size; ++i) {
+        bst_node_t idx = p_nidx[i];
+
+        const auto& feat = fvec_tloc[i];
+        bst_feature_t split = split_index_[first_node + idx];
+        auto fvalue = feat.GetFvalue(split);
+        if constexpr (any_missing) {
+          bool go_left = feat.IsMissing(split) ? default_left_[first_node + idx]
                                                : GetDecision(fvalue, first_node + idx);
+          p_nidx[i] = 2 * idx + !go_left;
+        } else {
+          p_nidx[i] = 2 * idx + !GetDecision(fvalue, first_node + idx);
+        }
+      }
+    }
+    // Remap to the original index.
+    for (std::size_t i = 0; i < block_size; ++i) {
+      p_nidx[i] = nidx_in_tree_[p_nidx[i]];
+    }
+  }
+};
+
+template <bool has_categorical, bool any_missing, int num_deep_levels = 1, typename TreeView>
+void ProcessArrayTree(TreeView const& tree, common::Span<RegTree::FVec const> fvec_tloc,
+                      std::size_t const block_size, bst_node_t* p_nidx, bst_node_t tree_depth) {
+  constexpr int kMaxNumDeepLevels =
+      ArrayTreeLayout<has_categorical, any_missing, num_deep_levels, TreeView>::kMaxNumDeepLevels;
+
+  // Fill the array tree, then output predicted node idx.
+  if constexpr (num_deep_levels == kMaxNumDeepLevels) {
+    ArrayTreeLayout<has_categorical, any_missing, num_deep_levels, TreeView> buffer{
+        tree, tree.GetCategoriesMatrix()};
+    buffer.Process(fvec_tloc, block_size, p_nidx);
+  } else {
+    if (tree_depth <= num_deep_levels) {
+      ArrayTreeLayout<has_categorical, any_missing, num_deep_levels, TreeView> buffer{
+          tree, tree.GetCategoriesMatrix()};
+      buffer.Process(fvec_tloc, block_size, p_nidx);
+    } else {
+      ProcessArrayTree<has_categorical, any_missing, num_deep_levels + 1>(
+          tree, fvec_tloc, block_size, p_nidx, tree_depth);
+    }
+  }
+}
+}  // namespace xgboost::predictor
+#endif  // XGBOOST_PREDICTOR_ARRAY_TREE_LAYOUT_H_
diff --git a/src/predictor/cpu_predictor.cc b/src/predictor/cpu_predictor.cc
index 9e6289c2b630..03206abcf00f 100644
--- a/src/predictor/cpu_predictor.cc
+++ b/src/predictor/cpu_predictor.cc
@@ -2,30 +2,30 @@
  * Copyright 2017-2025, XGBoost Contributors
  */
 #include <algorithm>  // for max, fill, min
-#include <any>        // for any, any_cast
 #include <cassert>    // for assert
 #include <cstddef>    // for size_t
 #include <cstdint>    // for uint32_t, int32_t, uint64_t
 #include <memory>     // for unique_ptr, shared_ptr
-#include <ostream>    // for char_traits, operator<<, basic_ostream
-#include <typeinfo>   // for type_info
 #include <vector>     // for vector
 
 #include "../collective/allreduce.h"         // for Allreduce
 #include "../collective/communicator-inl.h"  // for IsDistributed
 #include "../common/bitfield.h"              // for RBitField8
 #include "../common/column_matrix.h"         // for ColumnMatrix
-#include "../common/common.h"                // for DivRoundUp
 #include "../common/error_msg.h"             // for InplacePredictProxy
 #include "../common/math.h"                  // for CheckNAN
 #include "../common/threading_utils.h"       // for ParallelFor
 #include "../data/adapter.h"                 // for ArrayAdapter, CSRAdapter, CSRArrayAdapter
+#include "../data/cat_container.h"           // for CatContainer
 #include "../data/gradient_index.h"          // for GHistIndexMatrix
 #include "../data/proxy_dmatrix.h"           // for DMatrixProxy
 #include "../gbm/gbtree_model.h"             // for GBTreeModel, GBTreeModelParam
-#include "cpu_treeshap.h"                    // for CalculateContributions
+#include "array_tree_layout.h"               // for ProcessArrayTree
 #include "dmlc/registry.h"                   // for DMLC_REGISTRY_FILE_TAG
+#include "gbtree_view.h"                     // for GBTreeModelView
 #include "predict_fn.h"                      // for GetNextNode, GetNextNodeMulti
+#include "treeshap.h"                        // for CalculateContributions
+#include "utils.h"                           // for CheckProxyDMatrix
 #include "xgboost/base.h"                    // for bst_float, bst_node_t, bst_omp_uint, bst_fe...
 #include "xgboost/context.h"                 // for Context
 #include "xgboost/data.h"                    // for Entry, DMatrix, MetaInfo, SparsePage, Batch...
@@ -42,122 +42,196 @@
 namespace xgboost::predictor {
 
 DMLC_REGISTRY_FILE_TAG(cpu_predictor);
 
-namespace scalar {
-template <bool has_missing, bool has_categorical>
-bst_node_t GetLeafIndex(RegTree const &tree, const RegTree::FVec &feat,
-                        RegTree::CategoricalSplitMatrix const &cats) {
-  bst_node_t nidx{0};
-  while (!tree[nidx].IsLeaf()) {
-    bst_feature_t split_index = tree[nidx].SplitIndex();
+namespace {
+using TreeViewVar = std::variant<tree::ScalarTreeView, tree::MultiTargetTreeView>;
+struct CopyViews {
+  void operator()(std::vector<TreeViewVar> *p_dst, std::vector<TreeViewVar> &&src) const {
+    std::swap(src, *p_dst);
+  }
+};
+
+template <typename T>
+using Vec = std::vector<T>;
+// The input device should be DeviceOrd::CPU() instead of Context::Device(). The GBTree
+// has an optimization to use CPU predictor when the DMatrix SparsePage is on CPU, even if
+// the context is a CUDA context.
+using HostModel = GBTreeModelView<Vec, CopyViews>;
+
+template <bool has_missing, bool has_categorical, typename TreeView>
+bst_node_t GetLeafIndex(TreeView const &tree, const RegTree::FVec &feat,
+                        RegTree::CategoricalSplitMatrix const &cats, bst_node_t nidx) {
+  while (!tree.IsLeaf(nidx)) {
+    bst_feature_t split_index = tree.SplitIndex(nidx);
     auto fvalue = feat.GetFvalue(split_index);
     nidx = GetNextNode<has_missing, has_categorical>(
-        tree[nidx], nidx, fvalue, has_missing && feat.IsMissing(split_index), cats);
+        tree, nidx, fvalue, has_missing && feat.IsMissing(split_index), cats);
   }
   return nidx;
 }
+}  // namespace
 
+namespace scalar {
 template <bool has_categorical>
-[[nodiscard]] float PredValueByOneTree(const RegTree::FVec &p_feats, RegTree const &tree,
-                                       RegTree::CategoricalSplitMatrix const &cats) noexcept(true) {
+[[nodiscard]] float PredValueByOneTree(const RegTree::FVec &p_feats,
+                                       tree::ScalarTreeView const &tree,
+                                       RegTree::CategoricalSplitMatrix const &cats,
+                                       bst_node_t nidx) noexcept(true) {
   const bst_node_t leaf = p_feats.HasMissing()
-                              ? GetLeafIndex<true, has_categorical>(tree, p_feats, cats)
-                              : GetLeafIndex<false, has_categorical>(tree, p_feats, cats);
-  return tree[leaf].LeafValue();
+                              ? GetLeafIndex<true, has_categorical>(tree, p_feats, cats, nidx)
+                              : GetLeafIndex<false, has_categorical>(tree, p_feats, cats, nidx);
+  return tree.LeafValue(leaf);
 }
 
-}  // namespace scalar
-namespace multi {
-template <bool has_missing, bool has_categorical>
-bst_node_t GetLeafIndex(MultiTargetTree const &tree, const RegTree::FVec &feat,
-                        RegTree::CategoricalSplitMatrix const &cats) {
-  bst_node_t nidx{0};
-  while (!tree.IsLeaf(nidx)) {
-    bst_feature_t split_index = tree.SplitIndex(nidx);
-    auto fvalue = feat.GetFvalue(split_index);
-    nidx = GetNextNodeMulti<has_missing, has_categorical>(
-        tree, nidx, fvalue, has_missing && feat.IsMissing(split_index), cats);
+template <bool use_array_tree_layout, bool has_categorical, bool any_missing>
+void PredValueByOneTree(tree::ScalarTreeView const &tree, std::size_t const predict_offset,
+                        common::Span<RegTree::FVec const> fvec_tloc, std::size_t const block_size,
+                        linalg::MatrixView<float> out_predt, bst_node_t *p_nidx, int depth,
+                        int gid) {
+  auto const &cats = tree.GetCategoriesMatrix();
+  if constexpr (use_array_tree_layout) {
+    ProcessArrayTree<has_categorical, any_missing>(tree, fvec_tloc, block_size, p_nidx, depth);
+  }
+  for (std::size_t i = 0; i < block_size; ++i) {
+    bst_node_t nidx = 0;
+    /*
+     * If array_tree_layout was used, we start processing from the nidx calculated using
+     * the array tree.
+     */
+    if constexpr (use_array_tree_layout) {
+      nidx = p_nidx[i];
+      p_nidx[i] = 0;
+    }
+    out_predt(predict_offset + i, gid) +=
+        PredValueByOneTree<has_categorical>(fvec_tloc[i], tree, cats, nidx);
   }
-  return nidx;
 }
+}  // namespace scalar
 
+namespace multi {
 template <bool has_categorical>
-void PredValueByOneTree(RegTree::FVec const &p_feats, MultiTargetTree const &tree,
+void PredValueByOneTree(RegTree::FVec const &p_feats, tree::MultiTargetTreeView const &tree,
                         RegTree::CategoricalSplitMatrix const &cats,
-                        linalg::VectorView<float> out_predt) {
+                        linalg::VectorView<float> out_predt, bst_node_t nidx) {
   bst_node_t const leaf = p_feats.HasMissing()
-                              ? GetLeafIndex<true, has_categorical>(tree, p_feats, cats)
-                              : GetLeafIndex<false, has_categorical>(tree, p_feats, cats);
+                              ? GetLeafIndex<true, has_categorical>(tree, p_feats, cats, nidx)
+                              : GetLeafIndex<false, has_categorical>(tree, p_feats, cats, nidx);
   auto leaf_value = tree.LeafValue(leaf);
   assert(out_predt.Shape(0) == leaf_value.Shape(0) && "shape mismatch.");
   for (size_t i = 0; i < leaf_value.Size(); ++i) {
     out_predt(i) += leaf_value(i);
   }
 }
+
+template <bool use_array_tree_layout, bool has_categorical, bool any_missing>
+void PredValueByOneTree(tree::MultiTargetTreeView const &tree, std::size_t const predict_offset,
+                        common::Span<RegTree::FVec const> fvec_tloc, std::size_t const block_size,
+                        linalg::MatrixView<float> out_predt, bst_node_t *p_nidx, bst_node_t depth) {
+  auto const &cats = tree.GetCategoriesMatrix();
+  if constexpr (use_array_tree_layout) {
+    ProcessArrayTree<has_categorical, any_missing>(tree, fvec_tloc, block_size, p_nidx, depth);
+  }
+  for (std::size_t i = 0; i < block_size; ++i) {
+    bst_node_t nidx = RegTree::kRoot;
+    if constexpr (use_array_tree_layout) {
+      nidx = p_nidx[i];
+      p_nidx[i] = RegTree::kRoot;
+    }
+    auto t_predts = out_predt.Slice(predict_offset + i, linalg::All());
+    PredValueByOneTree<has_categorical>(fvec_tloc[i], tree, cats, t_predts, nidx);
+  }
+}
 }  // namespace multi
 
 namespace {
-void PredictByAllTrees(gbm::GBTreeModel const &model, std::uint32_t const tree_begin,
-                       std::uint32_t const tree_end, std::size_t const predict_offset,
-                       std::vector<RegTree::FVec> const &thread_temp, std::size_t const offset,
-                       std::size_t const block_size, linalg::MatrixView<float> out_predt) {
-  for (std::uint32_t tree_id = tree_begin; tree_id < tree_end; ++tree_id) {
-    auto const &tree = *model.trees.at(tree_id);
-    auto const &cats = tree.GetCategoriesMatrix();
-    bool has_categorical = tree.HasCategoricalSplit();
-
-    if (tree.IsMultiTarget()) {
-      if (has_categorical) {
-        for (std::size_t i = 0; i < block_size; ++i) {
-          auto t_predts = out_predt.Slice(predict_offset + i, linalg::All());
-          multi::PredValueByOneTree<true>(thread_temp[offset + i], *tree.GetMultiTargetTree(), cats,
-                                          t_predts);
-        }
-      } else {
-        for (std::size_t i = 0; i < block_size; ++i) {
-          auto t_predts = out_predt.Slice(predict_offset + i, linalg::All());
-          multi::PredValueByOneTree<false>(thread_temp[offset + i], *tree.GetMultiTargetTree(),
-                                           cats, t_predts);
+template <bool use_array_tree_layout, bool any_missing>
+void PredictBlockByAllTrees(HostModel const &model, std::size_t const predict_offset,
+                            common::Span<RegTree::FVec const> fvec_tloc,
+                            std::size_t const block_size,
+                            linalg::MatrixView<float> out_predt,
+                            const std::vector<bst_node_t> &tree_depth) {
+  std::vector<bst_node_t> nidx;
+  if constexpr (use_array_tree_layout) {
+    nidx.resize(block_size, 0);
+  }
+  auto trees = model.Trees();
+  for (bst_tree_t tree_id = 0, n_trees = model.Trees().size(); tree_id < n_trees; ++tree_id) {
+    bst_node_t depth = use_array_tree_layout ? tree_depth[tree_id] : 0;
+    std::visit(
+        enc::Overloaded{
+            [&](tree::ScalarTreeView const &tree) {
+              bool has_categorical = tree.HasCategoricalSplit();
+              auto const gid = model.tree_groups[tree_id];
+              if (has_categorical) {
+                scalar::PredValueByOneTree<use_array_tree_layout, true, any_missing>(
+                    tree, predict_offset, fvec_tloc, block_size, out_predt, nidx.data(), depth,
+                    gid);
+              } else {
+                scalar::PredValueByOneTree<use_array_tree_layout, false, any_missing>(
+                    tree, predict_offset, fvec_tloc, block_size, out_predt, nidx.data(), depth,
+                    gid);
+              }
+            },
+            [&](tree::MultiTargetTreeView const &tree) {
+              bool has_categorical = tree.HasCategoricalSplit();
+              if (has_categorical) {
+                multi::PredValueByOneTree<use_array_tree_layout, true, any_missing>(
+                    tree, predict_offset, fvec_tloc, block_size, out_predt, nidx.data(), depth);
+              } else {
+                multi::PredValueByOneTree<use_array_tree_layout, false, any_missing>(
+                    tree, predict_offset, fvec_tloc, block_size, out_predt, nidx.data(), depth);
+              }
+            }},
+        trees[tree_id]);
+  }
+}
+
+// Dispatch between template implementations
+void DispatchArrayLayout(HostModel const &model, std::size_t const predict_offset,
+                         common::Span<RegTree::FVec const> fvec_tloc, std::size_t const block_size,
+                         linalg::MatrixView<float> out_predt,
+                         const std::vector<bst_node_t> &tree_depth, bool any_missing) {
+  auto n_trees = model.tree_end - model.tree_begin;
+  CHECK_EQ(n_trees, model.Trees().size());
+  /*
+   * We transform trees to array layout for each block of data to avoid memory overheads.
+   * It makes the array layout inefficient for block_size == 1
+   */
+  const bool use_array_tree_layout = block_size > 1;
+  if (use_array_tree_layout) {
+    CHECK_EQ(n_trees, tree_depth.size());
+    // Recheck if the current block has missing values.
+    if (any_missing) {
+      any_missing = false;
+      for (std::size_t i = 0; i < block_size; ++i) {
+        any_missing |= fvec_tloc[i].HasMissing();
+        if (any_missing) {
+          break;
         }
       }
+    }
+    if (any_missing) {
+      PredictBlockByAllTrees<true, true>(model, predict_offset, fvec_tloc, block_size, out_predt,
+                                         tree_depth);
     } else {
-      auto const gid = model.tree_info[tree_id];
-      if (has_categorical) {
-        for (std::size_t i = 0; i < block_size; ++i) {
-          out_predt(predict_offset + i, gid) +=
-              scalar::PredValueByOneTree<true>(thread_temp[offset + i], tree, cats);
-        }
-      } else {
-        for (std::size_t i = 0; i < block_size; ++i) {
-          out_predt(predict_offset + i, gid) +=
-              scalar::PredValueByOneTree<false>(thread_temp[offset + i], tree, cats);
-        }
-      }
+      PredictBlockByAllTrees<true, false>(model, predict_offset, fvec_tloc, block_size, out_predt,
+                                          tree_depth);
     }
+  } else {
+    PredictBlockByAllTrees<false, true>(model, predict_offset, fvec_tloc, block_size, out_predt,
+                                        tree_depth);
   }
 }
 
-template <typename DataView>
-void FVecFill(std::size_t const block_size, std::size_t const batch_offset,
-              bst_feature_t n_features, DataView *p_batch, std::size_t const fvec_offset,
-              std::vector<RegTree::FVec> *p_feats) {
-  auto &feats_vec = *p_feats;
-  auto &batch = *p_batch;
-  for (std::size_t i = 0; i < block_size; ++i) {
-    RegTree::FVec &feats = feats_vec[fvec_offset + i];
-    if (feats.Size() == 0) {
-      feats.Init(n_features);
-    }
-    batch.Fill(batch_offset + i, &feats);
-  }
+bool ShouldUseBlock(DMatrix *p_fmat) {
+  // Threshold to use block-based prediction.
+  constexpr double kDensityThresh = .125;
+  bst_idx_t n_samples = p_fmat->Info().num_row_;
+  bst_idx_t total = std::max(n_samples * p_fmat->Info().num_col_, static_cast<bst_idx_t>(1));
+  double density = static_cast<double>(p_fmat->Info().num_nonzero_) / static_cast<double>(total);
+  bool blocked = density > kDensityThresh;
+  return blocked;
 }
 
-void FVecDrop(std::size_t const block_size, std::size_t const fvec_offset,
-              std::vector<RegTree::FVec> *p_feats) {
-  for (size_t i = 0; i < block_size; ++i) {
-    RegTree::FVec &feats = (*p_feats)[fvec_offset + i];
-    feats.Drop();
-  }
-}
+using cpu_impl::MakeCatAccessor;
 
 // Convert a single sample in batch view to FVec
 template <typename D>
@@ -167,30 +241,57 @@ struct DataToFeatVec {
     auto n_valid = static_cast<D const *>(this)->DoFill(ridx, feats.Data().data());
     feats.HasMissing(n_valid != feats.Size());
   }
+
+  // Fill the data into the feature vector.
+  void FVecFill(common::Range1d const &block, bst_feature_t n_features,
+                common::Span<RegTree::FVec> s_feats_vec) const {
+    auto feats_vec = s_feats_vec.data();
+    for (std::size_t i = 0; i < block.Size(); ++i) {
+      RegTree::FVec &feats = feats_vec[i];
+      if (feats.Size() == 0) {
+        feats.Init(n_features);
+      }
+      this->Fill(block.begin() + i, &feats);
+    }
+  }
+  // Clear the feature vector.
+  static void FVecDrop(common::Span<RegTree::FVec> s_feats) {
+    auto p_feats = s_feats.data();
+    for (size_t i = 0, n = s_feats.size(); i < n; ++i) {
+      p_feats[i].Drop();
+    }
+  }
 };
 
-struct SparsePageView : public DataToFeatVec<SparsePageView> {
-  bst_idx_t base_rowid;
-  HostSparsePageView view;
+template <typename EncAccessor>
+class SparsePageView : public DataToFeatVec<SparsePageView<EncAccessor>> {
+  EncAccessor acc_;
+  HostSparsePageView const view_;
+
+ public:
+  bst_idx_t const base_rowid;
 
-  explicit SparsePageView(SparsePage const *p) : base_rowid{p->base_rowid} { view = p->GetView(); }
-  [[nodiscard]] std::size_t Size() const { return view.Size(); }
+  SparsePageView(HostSparsePageView const p, bst_idx_t base_rowid, EncAccessor acc)
+      : acc_{std::move(acc)}, view_{p}, base_rowid{base_rowid} {}
+  [[nodiscard]] std::size_t Size() const { return view_.Size(); }
 
   [[nodiscard]] bst_idx_t DoFill(bst_idx_t ridx, float *out) const {
-    auto p_data = view[ridx].data();
+    auto p_data = view_[ridx].data();
 
-    for (std::size_t i = 0, n = view[ridx].size(); i < n; ++i) {
+    for (std::size_t i = 0, n = view_[ridx].size(); i < n; ++i) {
       auto const &entry = p_data[i];
-      out[entry.index] = entry.fvalue;
+      out[entry.index] = acc_(entry);
     }
 
-    return view[ridx].size();
+    return view_[ridx].size();
  }
 };
 
-struct GHistIndexMatrixView : public DataToFeatVec<GHistIndexMatrixView> {
+template <typename EncAccessor>
+class GHistIndexMatrixView : public DataToFeatVec<GHistIndexMatrixView<EncAccessor>> {
  private:
   GHistIndexMatrix const &page_;
+  EncAccessor acc_;
   common::Span<FeatureType const> ft_;
   std::vector<std::uint32_t> const &ptrs_;
@@ -202,8 +303,10 @@ struct GHistIndexMatrixView : public DataToFeatVec<GHistIndexMatrixView> {
   bst_idx_t const base_rowid;
 
  public:
-  GHistIndexMatrixView(GHistIndexMatrix const &_page, common::Span<FeatureType const> ft)
+  GHistIndexMatrixView(GHistIndexMatrix const &_page, EncAccessor acc,
+                       common::Span<FeatureType const> ft)
       : page_{_page},
+        acc_{std::move(acc)},
         ft_{ft},
         ptrs_{_page.cut.Ptrs()},
         mins_{_page.cut.MinValues()},
@@ -211,7 +314,7 @@ struct GHistIndexMatrixView : public DataToFeatVec<GHistIndexMatrixView> {
         columns_{page_.Transpose()},
         base_rowid{_page.base_rowid} {}
 
-  [[nodiscard]] bst_idx_t DoFill(bst_idx_t ridx, float* out) const {
+  [[nodiscard]] bst_idx_t DoFill(bst_idx_t ridx, float *out) const {
     auto gridx = ridx + this->base_rowid;
     auto n_features = page_.Features();
 
@@ -219,8 +322,8 @@ struct GHistIndexMatrixView : public DataToFeatVec<GHistIndexMatrixView> {
     if (page_.IsDense()) {
       common::DispatchBinType(page_.index.GetBinTypeSize(), [&](auto t) {
         using T = decltype(t);
-        auto ptr = page_.index.data<T>();
-        auto rbeg = page_.row_ptr[ridx];
+        auto ptr = this->page_.index.template data<T>();
+        auto rbeg = this->page_.row_ptr[ridx];
         for (bst_feature_t fidx = 0; fidx < n_features; ++fidx) {
           bst_bin_t bin_idx;
           float fvalue;
@@ -232,30 +335,30 @@ struct GHistIndexMatrixView : public DataToFeatVec<GHistIndexMatrixView> {
             fvalue = common::HistogramCuts::NumericBinValue(this->ptrs_, values_, mins_, fidx,
                                                             bin_idx);
           }
-          out[fidx] = fvalue;
+          out[fidx] = acc_(fvalue, fidx);
         }
       });
       n_non_missings += n_features;
     } else {
       for (bst_feature_t fidx = 0; fidx < n_features; ++fidx) {
-        float f = std::numeric_limits<float>::quiet_NaN();
+        float fvalue = std::numeric_limits<float>::quiet_NaN();
         bool is_cat = common::IsCat(ft_, fidx);
         if (columns_.GetColumnType(fidx) == common::kSparseColumn) {
           // Special handling for extremely sparse data. Just binary search.
           auto bin_idx = page_.GetGindex(gridx, fidx);
           if (bin_idx != -1) {
            if (is_cat) {
-              f = values_[bin_idx];
+              fvalue = values_[bin_idx];
            } else {
-              f = common::HistogramCuts::NumericBinValue(this->ptrs_, values_, mins_, fidx,
-                                                         bin_idx);
+              fvalue = common::HistogramCuts::NumericBinValue(this->ptrs_, values_, mins_, fidx,
+                                                              bin_idx);
            }
          }
        } else {
-          f = page_.GetFvalue(ptrs_, values_, mins_, gridx, fidx, is_cat);
+          fvalue = page_.GetFvalue(ptrs_, values_, mins_, gridx, fidx, is_cat);
        }
-        if (!common::CheckNAN(f)) {
-          out[fidx] = f;
+        if (!common::CheckNAN(fvalue)) {
+          out[fidx] = acc_(fvalue, fidx);
          n_non_missings++;
        }
      }
@@ -263,17 +366,18 @@ struct GHistIndexMatrixView : public DataToFeatVec<GHistIndexMatrixView> {
     return n_non_missings;
   }
 
-  [[nodiscard]] auto Size() const { return page_.Size(); }
+  [[nodiscard]] bst_idx_t Size() const { return page_.Size(); }
 };
 
-template <typename Adapter>
-class AdapterView : public DataToFeatVec<AdapterView<Adapter>> {
+template <typename Adapter, typename EncAccessor>
+class AdapterView : public DataToFeatVec<AdapterView<Adapter, EncAccessor>> {
   Adapter const *adapter_;
   float missing_;
+  EncAccessor acc_;
 
  public:
-  explicit AdapterView(Adapter const *adapter, float missing)
-      : adapter_{adapter}, missing_{missing} {}
+  explicit AdapterView(Adapter const *adapter, float missing, EncAccessor acc)
+      : adapter_{adapter}, missing_{missing}, acc_{std::move(acc)} {}
 
   [[nodiscard]] bst_idx_t DoFill(bst_idx_t ridx, float *out) const {
     auto const &batch = adapter_->Value();
@@ -282,78 +386,217 @@ class AdapterView : public DataToFeatVec<AdapterView<Adapter>> {
     for (size_t c = 0; c < row.Size(); ++c) {
       auto e = row.GetElement(c);
       if (missing_ != e.value && !common::CheckNAN(e.value)) {
-        out[e.column_idx] = e.value;
+        auto fvalue = this->acc_(e);
+        out[e.column_idx] = fvalue;
         n_non_missings++;
       }
     }
     return n_non_missings;
   }
 
-  [[nodiscard]] size_t Size() const { return adapter_->NumRows(); }
+  [[nodiscard]] bst_idx_t Size() const { return adapter_->NumRows(); }
 
   bst_idx_t const static base_rowid = 0;  // NOLINT
 };
 
-template <typename DataView, size_t block_of_rows_size>
-void PredictBatchByBlockOfRowsKernel(DataView batch, gbm::GBTreeModel const &model,
-                                     bst_tree_t tree_begin, bst_tree_t tree_end,
-                                     std::vector<RegTree::FVec> *p_thread_temp,
-                                     std::int32_t n_threads,
-                                     linalg::TensorView<float, 2> out_predt) {
-  auto &thread_temp = *p_thread_temp;
+// Ordinal re-coder.
+struct EncAccessorPolicy {
+ private:
+  std::vector<bst_cat_t> mapping_;
+
+ public:
+  EncAccessorPolicy() = default;
+
+  EncAccessorPolicy &operator=(EncAccessorPolicy const &that) = delete;
+  EncAccessorPolicy(EncAccessorPolicy const &that) = delete;
+
+  EncAccessorPolicy &operator=(EncAccessorPolicy &&that) = default;
+  EncAccessorPolicy(EncAccessorPolicy &&that) = default;
+
+  [[nodiscard]] auto MakeAccessor(Context const *ctx, enc::HostColumnsView new_enc,
+                                  gbm::GBTreeModel const &model) {
+    auto [acc, mapping] = MakeCatAccessor(ctx, new_enc, model.Cats());
+    std::swap(mapping, this->mapping_);
+    return acc;
+  }
+};
+
+struct NullEncAccessorPolicy {
+  template <typename... Args>
+  [[nodiscard]] auto MakeAccessor(Args &&...) const {
+    return NoOpAccessor{};
+  }
+};
+
+// Block-based parallel.
+struct BlockPolicy {
+  constexpr static std::size_t kBlockOfRowsSize = 64;
+};
+
+struct NullBlockPolicy {
+  constexpr static std::size_t kBlockOfRowsSize = 1;
+};
+
+/**
+ * @brief Policy class, requires a block policy and an accessor policy.
+ */
+template <typename... Args>
+struct LaunchConfig : public Args... {
+  Context const *ctx;
+  DMatrix *p_fmat;
+  gbm::GBTreeModel const &model;
+
+  LaunchConfig(Context const *ctx, DMatrix *p_fmat, gbm::GBTreeModel const &model)
+      : ctx{ctx}, p_fmat{p_fmat}, model{model} {}
+
+  LaunchConfig(LaunchConfig const &that) = delete;
+  LaunchConfig &operator=(LaunchConfig const &that) = delete;
+  LaunchConfig(LaunchConfig &&that) = default;
+  LaunchConfig &operator=(LaunchConfig &&that) = default;
+
+  // Helper for running prediction with DMatrix inputs.
+  template <typename Fn>
+  void ForEachBatch(Fn &&fn) {
+    auto acc = this->MakeAccessor(ctx, p_fmat->Cats()->HostView(), model);
+
+    if (!p_fmat->PageExists<SparsePage>()) {
+      auto ft = p_fmat->Info().feature_types.ConstHostVector();
+      for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx, {})) {
+        fn(GHistIndexMatrixView{page, acc, ft});
+      }
+    } else {
+      for (auto const &page : p_fmat->GetBatches<SparsePage>()) {
+        fn(SparsePageView{page.GetView(), page.base_rowid, acc});
+      }
+    }
+  }
+};
+
+/**
+ * @brief Dispatch for the prediction function.
+ *
+ * @tparam Fn A function that accepts a @ref LaunchConfig object.
+ * @tparam NeedRecode Given a DMatrix input, returns whether we need to recode the categorical
+ *         features.
+ */
+template <typename Fn, typename NeedRecode>
+void LaunchPredict(Context const *ctx, DMatrix *p_fmat, gbm::GBTreeModel const &model, Fn &&fn,
+                   NeedRecode &&need_recode) {
+  bool blocked = ShouldUseBlock(p_fmat);
+
+  if (blocked) {
+    if (model.Cats()->HasCategorical() && need_recode(p_fmat)) {
+      using Policy = LaunchConfig<BlockPolicy, EncAccessorPolicy>;
+      fn(Policy{ctx, p_fmat, model});
+    } else {
+      using Policy = LaunchConfig<BlockPolicy, NullEncAccessorPolicy>;
+      fn(Policy{ctx, p_fmat, model});
+    }
+  } else {
+    if (model.Cats()->HasCategorical() && need_recode(p_fmat)) {
+      using Policy = LaunchConfig<NullBlockPolicy, EncAccessorPolicy>;
+      fn(Policy{ctx, p_fmat, model});
+    } else {
+      using Policy = LaunchConfig<NullBlockPolicy, NullEncAccessorPolicy>;
+      fn(Policy{ctx, p_fmat, model});
+    }
+  }
+}
+
+template <typename Fn>
+void LaunchPredict(Context const *ctx, DMatrix *p_fmat, gbm::GBTreeModel const &model, Fn &&fn) {
+  LaunchPredict(ctx, p_fmat, model, fn,
+                [](DMatrix const *p_fmat) { return p_fmat->Cats()->NeedRecode(); });
+}
+
+/**
+ * @brief Thread-local buffer for the feature matrix.
+ */
+template <std::size_t kBlockOfRowsSize>
+class ThreadTmp {
+ private:
+  std::vector<RegTree::FVec> feat_vecs_;
+
+ public:
+  /**
+   * @param n_threads The number of threads used for prediction.
+   */
+  explicit ThreadTmp(std::int32_t n_threads) {
+    std::size_t n = n_threads * kBlockOfRowsSize;
+    std::size_t prev_thread_temp_size = feat_vecs_.size();
+    if (prev_thread_temp_size < n) {
+      feat_vecs_.resize(n, RegTree::FVec{});
+    }
+  }
+  /**
+   * @brief Get a thread local buffer.
+   *
+   * @param n The size of the thread local block.
+   */
+  common::Span<RegTree::FVec> ThreadBuffer(std::size_t n) {
+    std::int32_t thread_idx = omp_get_thread_num();
+    auto const fvec_offset = thread_idx * kBlockOfRowsSize;
+    auto fvec_tloc = common::Span{feat_vecs_}.subspan(fvec_offset, n);
+    return fvec_tloc;
+  }
+};
+
+template <std::size_t kBlockOfRowsSize, typename DataView>
+void PredictBatchByBlockKernel(DataView const &batch, HostModel const &model,
+                               ThreadTmp<kBlockOfRowsSize> *p_fvec, std::int32_t n_threads,
+                               bool any_missing, linalg::TensorView<float, 2> out_predt) {
+  auto &fvec = *p_fvec;
   // Parallel over local batches
   auto const n_samples = batch.Size();
-  auto const n_features = model.learner_model_param->num_feature;
-  auto const n_blocks = common::DivRoundUp(n_samples, kBlockOfRowsSize);
-
-  common::ParallelFor(n_blocks, n_threads, [&](auto block_id) {
-    auto const batch_offset = block_id * kBlockOfRowsSize;
-    auto const block_size =
-        std::min(static_cast<std::size_t>(n_samples - batch_offset), kBlockOfRowsSize);
-    auto const fvec_offset = omp_get_thread_num() * kBlockOfRowsSize;
-
-    FVecFill(block_size, batch_offset, n_features, &batch, fvec_offset, p_thread_temp);
-    // process block of rows through all trees to keep cache locality
-    PredictByAllTrees(model, tree_begin, tree_end, batch_offset + batch.base_rowid, thread_temp,
-                      fvec_offset, block_size, out_predt);
-    FVecDrop(block_size, fvec_offset, p_thread_temp);
+  auto const n_features = model.n_features;
+
+  /* Precalculate depth for each tree.
+   * These values are required only for the ArrayLayout optimization,
+   * so we don't need them if kBlockOfRowsSize == 1
+   */
+  std::vector<bst_node_t> tree_depth;
+  if constexpr (kBlockOfRowsSize > 1) {
+    tree_depth.resize(model.tree_end - model.tree_begin);
+    CHECK_EQ(tree_depth.size(), model.Trees().size());
+    common::ParallelFor(model.tree_end - model.tree_begin, n_threads, [&](auto i) {
+      std::visit([&](auto &&tree) { tree_depth[i] = tree.MaxDepth(); }, model.Trees()[i]);
+    });
+  }
+  common::ParallelFor1d(n_samples, n_threads, [&](auto &&block) {
+    auto fvec_tloc = fvec.ThreadBuffer(block.Size());
+
+    batch.FVecFill(block, n_features, fvec_tloc);
+    DispatchArrayLayout(model, block.begin() + batch.base_rowid, fvec_tloc, block.Size(),
+                        out_predt, tree_depth, any_missing);
+    batch.FVecDrop(fvec_tloc);
   });
 }
 
-float FillNodeMeanValues(RegTree const *tree, bst_node_t nidx, std::vector<float> *mean_values) {
-  bst_float result;
-  auto &node = (*tree)[nidx];
+float FillNodeMeanValues(tree::ScalarTreeView const &tree, bst_node_t nidx,
+                         std::vector<float> *mean_values) {
+  float result;
   auto &node_mean_values = *mean_values;
-  if (node.IsLeaf()) {
-    result = node.LeafValue();
+  if (tree.IsLeaf(nidx)) {
+    result = tree.LeafValue(nidx);
   } else {
-    result = FillNodeMeanValues(tree, node.LeftChild(), mean_values) *
-             tree->Stat(node.LeftChild()).sum_hess;
-    result += FillNodeMeanValues(tree, node.RightChild(), mean_values) *
-              tree->Stat(node.RightChild()).sum_hess;
-    result /= tree->Stat(nidx).sum_hess;
+    result = FillNodeMeanValues(tree, tree.LeftChild(nidx), mean_values) *
+             tree.Stat(tree.LeftChild(nidx)).sum_hess;
+    result += FillNodeMeanValues(tree, tree.RightChild(nidx), mean_values) *
+              tree.Stat(tree.RightChild(nidx)).sum_hess;
+    result /= tree.Stat(nidx).sum_hess;
   }
   node_mean_values[nidx] = result;
   return result;
 }
 
-void FillNodeMeanValues(RegTree const* tree, std::vector<float>* mean_values) {
-  auto n_nodes = tree->NumNodes();
+void FillNodeMeanValues(tree::ScalarTreeView const &tree, std::vector<float> *mean_values) {
+  auto n_nodes = tree.Size();
   if (static_cast<decltype(n_nodes)>(mean_values->size()) == n_nodes) {
     return;
   }
   mean_values->resize(n_nodes);
   FillNodeMeanValues(tree, 0, mean_values);
 }
-
-// init thread buffers
-static void InitThreadTemp(int nthread, std::vector<RegTree::FVec> *out) {
-  int prev_thread_temp_size = out->size();
-  if (prev_thread_temp_size < nthread) {
-    out->resize(nthread, RegTree::FVec());
-  }
-}
 }  // anonymous namespace
 
 /**
@@ -383,24 +626,34 @@ static void InitThreadTemp(int nthread, std::vector<RegTree::FVec> *out)
  */
 class ColumnSplitHelper {
 public:
-  ColumnSplitHelper(std::int32_t n_threads, gbm::GBTreeModel const &model, uint32_t tree_begin,
-                    uint32_t tree_end)
-      : n_threads_{n_threads}, model_{model}, tree_begin_{tree_begin}, tree_end_{tree_end} {
+  ColumnSplitHelper(std::int32_t n_threads, gbm::GBTreeModel const &model, bst_tree_t tree_begin,
+                    bst_tree_t tree_end)
+      : n_threads_{n_threads},
+        model_{model},
+        tree_begin_{tree_begin},
+        tree_end_{tree_end},
+        feat_vecs_{n_threads} {
+    CHECK(!model.learner_model_param->IsVectorLeaf())
+        << "Predict DMatrix with column split" << MTNotImplemented();
+    CHECK(!model.Cats()->HasCategorical())
+        << "Categorical feature is not yet supported with column-split.";
+    CHECK(xgboost::collective::IsDistributed())
+        << "column-split prediction is only supported for distributed training";
+
     auto const n_trees = tree_end_ - tree_begin_;
     tree_sizes_.resize(n_trees);
     tree_offsets_.resize(n_trees);
     for (decltype(tree_begin) i = 0; i < n_trees; i++) {
       auto const &tree = *model_.trees[tree_begin_ + i];
-      tree_sizes_[i] = tree.GetNodes().size();
+      tree_sizes_[i] = tree.Size();
     }
     // std::exclusive_scan (only available in c++17) equivalent to get tree offsets.
     tree_offsets_[0] = 0;
     for (decltype(tree_begin) i = 1; i < n_trees; i++) {
       tree_offsets_[i] = tree_offsets_[i - 1] + tree_sizes_[i - 1];
     }
+    // Add the size of the last tree since this is exclusive_scan
     bits_per_row_ = tree_offsets_.back() + tree_sizes_.back();
-
-    InitThreadTemp(n_threads_ * kBlockOfRowsSize, &feat_vecs_);
   }
 
   // Disable copy (and move) semantics.
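Editor's note: the constructor above packs one bit per (row, tree, node) into flat bit vectors, using a manual exclusive scan over per-tree node counts. Here is a small standalone sketch of that offset arithmetic; the `BitIndexSketch` layout is an assumption for illustration, since the patch does not show the body of `BitIndex`.

#include <cstddef>
#include <vector>

// Exclusive scan over per-tree node counts, as in the constructor above.
std::vector<std::size_t> TreeOffsets(std::vector<std::size_t> const& tree_sizes) {
  std::vector<std::size_t> offsets(tree_sizes.size(), 0);
  for (std::size_t i = 1; i < tree_sizes.size(); ++i) {
    offsets[i] = offsets[i - 1] + tree_sizes[i - 1];
  }
  return offsets;
}

// Assumed bit layout: each row owns a contiguous run of bits_per_row bits,
// and within a row each tree owns a run of tree_sizes[tree] bits.
std::size_t BitIndexSketch(std::size_t row, std::size_t tree_offset,
                           std::size_t nid, std::size_t bits_per_row) {
  return row * bits_per_row + tree_offset + nid;
}

For example, with tree sizes {7, 15} the offsets come out as {0, 7} and bits_per_row = 7 + 15 = 22, matching `tree_offsets_.back() + tree_sizes_.back()` above.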
@@ -410,24 +663,22 @@ class ColumnSplitHelper { ColumnSplitHelper &operator=(ColumnSplitHelper &&) noexcept = delete; void PredictDMatrix(Context const *ctx, DMatrix *p_fmat, std::vector *out_preds) { - CHECK(xgboost::collective::IsDistributed()) - << "column-split prediction is only supported for distributed training"; - + if (!p_fmat->PageExists()) { + LOG(FATAL) << "Predict with `QuantileDMatrix` is not supported with column-split."; + } for (auto const &batch : p_fmat->GetBatches()) { CHECK_EQ(out_preds->size(), p_fmat->Info().num_row_ * model_.learner_model_param->num_output_group); - PredictBatchKernel(ctx, SparsePageView{&batch}, out_preds); + PredictBatchKernel( + ctx, SparsePageView{batch.GetView(), batch.base_rowid, NoOpAccessor{}}, out_preds); } } - void PredictLeaf(Context const* ctx, DMatrix *p_fmat, std::vector *out_preds) { - CHECK(xgboost::collective::IsDistributed()) - << "column-split prediction is only supported for distributed training"; - + void PredictLeaf(Context const *ctx, DMatrix *p_fmat, std::vector *out_preds) { for (auto const &batch : p_fmat->GetBatches()) { CHECK_EQ(out_preds->size(), p_fmat->Info().num_row_ * (tree_end_ - tree_begin_)); - PredictBatchKernel(ctx, SparsePageView{&batch}, - out_preds); + PredictBatchKernel( + ctx, SparsePageView{batch.GetView(), batch.base_rowid, NoOpAccessor{}}, out_preds); } } @@ -468,18 +719,17 @@ class ColumnSplitHelper { } void MaskOneTree(RegTree::FVec const &feat, std::size_t tree_id, std::size_t row_id) { - auto const &tree = *model_.trees[tree_id]; + auto const tree = model_.trees[tree_id]->HostScView(); auto const &cats = tree.GetCategoriesMatrix(); - bst_node_t n_nodes = tree.GetNodes().size(); + bst_node_t n_nodes = tree.Size(); for (bst_node_t nid = 0; nid < n_nodes; nid++) { - auto const &node = tree[nid]; - if (node.IsDeleted() || node.IsLeaf()) { + if (tree.IsDeleted(nid) || tree.IsLeaf(nid)) { continue; } auto const bit_index = BitIndex(tree_id, row_id, nid); - unsigned split_index = node.SplitIndex(); + unsigned split_index = tree.SplitIndex(nid); if (feat.IsMissing(split_index)) { missing_bits_.Set(bit_index); continue; @@ -487,56 +737,59 @@ class ColumnSplitHelper { auto const fvalue = feat.GetFvalue(split_index); auto const decision = tree.HasCategoricalSplit() - ? GetDecision(node, nid, fvalue, cats) - : GetDecision(node, nid, fvalue, cats); + ? 
@@ -487,56 +737,59 @@ class ColumnSplitHelper {
       auto const fvalue = feat.GetFvalue(split_index);
       auto const decision = tree.HasCategoricalSplit()
-                                ? GetDecision<true>(node, nid, fvalue, cats)
-                                : GetDecision<false>(node, nid, fvalue, cats);
+                                ? GetDecision<true>(tree, nid, fvalue, cats)
+                                : GetDecision<false>(tree, nid, fvalue, cats);
       if (decision) {
         decision_bits_.Set(bit_index);
       }
     }
   }

-  void MaskAllTrees(std::size_t batch_offset, std::size_t fvec_offset, std::size_t block_size) {
+  void MaskAllTrees(std::size_t batch_offset, common::Span<RegTree::FVec> feat_vecs,
+                    std::size_t block_size) {
     for (auto tree_id = tree_begin_; tree_id < tree_end_; ++tree_id) {
       for (size_t i = 0; i < block_size; ++i) {
-        MaskOneTree(feat_vecs_[fvec_offset + i], tree_id, batch_offset + i);
+        MaskOneTree(feat_vecs[i], tree_id, batch_offset + i);
       }
     }
   }

-  bst_node_t GetNextNode(RegTree::Node const &node, std::size_t bit_index) {
+  bst_node_t GetNextNode(tree::ScalarTreeView const &tree, bst_node_t nidx, std::size_t bit_index) {
     if (missing_bits_.Check(bit_index)) {
-      return node.DefaultChild();
+      return tree.DefaultChild(nidx);
     } else {
-      return node.LeftChild() + !decision_bits_.Check(bit_index);
+      return tree.LeftChild(nidx) + !decision_bits_.Check(bit_index);
     }
   }

-  bst_node_t GetLeafIndex(RegTree const &tree, std::size_t tree_id, std::size_t row_id) {
-    bst_node_t nid = 0;
-    while (!tree[nid].IsLeaf()) {
-      auto const bit_index = BitIndex(tree_id, row_id, nid);
-      nid = GetNextNode(tree[nid], bit_index);
+  bst_node_t GetLeafIndex(tree::ScalarTreeView const &tree, std::size_t tree_id,
+                          std::size_t row_id) {
+    bst_node_t nidx = RegTree::kRoot;
+    while (!tree.IsLeaf(nidx)) {
+      auto const bit_index = BitIndex(tree_id, row_id, nidx);
+      nidx = GetNextNode(tree, nidx, bit_index);
    }
-    return nid;
+    return nidx;
  }

  template <bool predict_leaf>
  bst_float PredictOneTree(std::size_t tree_id, std::size_t row_id) {
-    auto const &tree = *model_.trees[tree_id];
+    auto const tree = model_.trees[tree_id]->HostScView();
     auto const leaf = GetLeafIndex(tree, tree_id, row_id);
     if constexpr (predict_leaf) {
       return static_cast<bst_float>(leaf);
     } else {
-      return tree[leaf].LeafValue();
+      return tree.LeafValue(leaf);
     }
   }

  template <bool predict_leaf>
-  void PredictAllTrees(std::vector<bst_float> *out_preds, std::size_t batch_offset,
+  void PredictAllTrees(common::Span<std::int32_t const> h_tree_groups,
+                       std::vector<float> *out_preds, std::size_t batch_offset,
                        std::size_t predict_offset, std::size_t num_group, std::size_t block_size) {
     auto &preds = *out_preds;
-    for (size_t tree_id = tree_begin_; tree_id < tree_end_; ++tree_id) {
-      auto const gid = model_.tree_info[tree_id];
+    for (auto tree_id = tree_begin_; tree_id < tree_end_; ++tree_id) {
+      auto const gid = h_tree_groups[tree_id];
       for (size_t i = 0; i < block_size; ++i) {
         auto const result = PredictOneTree<predict_leaf>(tree_id, batch_offset + i);
         if constexpr (predict_leaf) {
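
GetNextNode above is the entire traversal rule once the bit vectors have been allreduced: a set missing bit routes the row to the default child, otherwise the decision bit selects the left child when set, or the right child via `left + 1` when clear. A self-contained restatement with a toy node (illustrative; it relies only on the layout invariant that the right child is stored directly after the left):

    // Toy node: right child is left + 1, matching the invariant the code uses.
    struct ToyNode {
      int left;
      int default_child;
    };

    int NextNode(ToyNode const &n, bool missing_bit, bool decision_bit) {
      if (missing_bit) {
        return n.default_child;     // no worker had the feature value
      }
      return n.left + !decision_bit;  // set -> left child, clear -> right child
    }
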
@@ -548,53 +801,46 @@ class ColumnSplitHelper {
     }
   }

-  template <typename DataView, size_t block_of_rows_size, bool predict_leaf = false>
-  void PredictBatchKernel(Context const* ctx, DataView batch, std::vector<bst_float> *out_preds) {
+  template <bool predict_leaf = false, typename DataView>
+  void PredictBatchKernel(Context const *ctx, DataView batch, std::vector<bst_float> *out_preds) {
     auto const num_group = model_.learner_model_param->num_output_group;
     // parallel over local batch
-    auto const nsize = batch.Size();
-    auto const num_feature = model_.learner_model_param->num_feature;
-    auto const n_blocks = common::DivRoundUp(nsize, block_of_rows_size);
-    InitBitVectors(nsize);
-
-    // auto block_id has the same type as `n_blocks`.
-    common::ParallelFor(n_blocks, n_threads_, [&](auto block_id) {
-      auto const batch_offset = block_id * block_of_rows_size;
-      auto const block_size = std::min(static_cast<std::size_t>(nsize - batch_offset),
-                                       static_cast<std::size_t>(block_of_rows_size));
-      auto const fvec_offset = omp_get_thread_num() * block_of_rows_size;
-
-      FVecFill(block_size, batch_offset, num_feature, &batch, fvec_offset, &feat_vecs_);
-      MaskAllTrees(batch_offset, fvec_offset, block_size);
-      FVecDrop(block_size, fvec_offset, &feat_vecs_);
+    auto const n_samples = batch.Size();
+    auto const n_features = model_.learner_model_param->num_feature;
+
+    InitBitVectors(n_samples);
+
+    common::ParallelFor1d<kBlockOfRowsSize>(n_samples, n_threads_, [&](auto &&block) {
+      auto fvec_tloc = feat_vecs_.ThreadBuffer(block.Size());
+
+      batch.FVecFill(block, n_features, fvec_tloc);
+      MaskAllTrees(block.begin(), fvec_tloc, block.Size());
+      batch.FVecDrop(fvec_tloc);
     });

     AllreduceBitVectors(ctx);
+    auto h_tree_groups = this->model_.TreeGroups(ctx->Device());

-    // auto block_id has the same type as `n_blocks`.
-    common::ParallelFor(n_blocks, n_threads_, [&](auto block_id) {
-      auto const batch_offset = block_id * block_of_rows_size;
-      auto const block_size = std::min(static_cast<std::size_t>(nsize - batch_offset),
-                                       static_cast<std::size_t>(block_of_rows_size));
-      PredictAllTrees<predict_leaf>(out_preds, batch_offset, batch_offset + batch.base_rowid,
-                                    num_group, block_size);
+    common::ParallelFor1d<kBlockOfRowsSize>(n_samples, n_threads_, [&](auto &&block) {
+      PredictAllTrees<predict_leaf>(h_tree_groups, out_preds, block.begin(),
+                                    block.begin() + batch.base_rowid, num_group, block.Size());
     });

     ClearBitVectors();
   }

-  static std::size_t constexpr kBlockOfRowsSize = 64;
+  static std::size_t constexpr kBlockOfRowsSize = BlockPolicy::kBlockOfRowsSize;

   std::int32_t const n_threads_;
   gbm::GBTreeModel const &model_;
-  uint32_t const tree_begin_;
-  uint32_t const tree_end_;
+  bst_tree_t const tree_begin_;
+  bst_tree_t const tree_end_;
   std::vector<std::size_t> tree_sizes_{};
   std::vector<std::size_t> tree_offsets_{};
   std::size_t bits_per_row_{};
-  std::vector<RegTree::FVec> feat_vecs_{};
+  ThreadTmp<kBlockOfRowsSize> feat_vecs_;

   std::size_t n_rows_;

   /**
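
The kernel rewrite above swaps the manual block arithmetic (`batch_offset`, `block_size`, and a per-thread `fvec_offset` derived from `omp_get_thread_num()`) for `common::ParallelFor1d`, which hands the callback a block exposing `begin()` and `Size()`, and for `ThreadTmp`, whose `ThreadBuffer()` returns the calling thread's scratch `FVec`s. A rough single-threaded sketch of the blocking contract follows; the tiling and the names inside the sketch are assumptions, not the library's exact behavior:

    #include <algorithm>
    #include <cstddef>

    // Assumed contract of a 1-D blocked loop: tile [0, n) into kBlockSize-row
    // blocks and call fn once per block; the real helper also spreads the
    // blocks across worker threads.
    template <std::size_t kBlockSize, typename Fn>
    void ParallelFor1dSketch(std::size_t n, Fn &&fn) {
      struct Block {
        std::size_t begin_, size_;
        std::size_t begin() const { return begin_; }
        std::size_t Size() const { return size_; }
      };
      for (std::size_t offset = 0; offset < n; offset += kBlockSize) {
        fn(Block{offset, std::min(kBlockSize, n - offset)});
      }
    }
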
@@ -644,112 +890,86 @@ class CPUPredictor : public Predictor {
   void PredictDMatrix(DMatrix *p_fmat, std::vector<bst_float> *out_preds,
                       gbm::GBTreeModel const &model, bst_tree_t tree_begin,
                       bst_tree_t tree_end) const {
     if (p_fmat->Info().IsColumnSplit()) {
-      CHECK(!model.learner_model_param->IsVectorLeaf())
-          << "Predict DMatrix with column split" << MTNotImplemented();
-
       ColumnSplitHelper helper(this->ctx_->Threads(), model, tree_begin, tree_end);
       helper.PredictDMatrix(ctx_, p_fmat, out_preds);
       return;
     }

     auto const n_threads = this->ctx_->Threads();
-    constexpr double kDensityThresh = .5;
-    size_t total =
-        std::max(p_fmat->Info().num_row_ * p_fmat->Info().num_col_, static_cast<uint64_t>(1));
-    double density = static_cast<double>(p_fmat->Info().num_nonzero_) / static_cast<double>(total);
-    bool blocked = density > kDensityThresh;
-
-    std::vector<RegTree::FVec> feat_vecs;
-    InitThreadTemp(n_threads * (blocked ? kBlockOfRowsSize : 1), &feat_vecs);
-    std::size_t n_samples = p_fmat->Info().num_row_;
-    std::size_t n_groups = model.learner_model_param->OutputLength();
+    // Create a writable view on the output prediction vector.
+    bst_idx_t n_groups = model.learner_model_param->OutputLength();
+    bst_idx_t n_samples = p_fmat->Info().num_row_;
     CHECK_EQ(out_preds->size(), n_samples * n_groups);
     auto out_predt = linalg::MakeTensorView(ctx_, *out_preds, n_samples, n_groups);
-
-    if (!p_fmat->PageExists()) {
-      auto ft = p_fmat->Info().feature_types.ConstHostVector();
-      for (auto const &batch : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, {})) {
-        if (blocked) {
-          PredictBatchByBlockOfRowsKernel<GHistIndexMatrixView, kBlockOfRowsSize>(
-              GHistIndexMatrixView{batch, ft}, model, tree_begin, tree_end, &feat_vecs, n_threads,
-              out_predt);
-        } else {
-          PredictBatchByBlockOfRowsKernel<GHistIndexMatrixView, 1>(
-              GHistIndexMatrixView{batch, ft}, model, tree_begin, tree_end, &feat_vecs, n_threads,
-              out_predt);
-        }
-      }
-    } else {
-      for (auto const &batch : p_fmat->GetBatches<SparsePage>()) {
-        if (blocked) {
-          PredictBatchByBlockOfRowsKernel<SparsePageView, kBlockOfRowsSize>(
-              SparsePageView{&batch}, model, tree_begin, tree_end, &feat_vecs, n_threads,
-              out_predt);
-
-        } else {
-          PredictBatchByBlockOfRowsKernel<SparsePageView, 1>(SparsePageView{&batch}, model,
-                                                             tree_begin, tree_end, &feat_vecs,
-                                                             n_threads, out_predt);
-        }
-      }
-    }
+    bool any_missing = !(p_fmat->IsDense());
+    auto const h_model =
+        HostModel{DeviceOrd::CPU(), model, tree_begin, tree_end, &this->mu_, CopyViews{}};
+
+    LaunchPredict(this->ctx_, p_fmat, model, [&](auto &&policy) {
+      using Policy = common::GetValueT<decltype(policy)>;
+      ThreadTmp<Policy::kBlockOfRowsSize> feat_vecs{n_threads};
+      policy.ForEachBatch([&](auto &&batch) {
+        PredictBatchByBlockKernel(batch, h_model, &feat_vecs, n_threads,
                                  any_missing, out_predt);
+      });
+    });
  }

   template <typename DataView>
-  void PredictContributionKernel(
-      DataView batch, const MetaInfo &info, const gbm::GBTreeModel &model,
-      const std::vector<bst_float> *tree_weights, std::vector<std::vector<float>> *mean_values,
-      std::vector<RegTree::FVec> *feat_vecs, std::vector<bst_float> *contribs,
-      bst_tree_t ntree_limit, bool approximate, int condition, unsigned condition_feature) const {
-    const int num_feature = model.learner_model_param->num_feature;
-    const int ngroup = model.learner_model_param->num_output_group;
-    CHECK_NE(ngroup, 0);
+  void PredictContributionKernel(DataView batch, const MetaInfo &info, HostModel const &h_model,
+                                 linalg::VectorView<float const> base_score,
+                                 std::vector<bst_float> const *tree_weights,
+                                 std::vector<std::vector<float>> *mean_values,
+                                 ThreadTmp<1> *feat_vecs, std::vector<bst_float> *contribs,
+                                 bool approximate, int condition,
+                                 unsigned condition_feature) const {
+    const int num_feature = h_model.n_features;
+    const auto n_groups = h_model.n_groups;
+    CHECK_NE(n_groups, 0);
     size_t const ncolumns = num_feature + 1;
     CHECK_NE(ncolumns, 0);
     auto device = ctx_->Device().IsSycl() ? DeviceOrd::CPU() : ctx_->Device();
     auto base_margin = info.base_margin_.View(device);
-    auto base_score = model.learner_model_param->BaseScore(device)(0);
     // parallel over local batch
     common::ParallelFor(batch.Size(), this->ctx_->Threads(), [&](auto i) {
       auto row_idx = batch.base_rowid + i;
-      RegTree::FVec &feats = (*feat_vecs)[omp_get_thread_num()];
+      RegTree::FVec &feats = feat_vecs->ThreadBuffer(1).front();
       if (feats.Size() == 0) {
         feats.Init(num_feature);
       }
       std::vector<bst_float> this_tree_contribs(ncolumns);
       // loop over all classes
-      for (int gid = 0; gid < ngroup; ++gid) {
-        bst_float *p_contribs = &(*contribs)[(row_idx * ngroup + gid) * ncolumns];
+      for (bst_target_t gid = 0; gid < n_groups; ++gid) {
+        float *p_contribs = &(*contribs)[(row_idx * n_groups + gid) * ncolumns];
         batch.Fill(i, &feats);
         // calculate contributions
-        for (bst_tree_t j = 0; j < ntree_limit; ++j) {
+        for (bst_tree_t j = 0; j < h_model.tree_end; ++j) {
           auto *tree_mean_values = &mean_values->at(j);
           std::fill(this_tree_contribs.begin(), this_tree_contribs.end(), 0);
-          if (model.tree_info[j] != gid) {
+          if (h_model.tree_groups[j] != gid) {
             continue;
           }
+          auto sc_tree = std::get<tree::ScalarTreeView>(h_model.Trees()[j]);
           if (!approximate) {
-            CalculateContributions(*model.trees[j], feats, tree_mean_values,
-                                   &this_tree_contribs[0], condition, condition_feature);
+            CalculateContributions(sc_tree, feats, tree_mean_values, &this_tree_contribs[0],
+                                   condition, condition_feature);
          } else {
-            model.trees[j]->CalculateContributionsApprox(
-                feats, tree_mean_values, &this_tree_contribs[0]);
+            CalculateContributionsApprox(sc_tree, feats, tree_mean_values, &this_tree_contribs[0]);
          }
          for (size_t ci = 0; ci < ncolumns; ++ci) {
            p_contribs[ci] +=
-                this_tree_contribs[ci] *
-                (tree_weights == nullptr ? 1 : (*tree_weights)[j]);
+                this_tree_contribs[ci] * (tree_weights == nullptr ? 1 : (*tree_weights)[j]);
          }
        }
        feats.Drop();
        // add base margin to BIAS
        if (base_margin.Size() != 0) {
-          CHECK_EQ(base_margin.Shape(1), ngroup);
+          CHECK_EQ(base_margin.Shape(1), n_groups);
          p_contribs[ncolumns - 1] += base_margin(row_idx, gid);
        } else {
-          p_contribs[ncolumns - 1] += base_score;
+          p_contribs[ncolumns - 1] += base_score(gid);
        }
      }
    });
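
For orientation, the kernel above fills a flat buffer with one `(num_feature + 1)`-wide slice per (row, group); the extra trailing column is the BIAS slot that receives the base margin or the base score. The indexing it uses, extracted into a standalone helper (illustrative restatement of the expression in the code):

    #include <cstddef>

    // Start of the contribution slice for (row_idx, gid), as computed by
    // `&(*contribs)[(row_idx * n_groups + gid) * ncolumns]` above; the BIAS
    // entry lives at index `ncolumns - 1` inside the slice.
    std::size_t ContribOffset(std::size_t row_idx, std::size_t n_groups,
                              std::size_t gid, std::size_t n_features) {
      std::size_t const ncolumns = n_features + 1;
      return (row_idx * n_groups + gid) * ncolumns;
    }
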
@@ -769,57 +989,55 @@ class CPUPredictor : public Predictor {
     this->PredictDMatrix(dmat, &out_preds->HostVector(), model, tree_begin, tree_end);
   }

-  template <typename Adapter, std::size_t kBlockSize>
-  void DispatchedInplacePredict(std::any const &x, std::shared_ptr<DMatrix> p_m,
-                                const gbm::GBTreeModel &model, float missing,
-                                PredictionCacheEntry *out_preds, bst_tree_t tree_begin,
-                                bst_tree_t tree_end) const {
-    auto const n_threads = this->ctx_->Threads();
-    auto m = std::any_cast<std::shared_ptr<Adapter>>(x);
-    CHECK_EQ(m->NumColumns(), model.learner_model_param->num_feature)
-        << "Number of columns in data must equal to trained model.";
-    CHECK(p_m);
-    CHECK_EQ(p_m->Info().num_row_, m->NumRows());
-    CHECK_EQ(p_m->Info().num_col_, m->NumColumns());
-    this->InitOutPredictions(p_m->Info(), &(out_preds->predictions), model);
+  [[nodiscard]] bool InplacePredict(std::shared_ptr<DMatrix> p_m, gbm::GBTreeModel const &model,
+                                    float missing, PredictionCacheEntry *out_preds,
+                                    bst_tree_t tree_begin, bst_tree_t tree_end) const override {
+    auto proxy = dynamic_cast<data::DMatrixProxy *>(p_m.get());
+    CHECK(proxy) << error::InplacePredictProxy();
+    this->InitOutPredictions(p_m->Info(), &(out_preds->predictions), model);
     auto &predictions = out_preds->predictions.HostVector();
-    std::vector<RegTree::FVec> thread_temp;
-    InitThreadTemp(n_threads * kBlockSize, &thread_temp);
-    std::size_t n_groups = model.learner_model_param->OutputLength();
-    auto out_predt = linalg::MakeTensorView(ctx_, predictions, m->NumRows(), n_groups);
-    PredictBatchByBlockOfRowsKernel<AdapterView<Adapter>, kBlockSize>(
-        AdapterView(m.get(), missing), model, tree_begin, tree_end, &thread_temp,
-        n_threads, out_predt);
-  }
-
-  bool InplacePredict(std::shared_ptr<DMatrix> p_m, const gbm::GBTreeModel &model, float missing,
-                      PredictionCacheEntry *out_preds, bst_tree_t tree_begin,
-                      bst_tree_t tree_end) const override {
-    auto proxy = dynamic_cast<data::DMatrixProxy *>(p_m.get());
-    CHECK(proxy) << error::InplacePredictProxy();
-    CHECK(!p_m->Info().IsColumnSplit())
-        << "Inplace predict support for column-wise data split is not yet implemented.";
-    auto x = proxy->Adapter();
-    if (x.type() == typeid(std::shared_ptr<data::DenseAdapter>)) {
-      this->DispatchedInplacePredict<data::DenseAdapter, kBlockOfRowsSize>(
-          x, p_m, model, missing, out_preds, tree_begin, tree_end);
-    } else if (x.type() == typeid(std::shared_ptr<data::CSRAdapter>)) {
-      this->DispatchedInplacePredict<data::CSRAdapter, 1>(x, p_m, model, missing, out_preds,
-                                                          tree_begin, tree_end);
-    } else if (x.type() == typeid(std::shared_ptr<data::ArrayAdapter>)) {
-      this->DispatchedInplacePredict<data::ArrayAdapter, kBlockOfRowsSize>(
-          x, p_m, model, missing, out_preds, tree_begin, tree_end);
-    } else if (x.type() == typeid(std::shared_ptr<data::CSRArrayAdapter>)) {
-      this->DispatchedInplacePredict<data::CSRArrayAdapter, 1>(x, p_m, model, missing, out_preds,
-                                                               tree_begin, tree_end);
-    } else if (x.type() == typeid(std::shared_ptr<data::ColumnarAdapter>)) {
-      this->DispatchedInplacePredict<data::ColumnarAdapter, kBlockOfRowsSize>(
-          x, p_m, model, missing, out_preds, tree_begin, tree_end);
-    } else {
-      return false;
-    }
-    return true;
+    bool any_missing = true;
+
+    auto const n_threads = this->ctx_->Threads();
+    // Always use block as we don't know the nnz.
+    ThreadTmp<kBlockOfRowsSize> feat_vecs{n_threads};
+    bst_idx_t n_groups = model.learner_model_param->OutputLength();
+    auto const h_model =
+        HostModel{DeviceOrd::CPU(), model, tree_begin, tree_end, &this->mu_, CopyViews{}};
+
+    auto kernel = [&](auto &&view) {
+      auto out_predt = linalg::MakeTensorView(ctx_, predictions, view.Size(), n_groups);
+      PredictBatchByBlockKernel(view, h_model, &feat_vecs, n_threads,
+                                any_missing, out_predt);
+    };
+    auto dispatch = [&](auto x) {
+      using AdapterT = typename decltype(x)::element_type;
+      CheckProxyDMatrix(x, proxy, model.learner_model_param);
+      LaunchPredict(
+          this->ctx_, proxy, model,
+          [&](auto &&policy) {
+            if constexpr (std::is_same_v<AdapterT, data::ColumnarAdapter>) {
+              auto view =
+                  AdapterView{x.get(), missing, policy.MakeAccessor(ctx_, x->Cats(), model)};
+              kernel(view);
+            } else {
+              auto view = AdapterView{x.get(), missing, NoOpAccessor{}};
+              kernel(view);
+            }
+          },
+          [&](auto) {
+            if constexpr (std::is_same_v<AdapterT, data::ColumnarAdapter>) {
+              return !x->Cats().Empty();
+            } else {
+              return false;
+            }
+          });
+    };
+
+    bool type_error = false;
+    data::cpu_impl::DispatchAny(proxy, dispatch, &type_error);
+    return !type_error;
   }

   void PredictLeaf(DMatrix *p_fmat, HostDeviceVector<bst_float> *out_preds,
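
The rewrite collapses the old typeid ladder into a single generic `dispatch` lambda driven by `data::cpu_impl::DispatchAny`. A compact sketch of what such a dispatcher can look like, assuming it probes a fixed adapter list against the proxy's `std::any` payload (hypothetical helper; the real signature may differ):

    #include <any>
    #include <memory>
    #include <typeinfo>

    // Try each adapter type in turn; set *type_error when none matches.
    template <typename... Adapters, typename Fn>
    void DispatchAnySketch(std::any const &x, Fn &&fn, bool *type_error) {
      *type_error = !((x.type() == typeid(std::shared_ptr<Adapters>)
                           ? (fn(std::any_cast<std::shared_ptr<Adapters>>(x)), true)
                           : false) ||
                      ...);
    }

A caller would instantiate it over the supported adapters, for example `DispatchAnySketch<data::DenseAdapter, data::ArrayAdapter>(proxy->Adapter(), dispatch, &type_error);` (illustrative usage only).
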
@@ -828,47 +1046,40 @@ class CPUPredictor : public Predictor {
     // number of valid trees
     ntree_limit = GetTreeLimit(model.trees, ntree_limit);
     const MetaInfo &info = p_fmat->Info();
-    std::vector<bst_float> &preds = out_preds->HostVector();
+    std::vector<float> &preds = out_preds->HostVector();
     preds.resize(info.num_row_ * ntree_limit);
     if (p_fmat->Info().IsColumnSplit()) {
-      CHECK(!model.learner_model_param->IsVectorLeaf())
-          << "Predict leaf with column split" << MTNotImplemented();
-
       ColumnSplitHelper helper(n_threads, model, 0, ntree_limit);
       helper.PredictLeaf(ctx_, p_fmat, &preds);
       return;
     }

-    std::vector<RegTree::FVec> feat_vecs;
-    const int num_feature = model.learner_model_param->num_feature;
-    InitThreadTemp(n_threads, &feat_vecs);
-    // start collecting the prediction
-    for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
-      // parallel over local batch
-      auto page = batch.GetView();
-      common::ParallelFor(page.Size(), n_threads, [&](auto i) {
-        const int tid = omp_get_thread_num();
-        auto ridx = static_cast<std::size_t>(batch.base_rowid + i);
-        RegTree::FVec &feats = feat_vecs[tid];
-        if (feats.Size() == 0) {
-          feats.Init(num_feature);
-        }
-        feats.Fill(page[i]);
-        for (bst_tree_t j = 0; j < ntree_limit; ++j) {
-          auto const &tree = *model.trees[j];
-          auto const &cats = tree.GetCategoriesMatrix();
-          bst_node_t nidx;
-          if (tree.IsMultiTarget()) {
-            nidx = multi::GetLeafIndex<true, true>(*tree.GetMultiTargetTree(), feats, cats);
-          } else {
-            nidx = scalar::GetLeafIndex<true, true>(tree, feats, cats);
+    auto n_features = model.learner_model_param->num_feature;
+    ThreadTmp<1> feat_vecs{n_threads};
+
+    auto const h_model =
+        HostModel{DeviceOrd::CPU(), model, 0, ntree_limit, &this->mu_, CopyViews{}};
+    LaunchPredict(this->ctx_, p_fmat, model, [&](auto &&policy) {
+      policy.ForEachBatch([&](auto &&batch) {
+        common::ParallelFor1d<1>(batch.Size(), n_threads, [&](auto &&block) {
+          auto ridx = static_cast<std::size_t>(batch.base_rowid + block.begin());
+          auto fvec_tloc = feat_vecs.ThreadBuffer(block.Size());
+          batch.FVecFill(block, n_features, fvec_tloc);
+
+          for (bst_tree_t j = 0; j < ntree_limit; ++j) {
+            bst_node_t nidx = std::visit(
+                [&](auto &&tree) {
+                  return GetLeafIndex<true, true>(tree, fvec_tloc.front(),
+                                                  tree.GetCategoriesMatrix(), RegTree::kRoot);
+                },
+                h_model.Trees()[j]);
+            preds[ridx * ntree_limit + j] = static_cast<float>(nidx);
           }
-          preds[ridx * ntree_limit + j] = static_cast<bst_float>(nidx);
-        }
-        feats.Drop();
+          batch.FVecDrop(fvec_tloc);
+        });
       });
-    }
+    });
   }

   void PredictContribution(DMatrix *p_fmat, HostDeviceVector<bst_float> *out_contribs,
@@ -880,14 +1091,13 @@ class CPUPredictor : public Predictor {
     CHECK(!p_fmat->Info().IsColumnSplit())
         << "Predict contribution support for column-wise data split is not yet implemented.";
     auto const n_threads = this->ctx_->Threads();
-    std::vector<RegTree::FVec> feat_vecs;
-    InitThreadTemp(n_threads, &feat_vecs);
-    const MetaInfo& info = p_fmat->Info();
+    ThreadTmp<1> feat_vecs{n_threads};
+    const MetaInfo &info = p_fmat->Info();
     // number of valid trees
     ntree_limit = GetTreeLimit(model.trees, ntree_limit);
     size_t const ncolumns = model.learner_model_param->num_feature + 1;
     // allocate space for (number of features + bias) times the number of rows
-    std::vector<bst_float>& contribs = out_contribs->HostVector();
+    std::vector<float> &contribs = out_contribs->HostVector();
     contribs.resize(info.num_row_ * ncolumns * model.learner_model_param->num_output_group);
     // make sure contributions is zeroed, we could be reusing a previously
     // allocated one
@@ -895,23 +1105,19 @@ class CPUPredictor : public Predictor {
     // initialize tree node mean values
     std::vector<std::vector<float>> mean_values(ntree_limit);
     common::ParallelFor(ntree_limit, n_threads, [&](bst_omp_uint i) {
-      FillNodeMeanValues(model.trees[i].get(), &(mean_values[i]));
+      FillNodeMeanValues(model.trees[i]->HostScView(), &(mean_values[i]));
     });
-    // start collecting the contributions
-    if (!p_fmat->PageExists()) {
-      auto ft = p_fmat->Info().feature_types.ConstHostVector();
-      for (const auto &batch : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, {})) {
-        PredictContributionKernel(GHistIndexMatrixView{batch, ft}, info, model, tree_weights,
-                                  &mean_values, &feat_vecs, &contribs, ntree_limit, approximate,
+
+    auto const h_model =
+        HostModel{DeviceOrd::CPU(), model, 0, ntree_limit, &this->mu_, CopyViews{}};
+    LaunchPredict(this->ctx_, p_fmat, model, [&](auto &&policy) {
+      policy.ForEachBatch([&](auto &&batch) {
+        PredictContributionKernel(batch, info, h_model,
+                                  model.learner_model_param->BaseScore(DeviceOrd::CPU()),
+                                  tree_weights, &mean_values, &feat_vecs, &contribs, approximate,
                                   condition, condition_feature);
-      }
-    } else {
-      for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
-        PredictContributionKernel(
-            SparsePageView{&batch}, info, model, tree_weights, &mean_values, &feat_vecs,
-            &contribs, ntree_limit, approximate, condition, condition_feature);
-      }
-    }
+      });
+    });
   }

   void PredictInteractionContributions(DMatrix *p_fmat, HostDeviceVector<bst_float> *out_contribs,
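
PredictLeaf and the contribution kernel now pull trees out of `h_model.Trees()` as variants: `std::visit` serves both scalar and multi-target views through one generic lambda, while `std::get<tree::ScalarTreeView>` asserts the scalar case where multi-target has already been ruled out. A toy illustration of the pattern (stand-in types; the real views live in `src/tree/tree_view.h`):

    #include <variant>

    struct ScalarViewToy { int LeafFor(int /*row*/) const { return 3; } };
    struct MultiViewToy  { int LeafFor(int /*row*/) const { return 7; } };
    using TreeViewVarToy = std::variant<ScalarViewToy, MultiViewToy>;

    // One call site serves both layouts; the alternative is resolved
    // statically inside the generic lambda, without virtual dispatch per node.
    int LeafIndex(TreeViewVarToy const &tree, int row) {
      return std::visit([&](auto &&v) { return v.LeafFor(row); }, tree);
    }
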
@@ -922,15 +1128,15 @@ class CPUPredictor : public Predictor {
         << "Predict interaction contribution" << MTNotImplemented();
     CHECK(!p_fmat->Info().IsColumnSplit()) << "Predict interaction contribution support for "
                                               "column-wise data split is not yet implemented.";
-    const MetaInfo& info = p_fmat->Info();
-    const int ngroup = model.learner_model_param->num_output_group;
-    size_t const ncolumns = model.learner_model_param->num_feature;
+    const MetaInfo &info = p_fmat->Info();
+    auto const ngroup = model.learner_model_param->num_output_group;
+    auto const ncolumns = model.learner_model_param->num_feature;
     const unsigned row_chunk = ngroup * (ncolumns + 1) * (ncolumns + 1);
     const unsigned mrow_chunk = (ncolumns + 1) * (ncolumns + 1);
     const unsigned crow_chunk = ngroup * (ncolumns + 1);

     // allocate space for (number of features^2) times the number of rows and tmp off/on contribs
-    std::vector<bst_float>& contribs = out_contribs->HostVector();
+    std::vector<float> &contribs = out_contribs->HostVector();
     contribs.resize(info.num_row_ * ngroup * (ncolumns + 1) * (ncolumns + 1));
     HostDeviceVector<bst_float> contribs_off_hdv(info.num_row_ * ngroup * (ncolumns + 1));
     auto &contribs_off = contribs_off_hdv.HostVector();
@@ -942,16 +1148,16 @@ class CPUPredictor : public Predictor {
     // Compute the difference in effects when conditioning on each of the features on and off
     // see: Axiomatic characterizations of probabilistic and
     //      cardinal-probabilistic interaction indices
-    PredictContribution(p_fmat, &contribs_diag_hdv, model, ntree_limit,
-                        tree_weights, approximate, 0, 0);
+    PredictContribution(p_fmat, &contribs_diag_hdv, model, ntree_limit, tree_weights, approximate,
+                        0, 0);
     for (size_t i = 0; i < ncolumns + 1; ++i) {
-      PredictContribution(p_fmat, &contribs_off_hdv, model, ntree_limit,
-                          tree_weights, approximate, -1, i);
-      PredictContribution(p_fmat, &contribs_on_hdv, model, ntree_limit,
-                          tree_weights, approximate, 1, i);
+      PredictContribution(p_fmat, &contribs_off_hdv, model, ntree_limit, tree_weights, approximate,
+                          -1, i);
+      PredictContribution(p_fmat, &contribs_on_hdv, model, ntree_limit, tree_weights, approximate,
+                          1, i);

       for (size_t j = 0; j < info.num_row_; ++j) {
-        for (int l = 0; l < ngroup; ++l) {
+        for (std::remove_const_t<decltype(ngroup)> l = 0; l < ngroup; ++l) {
           const unsigned o_offset = j * row_chunk + l * mrow_chunk + i * (ncolumns + 1);
           const unsigned c_offset = j * crow_chunk + l * (ncolumns + 1);
           contribs[o_offset + i] = 0;
@@ -960,7 +1166,8 @@ class CPUPredictor : public Predictor {
             if (k == i) {
               contribs[o_offset + i] += contribs_diag[c_offset + k];
             } else {
-              contribs[o_offset + k] = (contribs_on[c_offset + k] - contribs_off[c_offset + k])/2.0;
+              contribs[o_offset + k] =
+                  (contribs_on[c_offset + k] - contribs_off[c_offset + k]) / 2.0;
               contribs[o_offset + i] -= contribs[o_offset + k];
             }
           }
@@ -970,7 +1177,7 @@ class CPUPredictor : public Predictor {
   }

  private:
-  static size_t constexpr kBlockOfRowsSize = 64;
+  mutable std::mutex mu_;
 };

 XGBOOST_REGISTER_PREDICTOR(CPUPredictor, "cpu_predictor")
diff --git a/src/predictor/cpu_treeshap.h b/src/predictor/cpu_treeshap.h
deleted file mode 100644
index 3cdbcc4a998e..000000000000
--- a/src/predictor/cpu_treeshap.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef XGBOOST_PREDICTOR_CPU_TREESHAP_H_
-#define XGBOOST_PREDICTOR_CPU_TREESHAP_H_
-/**
- * Copyright by XGBoost Contributors 2017-2022
- */
-#include <vector>  // vector
-
-#include "xgboost/tree_model.h"  // RegTree
-
-namespace xgboost {
-/**
- * \brief calculate the feature contributions (https://arxiv.org/abs/1706.06060) for the tree
- * \param feat dense feature vector, if the feature is missing the field is set to NaN
- * \param out_contribs output vector to hold the contributions
- * \param condition fix one feature to either off (-1) on (1) or not fixed (0 default)
- * \param condition_feature the index of the feature to fix
- */
-void CalculateContributions(RegTree const &tree, const RegTree::FVec &feat,
-                            std::vector<float> *mean_values, bst_float *out_contribs, int condition,
-                            unsigned condition_feature);
-}  // namespace xgboost
-#endif  // XGBOOST_PREDICTOR_CPU_TREESHAP_H_
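
As a reading aid for the interaction loop in PredictInteractionContributions above: each off-diagonal entry is half the difference between contributions conditioned on feature i being on and off, and the diagonal absorbs the remainder, which keeps every row of the interaction matrix summing to the model output. In LaTeX (a restatement of the code, not a quotation from the cited paper):

    \Phi_{i,k} \;=\; \tfrac{1}{2}\bigl(\phi_k^{(i=\mathrm{on})} - \phi_k^{(i=\mathrm{off})}\bigr)
    \quad (k \neq i),
    \qquad
    \Phi_{i,i} \;=\; \phi_i^{\mathrm{diag}} \;-\; \sum_{k \neq i} \Phi_{i,k}
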
diff --git a/src/predictor/gbtree_view.h b/src/predictor/gbtree_view.h
new file mode 100644
index 000000000000..c7acbbad5ba7
--- /dev/null
+++ b/src/predictor/gbtree_view.h
@@ -0,0 +1,83 @@
+/**
+ * Copyright 2025, XGBoost Contributors
+ */
+#pragma once
+
+#include <mutex>    // for mutex, lock_guard
+#include <utility>  // for move
+#include <vector>   // for vector
+
+#include "../gbm/gbtree_model.h"  // for GBTreeModel
+#include "../tree/tree_view.h"    // for MultiTargetTreeView, ScalarTreeView
+#include "xgboost/base.h"         // for bst_tree_t, bst_target_t
+#include "xgboost/context.h"      // for DeviceOrd
+#include "xgboost/logging.h"      // for CHECK_GT
+#include "xgboost/span.h"         // for Span
+
+namespace xgboost::predictor {
+/**
+ * @brief A view for the boosted trees to ensure thread safety.
+ *
+ * This class contains a subset of trees based on the input tree range.
+ *
+ * @tparam Container The container for storing the tree view variants.
+ * @tparam TreeViewVar A std::variant for different view types.
+ * @tparam CopyViews A policy for how to copy the tree views into the container.
+ */
+template <typename Container, typename TreeViewVar, typename CopyViews>