diff --git a/.asf.yaml b/.asf.yaml
index 14178a61c8..3133513c49 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -24,12 +24,17 @@ github:
     merge: false
     squash: true
    rebase: true
+  ghp_branch: main
+  ghp_path: /site
   labels:
     - apache
     - orc
     - java
     - cpp
     - big-data
+  protected_tags:
+    - "rel/*"
+    - "v*.*.*"
 notifications:
   pullrequests: issues@orc.apache.org
   issues: issues@orc.apache.org
diff --git a/.clang-tidy b/.clang-tidy
index bd995bca54..b401f8948b 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -21,13 +21,14 @@ Checks: "-*,
 CheckOptions: [
+  { key: readability-identifier-naming.ParameterCase, value: "camelBack" },
+  { key: readability-identifier-naming.PrivateMemberCase, value: "camelBack"},
   { key: readability-identifier-naming.PrivateMemberSuffix, value: "_" },
   { key: readability-identifier-naming.ProtectedMemberSuffix, value: "" },
   { key: readability-identifier-naming.PublicMemberSuffix, value: "" },
-  { key: readability-identifier-naming.ParameterCase, value: "camelBack" },
   { key: readability-identifier-naming.ParameterIgnoredRegexp, value: "^[a-zA-Z]$" },
 ]
 WarningsAsErrors: ''
-HeaderFilterRegex: '.*'
+HeaderFilterRegex: '(orc/c\+\+/|orc/tools)'
 FormatStyle: none
\ No newline at end of file
diff --git a/.github/.licenserc.yaml b/.github/.licenserc.yaml
index a66db6601f..a16671e9d6 100644
--- a/.github/.licenserc.yaml
+++ b/.github/.licenserc.yaml
@@ -22,5 +22,6 @@ header:
     - 'NOTICE'
     - '.clang-format'
     - '.asf.yaml'
+    - '.nojekyll'
 
 comment: on-failure
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 8eddbcdea3..5722a9d3ba 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -20,9 +20,9 @@ updates:
     schedule:
       interval: "weekly"
     ignore:
-      # Pin gson to 2.2.4 because of Hive
+      # Pin gson to 2.9.0 because of Hive
       - dependency-name: "com.google.code.gson:gson"
-        versions: "[2.3,)"
+        versions: "[2.9.1,)"
       # Pin jodd-core to 3.5.2
       - dependency-name: "org.jodd:jodd-core"
         versions: "[3.5.3,)"
diff --git a/.github/lsan-suppressions.txt b/.github/lsan-suppressions.txt
new file mode 100644
index 0000000000..fc26ee8754
--- /dev/null
+++ b/.github/lsan-suppressions.txt
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Add specific leak suppressions here if needed
+# Format:
+# leak:SymbolName
+# leak:source_file.cc
diff --git a/.github/workflows/asan_test.yml b/.github/workflows/asan_test.yml
new file mode 100644
index 0000000000..f4a31525f3
--- /dev/null
+++ b/.github/workflows/asan_test.yml
@@ -0,0 +1,64 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Address Sanitizer Tests
+
+on:
+  pull_request:
+    paths-ignore:
+    - 'site/**'
+    - 'conan/**'
+    branches:
+    - main
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.event.number || github.sha }}
+  cancel-in-progress: true
+
+jobs:
+  asan-test:
+    name: "ASAN with ${{ matrix.compiler }} on Ubuntu"
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        compiler: [gcc, clang]
+        include:
+        - compiler: gcc
+          cc: gcc
+          cxx: g++
+        - compiler: clang
+          cc: clang
+          cxx: clang++
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v4
+    - name: Configure and Build with ASAN
+      env:
+        CC: ${{ matrix.cc }}
+        CXX: ${{ matrix.cxx }}
+      run: |
+        mkdir build && cd build
+        cmake .. -DCMAKE_BUILD_TYPE=Debug -DENABLE_ASAN=ON -DBUILD_JAVA=OFF
+        make
+    - name: Run Tests
+      working-directory: build
+      env:
+        ASAN_OPTIONS: detect_leaks=1:symbolize=1:strict_string_checks=1:halt_on_error=0:detect_container_overflow=0
+        LSAN_OPTIONS: suppressions=${{ github.workspace }}/.github/lsan-suppressions.txt
+      run: |
+        ctest --output-on-failure
diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index b0350193ba..2cebbe4c14 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 name: Build and test
 
 on:
@@ -32,6 +49,7 @@ jobs:
           - ubuntu24
           - fedora37
           - oraclelinux9
+          - amazonlinux23
     steps:
       - name: Checkout
        uses: actions/checkout@v2
@@ -47,11 +65,12 @@ jobs:
       fail-fast: false
       matrix:
         os:
-          - ubuntu-20.04
           - ubuntu-22.04
-          - macos-12
+          - ubuntu-24.04
+          - ubuntu-24.04-arm
           - macos-13
           - macos-14
+          - macos-15
         java:
           - 17
           - 21
@@ -67,28 +86,18 @@ jobs:
     steps:
       - name: Checkout
         uses: actions/checkout@v2
-      - name: Cache Maven local repository
-        uses: actions/cache@v2
-        with:
-          path: ~/.m2/repository
-          key: ${{ matrix.java }}-maven-${{ hashFiles('**/pom.xml') }}
-          restore-keys: |
-            ${{ matrix.java }}-maven-
       - name: Install Java ${{ matrix.java }}
-        uses: actions/setup-java@v3
+        uses: actions/setup-java@v4
         with:
           distribution: zulu
           java-version: ${{ matrix.java }}
+          cache: 'maven'
       - name: "Test"
         run: |
           mkdir -p ~/.m2
           mkdir build
           cd build
-          if [ "${{ matrix.os }}" = "ubuntu-20.04" ]; then
-            cmake -DANALYZE_JAVA=ON -DCMAKE_CXX_COMPILER=${{ matrix.cxx }} -DSTOP_BUILD_ON_WARNING=OFF ..
-          else
-            cmake -DANALYZE_JAVA=ON -DOPENSSL_ROOT_DIR=`brew --prefix openssl@1.1` ..
-          fi
+          cmake -DANALYZE_JAVA=ON -DOPENSSL_ROOT_DIR=`brew --prefix openssl@1.1` ..
           make package test-out
       - name: Step on failure
         if: ${{ failure() }}
@@ -151,15 +160,16 @@ jobs:
   doc:
     name: "Javadoc generation"
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-24.04
     steps:
       - name: Checkout
         uses: actions/checkout@v2
       - name: Install Java 17
-        uses: actions/setup-java@v3
+        uses: actions/setup-java@v4
         with:
           distribution: zulu
           java-version: 17
+          cache: 'maven'
       - name: "javadoc"
         run: |
           mkdir -p ~/.m2
@@ -167,21 +177,33 @@ jobs:
           ./mvnw install -DskipTests
           ./mvnw javadoc:javadoc
 
-  formatting-check:
-    name: "C++ format check"
-    runs-on: ubuntu-20.04
-    strategy:
-      matrix:
-        path:
-          - 'c++'
-          - 'tools'
+  cpp-linter:
+    runs-on: ubuntu-24.04
     steps:
-      - uses: actions/checkout@v3
-      - name: Run clang-format style check for C++ code
-        uses: jidicula/clang-format-action@v4.9.0
-        with:
-          clang-format-version: '13'
-          check-path: ${{ matrix.path }}
+      - uses: actions/checkout@v4
+      - name: Run build
+        run: |
+          mkdir build && cd build
+          cmake .. -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DBUILD_JAVA=OFF
+          cmake --build .
+      - uses: cpp-linter/cpp-linter-action@v2.13.3
+        id: linter
+        continue-on-error: true
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          style: file
+          tidy-checks: file
+          files-changed-only: true
+          lines-changed-only: true
+          thread-comments: true
+          ignore: 'build|cmake_modules|conan|dev|docker|examples|java|site'
+          database: build
+      - name: Fail fast?!
+        if: steps.linter.outputs.checks-failed != 0
+        run: |
+          echo "some linter checks failed. ${{ steps.linter.outputs.checks-failed }}"
+          exit 1
 
   license-check:
     name: "License Check"
@@ -196,3 +218,25 @@ jobs:
         with:
           config: .github/.licenserc.yaml
 
+  macos-cpp-check:
+    name: "C++ Test on macOS"
+    strategy:
+      fail-fast: false
+      matrix:
+        version: [13, 14]
+    runs-on: macos-${{ matrix.version }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - name: Install dependencies
+        run: |
+          brew update
+          brew install protobuf
+      - name: Test
+        run: |
+          CMAKE_PREFIX_PATH=$(brew --prefix protobuf)
+          mkdir -p build
+          cd build
+          cmake .. -DBUILD_JAVA=OFF -DPROTOBUF_HOME=${CMAKE_PREFIX_PATH}
+          make package test-out
diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
new file mode 100644
index 0000000000..52b2e1fc7b
--- /dev/null
+++ b/.github/workflows/pages.yml
@@ -0,0 +1,72 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: GitHub Pages deployment
+
+on:
+  push:
+    branches:
+      - main
+
+concurrency:
+  group: 'docs preview'
+  cancel-in-progress: false
+
+jobs:
+  docs:
+    name: Build and deploy documentation
+    runs-on: ubuntu-latest
+    permissions:
+      id-token: write
+      pages: write
+    environment:
+      name: github-pages # https://github.com/actions/deploy-pages/issues/271
+    if: github.repository == 'apache/orc'
+    steps:
+      - name: Checkout ORC repository
+        uses: actions/checkout@v4
+        with:
+          repository: apache/orc
+          ref: 'main'
+      - name: Install Java 17
+        uses: actions/setup-java@v4
+        with:
+          distribution: zulu
+          java-version: 17
+      - name: Install Ruby for documentation generation
+        uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: '3.3'
+          bundler-cache: true
+      - name: Run documentation build
+        run: |
+          cd site
+          gem install bundler -n /usr/local/bin
+          bundle install --retry=100
+          git clone https://github.com/apache/orc.git -b asf-site target
+          bundle exec jekyll build -b /orc
+      - name: Setup Pages
+        uses: actions/configure-pages@v5
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          path: 'site/target'
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
diff --git a/.nojekyll b/.nojekyll
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1fb0e755d6..e6ce4fde0a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -27,10 +27,11 @@ project(ORC C CXX)
 # Version number of package
 SET(CPACK_PACKAGE_VERSION_MAJOR "2")
-SET(CPACK_PACKAGE_VERSION_MINOR "1")
+SET(CPACK_PACKAGE_VERSION_MINOR "2")
 SET(CPACK_PACKAGE_VERSION_PATCH "0-SNAPSHOT")
 SET(ORC_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
-set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/cmake_modules")
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # For clang-tidy.
+list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules")
 
 option (BUILD_JAVA
     "Include ORC Java library in the build process"
@@ -42,7 +43,7 @@ option (ANALYZE_JAVA
 
 option (BUILD_LIBHDFSPP
     "Include LIBHDFSPP library in the build process"
-    ON)
+    OFF)
 
 option(BUILD_CPP_TESTS
     "Build the googletest unit tests"
@@ -76,10 +77,18 @@ option(BUILD_ENABLE_AVX512
     "Enable build with AVX512 at compile time"
     OFF)
 
+option(ENABLE_ASAN
+    "Enable Address Sanitizer"
+    OFF)
+
 option(ORC_PACKAGE_KIND
     "Arbitrary string that identifies the kind of package"
     "")
 
+option(ORC_ENABLE_CLANG_TOOLS
+    "Enable Clang tools"
+    OFF)
+
 # Make sure that a build type is selected
 if (NOT CMAKE_BUILD_TYPE)
   message(STATUS "No build type selected, default to ReleaseWithDebugInfo")
@@ -151,17 +160,27 @@ elseif (MSVC)
   set (WARN_FLAGS "${WARN_FLAGS} -wd4521") # multiple copy constructors specified
   set (WARN_FLAGS "${WARN_FLAGS} -wd4146") # unary minus operator applied to unsigned type, result still unsigned
 endif ()
 
+# Configure Address Sanitizer if enabled
+if (ENABLE_ASAN)
+  if (CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
+    message(STATUS "Address Sanitizer enabled")
+  else()
+    message(WARNING "Address Sanitizer is only supported for GCC and Clang compilers")
+  endif()
+endif()
 
-if (BUILD_CPP_ENABLE_METRICS)
-  message(STATUS "Enable the metrics collection")
-  add_compile_definitions(ENABLE_METRICS=1)
+enable_testing()
+
+INCLUDE(GNUInstallDirs) # Put it before ThirdpartyToolchain to make CMAKE_INSTALL_LIBDIR available.
+
+if (ORC_PACKAGE_KIND STREQUAL "vcpkg")
+  set(ORC_INSTALL_CMAKE_DIR ${CMAKE_INSTALL_DATAROOTDIR}/orc)
 else ()
-  message(STATUS "Disable the metrics collection")
-  add_compile_definitions(ENABLE_METRICS=0)
+  set(ORC_INSTALL_CMAKE_DIR ${CMAKE_INSTALL_LIBDIR}/cmake/orc)
 endif ()
 
-enable_testing()
-
 INCLUDE(CheckSourceCompiles)
 INCLUDE(ThirdpartyToolchain)
 
@@ -180,7 +199,7 @@ if (BUILD_ENABLE_AVX512 AND NOT APPLE)
   INCLUDE(ConfigSimdLevel)
 endif ()
 
-set (EXAMPLE_DIRECTORY ${CMAKE_SOURCE_DIR}/examples)
+set (EXAMPLE_DIRECTORY ${PROJECT_SOURCE_DIR}/examples)
 
 add_subdirectory(c++)
 
@@ -210,3 +229,7 @@ if (BUILD_CPP_TESTS)
     )
   endif ()
 endif ()
+
+if (ORC_ENABLE_CLANG_TOOLS)
+  INCLUDE(CheckFormat)
+endif ()
diff --git a/README.md b/README.md
index 60b0da5fcb..cf5c5d0793 100644
--- a/README.md
+++ b/README.md
@@ -43,7 +43,7 @@ The subdirectories are:
 ### Building
 
 * Install java 17 or higher
-* Install maven 3.9.6 or higher
+* Install maven 3.9.9 or higher
 * Install cmake 3.12 or higher
 
 To build a release version with debug information:
diff --git a/c++/CMakeLists.txt b/c++/CMakeLists.txt
index 449bd10f3e..38c38f7ce4 100644
--- a/c++/CMakeLists.txt
+++ b/c++/CMakeLists.txt
@@ -15,14 +15,23 @@
 # specific language governing permissions and limitations
 # under the License.
 
-include_directories (
-  ${CMAKE_CURRENT_BINARY_DIR}/include
-  "include"
-  )
-
 add_subdirectory(include)
 add_subdirectory(src)
 
 if (BUILD_CPP_TESTS)
   add_subdirectory(test)
 endif ()
+
+# Generate cmake package configuration files
+include(CMakePackageConfigHelpers)
+configure_package_config_file(
+  orcConfig.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/orcConfig.cmake
+  INSTALL_DESTINATION ${ORC_INSTALL_CMAKE_DIR})
+write_basic_package_version_file(
+  ${CMAKE_CURRENT_BINARY_DIR}/orcConfigVersion.cmake
+  VERSION ${ORC_VERSION}
+  COMPATIBILITY SameMajorVersion)
+install(FILES
+  ${CMAKE_CURRENT_BINARY_DIR}/orcConfig.cmake
+  ${CMAKE_CURRENT_BINARY_DIR}/orcConfigVersion.cmake
+  DESTINATION ${ORC_INSTALL_CMAKE_DIR})
diff --git a/c++/build-support/README.md b/c++/build-support/README.md
new file mode 100644
index 0000000000..80966104bb
--- /dev/null
+++ b/c++/build-support/README.md
@@ -0,0 +1,30 @@
+# Build support
+
+The Python scripts under this folder provide capabilities for formatting code.
+Make sure you have installed `clang-format-13`, `clang-tidy-13` and
+`clang-apply-replacements-13`, and that CMake can find them.
+We enforce the version of the tools because different versions may generate
+different results.
+
+## clang-format
+
+To use `run_clang_format.py`, run the following:
+
+```shell
+mkdir build
+cd build
+cmake .. -DBUILD_JAVA=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang -DCMAKE_EXPORT_COMPILE_COMMANDS=1 -DORC_ENABLE_CLANG_TOOLS=1
+make check-format # Do checks only
+make format # This would apply suggested changes, take care!
+```
+
+## clang-tidy
+
+To use `run_clang_tidy.py`, run the following:
+
+```shell
+mkdir build
+cd build
+cmake .. -DBUILD_JAVA=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang -DCMAKE_EXPORT_COMPILE_COMMANDS=1 -DORC_ENABLE_CLANG_TOOLS=1
+make -j`nproc` # Important
+make check-clang-tidy # Do checks only
+make fix-clang-tidy # This would apply suggested changes, take care!
+```
diff --git a/c++/build-support/run_clang_format.py b/c++/build-support/run_clang_format.py
new file mode 100644
index 0000000000..52d2e6b255
--- /dev/null
+++ b/c++/build-support/run_clang_format.py
@@ -0,0 +1,132 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import argparse
+import codecs
+import difflib
+import fnmatch
+import os
+import subprocess
+import sys
+
+
+def check(arguments, source_dir):
+    formatted_filenames = []
+    error = False
+    for directory, subdirs, filenames in os.walk(source_dir):
+        fullpaths = (os.path.join(directory, filename)
+                     for filename in filenames)
+        source_files = [x for x in fullpaths
+                        if x.endswith(".hh") or x.endswith(".cc")]
+        formatted_filenames.extend(
+            # Filter out files that match the globs in the globs file
+            [filename for filename in source_files
+             if not any((fnmatch.fnmatch(filename, exclude_glob)
+                         for exclude_glob in exclude_globs))])
+
+    if arguments.fix:
+        if not arguments.quiet:
+            # Print out each file on its own line, but run
+            # clang format once for all of the files
+            print("\n".join(map(lambda x: "Formatting {}".format(x),
+                                formatted_filenames)))
+        subprocess.check_call([arguments.clang_format_binary,
+                               "-i"] + formatted_filenames)
+    else:
+        for filename in formatted_filenames:
+            if not arguments.quiet:
+                print("Checking {}".format(filename))
+            #
+            # Due to some incompatibilities between Python 2 and
+            # Python 3, there are some specific actions we take here
+            # to make sure the difflib.unified_diff call works.
+            #
+            # In Python 2, the call to subprocess.check_output returns
+            # a 'str' type. In Python 3, however, the call returns a
+            # 'bytes' type unless the 'encoding' argument is
+            # specified. Unfortunately, the 'encoding' argument is not
+            # in the Python 2 API. We could do an if/else here based
+            # on the version of Python we are running, but it's more
+            # straightforward to read the file in binary and do utf-8
+            # conversion. In Python 2, it's just converting string
+            # types to unicode types, whereas in Python 3 it's
+            # converting bytes types to utf-8 encoded str types. This
+            # approach ensures that the arguments to
+            # difflib.unified_diff are acceptable string types in both
+            # Python 2 and Python 3.
+            with open(filename, "rb") as reader:
+                # Run clang-format and capture its output
+                formatted = subprocess.check_output(
+                    [arguments.clang_format_binary,
+                     filename])
+                formatted = codecs.decode(formatted, "utf-8")
+                # Read the original file
+                original = codecs.decode(reader.read(), "utf-8")
+                # Run the equivalent of diff -u
+                diff = list(difflib.unified_diff(
+                    original.splitlines(True),
+                    formatted.splitlines(True),
+                    fromfile=filename,
+                    tofile="{} (after clang format)".format(
+                        filename)))
+                if diff:
+                    print("{} had clang-format style issues".format(filename))
+                    # Print out the diff to stderr
+                    error = True
+                    sys.stderr.writelines(diff)
+    return error
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Runs clang-format on all of the source "
+                    "files. If --fix is specified, reformats the "
+                    "files in place; otherwise compares the output "
+                    "with the existing files and outputs a unified "
+                    "diff if there are any necessary changes")
+    parser.add_argument("clang_format_binary",
+                        help="Path to the clang-format binary")
+    parser.add_argument("--exclude_globs",
+                        help="Filename containing globs for files "
+                        "that should be excluded from the checks")
+    parser.add_argument("--source_dirs",
+                        help="Comma-separated root directories of the code")
+    parser.add_argument("--fix", default=False,
+                        action="http://github.com/store_true",
+                        help="If specified, will re-format the source "
+                        "code instead of comparing the re-formatted "
+                        "output, defaults to %(default)s")
+    parser.add_argument("--quiet", default=False,
+                        action="http://github.com/store_true",
+                        help="If specified, only print errors")
+
+    args = parser.parse_args()
+
+    had_err = False
+    exclude_globs = []
+    if args.exclude_globs:
+        for line in open(args.exclude_globs):
+            if line.strip() == "":
+                continue
+            if line[0] == "#":
+                continue
+            exclude_globs.append(line.strip())
+
+    for source_dir in args.source_dirs.split(','):
+        if len(source_dir) > 0:
+            had_err = had_err or check(args, source_dir)
+
+    sys.exit(1 if had_err else 0)
\ No newline at end of file
diff --git a/run_clang_tidy.py b/c++/build-support/run_clang_tidy.py
old mode 100644
new mode 100755
similarity index 100%
rename from run_clang_tidy.py
rename to c++/build-support/run_clang_tidy.py
diff --git a/c++/include/CMakeLists.txt b/c++/include/CMakeLists.txt
index 056d1b9fab..a9f8b4a3b5 100644
--- a/c++/include/CMakeLists.txt
+++ b/c++/include/CMakeLists.txt
@@ -22,10 +22,11 @@ configure_file (
 
 install(FILES
   "${CMAKE_CURRENT_BINARY_DIR}/orc/orc-config.hh"
-  DESTINATION "include/orc"
+  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/orc"
   )
 
 install(DIRECTORY
-  "orc/"
-  DESTINATION "include/orc"
-  FILES_MATCHING PATTERN "*.hh")
+  "orc/"
+  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/orc"
+  FILES_MATCHING PATTERN "*.hh"
+  )
diff --git a/c++/include/orc/Exceptions.hh b/c++/include/orc/Exceptions.hh
index 97cf5d8a0d..b19a00760c 100644
--- a/c++/include/orc/Exceptions.hh
+++ b/c++/include/orc/Exceptions.hh
@@ -67,6 +67,18 @@ namespace orc {
     SchemaEvolutionError(const SchemaEvolutionError&);
     SchemaEvolutionError& operator=(const SchemaEvolutionError&) = delete;
   };
+
+  class CompressionError : public std::runtime_error {
+   public:
+    explicit CompressionError(const std::string& whatArg);
+    explicit CompressionError(const char* whatArg);
+    ~CompressionError() noexcept override;
+    CompressionError(const CompressionError&);
+
+   private:
+    CompressionError& operator=(const CompressionError&);
+  };
+
 }  // namespace orc
 
 #endif
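The new `CompressionError` gives callers a way to distinguish compression failures from generic runtime errors. A minimal sketch of catching it on the write path — the surrounding write logic is elided and only the exception handling is the point here:

```cpp
#include <iostream>

#include "orc/Exceptions.hh"

int run() {
  try {
    // ... write ORC data; compression happens as streams are flushed ...
  } catch (const orc::CompressionError& e) {
    // Thrown by the compression streams, e.g. when a block exceeds the
    // configured compression block size or the codec itself fails.
    std::cerr << "compression failed: " << e.what() << '\n';
    return 1;
  }
  return 0;
}
```

Since `CompressionError` derives from `std::runtime_error`, existing catch-all handlers keep working unchanged.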
diff --git a/c++/include/orc/OrcFile.hh b/c++/include/orc/OrcFile.hh
index 6e4a07bf7c..ea71567c5f 100644
--- a/c++/include/orc/OrcFile.hh
+++ b/c++/include/orc/OrcFile.hh
@@ -19,6 +19,7 @@
 #ifndef ORC_FILE_HH
 #define ORC_FILE_HH
 
+#include <future>
 
 #include "orc/Reader.hh"
@@ -58,6 +59,18 @@ namespace orc {
      */
     virtual void read(void* buf, uint64_t length, uint64_t offset) = 0;
 
+    /**
+     * Read data asynchronously into the buffer. The buffer is allocated by the caller.
+     * @param buf the buffer to read into
+     * @param length the number of bytes to read.
+     * @param offset the position in the stream to read from.
+     * @return a future that will be set when the read is complete.
+     */
+    virtual std::future<void> readAsync(void* buf, uint64_t length, uint64_t offset) {
+      return std::async(std::launch::async,
+                        [this, buf, length, offset] { this->read(buf, length, offset); });
+    }
+
     /**
      * Get the name of the stream for error messages.
      */
@@ -127,8 +140,8 @@ namespace orc {
    * @param path the uri of the file in HDFS
    * @param metrics the metrics of the reader
    */
-  std::unique_ptr<InputStream> readHdfsFile(const std::string& path,
-                                            ReaderMetrics* metrics = nullptr);
+  [[deprecated("readHdfsFile is deprecated in 2.0.1")]] std::unique_ptr<InputStream> readHdfsFile(
+      const std::string& path, ReaderMetrics* metrics = nullptr);
 
   /**
    * Create a reader to read the ORC file.
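The default `readAsync` above simply wraps the blocking `read` in `std::async`, so custom `InputStream` implementations get asynchrony for free and can override it with true async I/O later. A minimal caller-side sketch, assuming an existing `InputStream*` and placeholder offsets and lengths:

```cpp
#include <future>
#include <vector>

#include "orc/OrcFile.hh"

// Issue two reads concurrently and wait for both. The offsets and lengths
// are illustrative; real code would take them from stripe metadata.
void prefetchTwoRanges(orc::InputStream* stream) {
  std::vector<char> a(1024), b(1024);
  std::future<void> f1 = stream->readAsync(a.data(), a.size(), 0);
  std::future<void> f2 = stream->readAsync(b.data(), b.size(), 4096);
  f1.get();  // get() rethrows any I/O error raised inside the async task
  f2.get();
}
```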
diff --git a/c++/include/orc/Reader.hh b/c++/include/orc/Reader.hh
index 4b254593ee..e9f420f113 100644
--- a/c++/include/orc/Reader.hh
+++ b/c++/include/orc/Reader.hh
@@ -40,6 +40,17 @@ namespace orc {
   struct ReaderOptionsPrivate;
   struct RowReaderOptionsPrivate;
 
+  struct CacheOptions {
+    // The maximum distance in bytes between two consecutive
+    // ranges; beyond this value, ranges are not combined
+    uint64_t holeSizeLimit = 8192;
+
+    // The maximum size in bytes of a combined range; if
+    // combining two consecutive ranges would produce a range of a
+    // size greater than this, they are not combined
+    uint64_t rangeSizeLimit = 32 * 1024 * 1024;
+  };
+
   /**
    * Expose the reader metrics including the latency and
    * number of calls of the decompression/decoding/IO modules.
@@ -59,9 +70,20 @@ namespace orc {
     std::atomic<uint64_t> IOBlockingLatencyUs{0};
     std::atomic<uint64_t> SelectedRowGroupCount{0};
     std::atomic<uint64_t> EvaluatedRowGroupCount{0};
+    std::atomic<uint64_t> ReadRangeCacheHits{0};
+    std::atomic<uint64_t> ReadRangeCacheMisses{0};
   };
   ReaderMetrics* getDefaultReaderMetrics();
 
+  // Row group index of a single column in a stripe.
+  struct RowGroupIndex {
+    // Positions are represented as a two-dimensional array where the first
+    // dimension is row group index and the second dimension is the position
+    // list of the row group. The size of the second dimension should be equal
+    // among all row groups.
+    std::vector<std::vector<uint64_t>> positions;
+  };
+
   /**
    * Options for creating a Reader.
    */
@@ -107,6 +129,11 @@ namespace orc {
      */
     ReaderOptions& setReaderMetrics(ReaderMetrics* metrics);
 
+    /**
+     * Set the cache options.
+     */
+    ReaderOptions& setCacheOptions(const CacheOptions& cacheOptions);
+
     /**
      * Set the location of the tail as defined by the logical length of the
     * file.
@@ -138,6 +165,11 @@ namespace orc {
      * Get the reader metrics.
      */
     ReaderMetrics* getReaderMetrics() const;
+
+    /**
+     * Get the cache options.
+     */
+    const CacheOptions& getCacheOptions() const;
   };
 
   /**
@@ -466,9 +498,11 @@ namespace orc {
     /**
      * Get the statistics about a stripe.
      * @param stripeIndex the index of the stripe (0 to N-1) to get statistics about
-     * @return the statistics about that stripe
+     * @param includeRowIndex whether the row index of the stripe is included
+     * @return the statistics about that stripe and row group index statistics
      */
-    virtual std::unique_ptr<StripeStatistics> getStripeStatistics(uint64_t stripeIndex) const = 0;
+    virtual std::unique_ptr<StripeStatistics> getStripeStatistics(
+        uint64_t stripeIndex, bool includeRowIndex = true) const = 0;
 
     /**
      * Get the length of the data stripes in the file.
@@ -605,6 +639,33 @@ namespace orc {
      */
     virtual std::map<uint32_t, BloomFilterIndex> getBloomFilters(
         uint32_t stripeIndex, const std::set<uint32_t>& included) const = 0;
+
+    /**
+     * Get row group index of all selected columns in the specified stripe
+     * @param stripeIndex index of the stripe to be read for row group index.
+     * @param included index of selected columns to return (if not specified,
+     *        all columns will be returned).
+     * @return map of row group index keyed by its column index.
+     */
+    virtual std::map<uint32_t, RowGroupIndex> getRowGroupIndex(
+        uint32_t stripeIndex, const std::set<uint32_t>& included = {}) const = 0;
+
+    /**
+     * Trigger IO prefetch and cache the prefetched contents asynchronously.
+     * It is thread safe. Users should make sure requested stripes and columns
+     * are not overlapped, otherwise the overlapping part will be prefetched multiple times,
+     * which doesn't affect correctness but wastes IO and memory resources.
+     * @param stripes the stripes to prefetch
+     * @param includeTypes the types to prefetch
+     */
+    virtual void preBuffer(const std::vector<uint32_t>& stripes,
+                           const std::list<uint64_t>& includeTypes) = 0;
+
+    /**
+     * Release cached entries whose right boundary is less than or equal to the given boundary.
+     * @param boundary the boundary value to release cache entries
+     */
+    virtual void releaseBuffer(uint64_t boundary) = 0;
   };
 
   /**
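A sketch of how the new cache plumbing fits together on the read side: configure `CacheOptions`, prefetch a stripe, then release ranges once they have been consumed. The file path, stripe number, and column type ids are placeholders:

```cpp
#include <memory>
#include <string>

#include "orc/OrcFile.hh"
#include "orc/Reader.hh"

std::unique_ptr<orc::Reader> openWithCache(const std::string& path) {
  orc::ReaderOptions options;
  orc::CacheOptions cache;
  cache.holeSizeLimit = 8192;               // merge ranges separated by < 8 KB
  cache.rangeSizeLimit = 32 * 1024 * 1024;  // never merge past 32 MB
  options.setCacheOptions(cache);

  auto reader = orc::createReader(orc::readLocalFile(path), options);
  reader->preBuffer({0}, {1, 2});  // stripe 0, type ids 1 and 2 (placeholders)
  // ... read stripe 0 ...
  auto stripe = reader->getStripe(0);
  // Drop cached ranges that end at or before the end of the consumed stripe.
  reader->releaseBuffer(stripe->getOffset() + stripe->getLength());
  return reader;
}
```

The `holeSizeLimit`/`rangeSizeLimit` pair controls range coalescing: small holes are read through to save seeks, while the size cap keeps a single combined read from ballooning.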
diff --git a/c++/include/orc/Vector.hh b/c++/include/orc/Vector.hh
index 0dfe926965..663bef9cd7 100644
--- a/c++/include/orc/Vector.hh
+++ b/c++/include/orc/Vector.hh
@@ -57,6 +57,8 @@ namespace orc {
     bool hasNulls;
     // whether the vector batch is encoded
     bool isEncoded;
+    // whether the dictionary is decoded into vector batch
+    bool dictionaryDecoded;
 
     // custom memory pool
     MemoryPool& memoryPool;
@@ -88,6 +90,14 @@ namespace orc {
      */
     virtual bool hasVariableLength();
 
+    /**
+     * Decode possible dictionary into vector batch.
+     */
+    void decodeDictionary();
+
+   protected:
+    virtual void decodeDictionaryImpl() {}
+
    private:
     ColumnVectorBatch(const ColumnVectorBatch&);
     ColumnVectorBatch& operator=(const ColumnVectorBatch&);
@@ -248,6 +258,10 @@ namespace orc {
     ~EncodedStringVectorBatch() override;
     std::string toString() const override;
     void resize(uint64_t capacity) override;
+
+    // Calculate data and length in StringVectorBatch from dictionary and index
+    void decodeDictionaryImpl() override;
+
     std::shared_ptr<StringDictionary> dictionary;
 
     // index for dictionary entry
@@ -264,6 +278,9 @@ namespace orc {
     bool hasVariableLength() override;
 
     std::vector<ColumnVectorBatch*> fields;
+
+   protected:
+    void decodeDictionaryImpl() override;
   };
 
   struct ListVectorBatch : public ColumnVectorBatch {
@@ -283,6 +300,9 @@ namespace orc {
 
     // the concatenated elements
     std::unique_ptr<ColumnVectorBatch> elements;
+
+   protected:
+    void decodeDictionaryImpl() override;
   };
 
   struct MapVectorBatch : public ColumnVectorBatch {
@@ -304,6 +324,9 @@ namespace orc {
     std::unique_ptr<ColumnVectorBatch> keys;
     // the concatenated elements
     std::unique_ptr<ColumnVectorBatch> elements;
+
+   protected:
+    void decodeDictionaryImpl() override;
   };
 
   struct UnionVectorBatch : public ColumnVectorBatch {
@@ -327,6 +350,9 @@ namespace orc {
 
     // the sub-columns
     std::vector<ColumnVectorBatch*> children;
+
+   protected:
+    void decodeDictionaryImpl() override;
   };
 
   struct Decimal {
diff --git a/c++/include/orc/Writer.hh b/c++/include/orc/Writer.hh
index 7968fbce7f..78f06739bc 100644
--- a/c++/include/orc/Writer.hh
+++ b/c++/include/orc/Writer.hh
@@ -277,6 +277,32 @@ namespace orc {
      * @return if not set, return default value which is 1 MB.
      */
     uint64_t getOutputBufferCapacity() const;
+
+    /**
+     * Set the initial block size of the original input buffer in the class
+     * CompressionStream. The input buffer is used to store raw data before
+     * compression, while the output buffer is dedicated to holding compressed data.
+     */
+    WriterOptions& setMemoryBlockSize(uint64_t capacity);
+
+    /**
+     * Get the initial block size of the original input buffer in the class
+     * CompressionStream.
+     * @return if not set, return default value which is 64 KB.
+     */
+    uint64_t getMemoryBlockSize() const;
+
+    /**
+     * Set whether the compression block should be aligned to row group boundary.
+     * The boolean type may not be aligned to row group boundary due to the
+     * requirement of the Boolean RLE encoder to pack input bits into bytes.
+     */
+    WriterOptions& setAlignBlockBoundToRowGroup(bool alignBlockBoundToRowGroup);
+
+    /**
+     * Get whether the compression block should be aligned to row group boundary.
+     * @return if not set, return default value which is false.
+     */
+    bool getAlignBlockBoundToRowGroup() const;
   };
 
   class Writer {
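The two new writer knobs work together: `memoryBlockSize` controls how the raw-input staging buffer grows, while `alignBlockBoundToRowGroup` makes the writer cut a compression block at each row group boundary. A hedged sketch of wiring them up — the compression kind and sizes are illustrative, not recommended defaults:

```cpp
#include "orc/Writer.hh"

void configureCompression(orc::WriterOptions& options) {
  options.setCompression(orc::CompressionKind_ZSTD);
  options.setCompressionBlockSize(256 * 1024);  // max uncompressed bytes per chunk
  options.setMemoryBlockSize(64 * 1024);        // initial raw-input block size
  // Cut a compression block at every row group boundary so a row group never
  // starts in the middle of a compressed chunk (booleans may still spill a
  // partially filled byte, as the doc comment above notes).
  options.setAlignBlockBoundToRowGroup(true);
}
```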
diff --git a/c++/orcConfig.cmake.in b/c++/orcConfig.cmake.in
new file mode 100644
index 0000000000..49663b3423
--- /dev/null
+++ b/c++/orcConfig.cmake.in
@@ -0,0 +1,103 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# This config sets the following variables in your project:
+#
+#   orc_VERSION - version of the found ORC
+#   orc_FOUND - true if ORC found on the system
+#
+# This config sets the following targets in your project:
+#
+#   orc::orc - for linked as static library
+#
+# For backward compatibility, this config also sets the following variables:
+#
+#   ORC_FOUND - same as orc_FOUND above
+#   ORC_STATIC_LIB - static library of the found ORC
+#   ORC_INCLUDE_DIR - include directory of the found ORC
+#   ORC_INCLUDE_DIRS - same as ORC_INCLUDE_DIR above
+
+@PACKAGE_INIT@
+
+set(ORC_VENDOR_DEPENDENCIES "@ORC_VENDOR_DEPENDENCIES@")
+set(ORC_SYSTEM_DEPENDENCIES "@ORC_SYSTEM_DEPENDENCIES@")
+
+if(DEFINED CMAKE_MODULE_PATH)
+  set(ORC_CMAKE_MODULE_PATH_OLD ${CMAKE_MODULE_PATH})
+else()
+  unset(ORC_CMAKE_MODULE_PATH_OLD)
+endif()
+set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}")
+
+include(CMakeFindDependencyMacro)
+foreach(dependency ${ORC_SYSTEM_DEPENDENCIES})
+  find_dependency(${dependency})
+endforeach()
+
+if(DEFINED ORC_CMAKE_MODULE_PATH_OLD)
+  set(CMAKE_MODULE_PATH ${ORC_CMAKE_MODULE_PATH_OLD})
+  unset(ORC_CMAKE_MODULE_PATH_OLD)
+else()
+  unset(CMAKE_MODULE_PATH)
+endif()
+
+include("${CMAKE_CURRENT_LIST_DIR}/orcTargets.cmake")
+
+get_target_property(orc_static_configurations orc::orc IMPORTED_CONFIGURATIONS)
+
+foreach(dependency ${ORC_VENDOR_DEPENDENCIES})
+  string(REPLACE "|" ";" dependency_pair ${dependency})
+  list(LENGTH dependency_pair dependency_pair_length)
+  if(NOT dependency_pair_length EQUAL 2)
+    message(FATAL_ERROR "Invalid vendor dependency: ${dependency}")
+  endif()
+  list(GET dependency_pair 0 target_name)
+  list(GET dependency_pair 1 static_lib_name)
+
+  add_library("${target_name}" STATIC IMPORTED)
+
+  foreach(CONFIGURATION ${orc_static_configurations})
+    string(TOUPPER "${CONFIGURATION}" CONFIGURATION)
+    get_target_property(orc_static_location orc::orc LOCATION_${CONFIGURATION})
+    get_filename_component(orc_lib_dir "${orc_static_location}" DIRECTORY)
+    set_property(TARGET "${target_name}"
+                 APPEND
+                 PROPERTY IMPORTED_CONFIGURATIONS ${CONFIGURATION})
+    set_target_properties("${target_name}"
+                          PROPERTIES IMPORTED_LOCATION_${CONFIGURATION}
+                                     "${orc_lib_dir}/${static_lib_name}")
+  endforeach()
+endforeach()
+
+check_required_components(orc)
+
+foreach(BUILD_TYPE_SUFFIX
+        "_RELEASE"
+        "_RELWITHDEBINFO"
+        "_MINSIZEREL"
+        "_DEBUG"
+        "")
+  if(NOT ORC_STATIC_LIB)
+    get_target_property(ORC_STATIC_LIB orc::orc IMPORTED_LOCATION${BUILD_TYPE_SUFFIX})
+  endif()
+endforeach()
+
+get_target_property(ORC_INCLUDE_DIR orc::orc INTERFACE_INCLUDE_DIRECTORIES)
+
+set(ORC_FOUND TRUE)
+set(ORC_VERSION ${orc_VERSION})
+set(ORC_INCLUDE_DIRS ${ORC_INCLUDE_DIR})
diff --git a/c++/src/BlockBuffer.hh b/c++/src/BlockBuffer.hh
index 2faf38f7f9..6d265b0e32 100644
--- a/c++/src/BlockBuffer.hh
+++ b/c++/src/BlockBuffer.hh
@@ -106,12 +106,14 @@ namespace orc {
     }
 
     void resize(uint64_t size);
+
     /**
      * Requests the BlockBuffer to contain at least newCapacity bytes.
      * Reallocation happens if there is need of more space.
      * @param newCapacity new capacity of BlockBuffer
      */
     void reserve(uint64_t newCapacity);
+
     /**
      * Write the BlockBuffer content into OutputStream
      * @param output the output stream to write to
diff --git a/c++/src/ByteRLE.cc b/c++/src/ByteRLE.cc
index bdbaad1da6..ded9f55a00 100644
--- a/c++/src/ByteRLE.cc
+++ b/c++/src/ByteRLE.cc
@@ -63,6 +63,8 @@ namespace orc {
 
     virtual void suppress() override;
 
+    virtual void finishEncode() override;
+
     /**
      * Reset to initial state
      */
@@ -186,16 +188,17 @@ namespace orc {
 
   void ByteRleEncoderImpl::recordPosition(PositionRecorder* recorder) const {
     uint64_t flushedSize = outputStream->getSize();
-    uint64_t unflushedSize = static_cast<uint64_t>(bufferPosition);
+    uint64_t unusedBufferSize = static_cast<uint64_t>(bufferLength - bufferPosition);
     if (outputStream->isCompressed()) {
       // start of the compression chunk in the stream
       recorder->add(flushedSize);
-      // number of decompressed bytes that need to be consumed
-      recorder->add(unflushedSize);
+      // There are multiple blocks in the input buffer, but bufferPosition only records the
+      // effective length of the last block. We need rawInputBufferSize to record the total length
+      // of all variable blocks.
+      recorder->add(outputStream->getRawInputBufferSize() - unusedBufferSize);
     } else {
-      flushedSize -= static_cast<uint64_t>(bufferLength);
       // byte offset of the RLE run’s start location
-      recorder->add(flushedSize + unflushedSize);
+      recorder->add(flushedSize - unusedBufferSize);
     }
     recorder->add(static_cast<uint64_t>(numLiterals));
   }
@@ -215,6 +218,13 @@ namespace orc {
     reset();
   }
 
+  void ByteRleEncoderImpl::finishEncode() {
+    writeValues();
+    outputStream->BackUp(bufferLength - bufferPosition);
+    outputStream->finishStream();
+    bufferLength = bufferPosition = 0;
+  }
+
   std::unique_ptr<ByteRleEncoder> createByteRleEncoder(
      std::unique_ptr<BufferedOutputStream> output) {
    return std::make_unique<ByteRleEncoderImpl>(std::move(output));
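The `recordPosition` change above is easy to misread, so here is a small worked sketch of the uncompressed branch. The numbers are illustrative, and it assumes `getSize()` already counts the block handed to the encoder but not yet fully filled:

```cpp
#include <cstdint>

// Uncompressed stream: recorded offset = getSize() minus the unused tail of
// the block currently checked out by the encoder.
uint64_t recordedStartOffset() {
  uint64_t flushedSize = 1000;    // outputStream->getSize(): bytes absorbed so far
  uint64_t bufferLength = 128;    // size of the block handed to the encoder
  uint64_t bufferPosition = 100;  // bytes of that block actually used
  uint64_t unusedBufferSize = bufferLength - bufferPosition;  // 28 bytes
  return flushedSize - unusedBufferSize;  // next RLE run starts at byte 972
}
```

In the compressed branch the same subtraction is applied to `getRawInputBufferSize()` instead, because the raw-input buffer now holds multiple variable-size blocks and `bufferPosition` only describes the last one.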
diff --git a/c++/src/ByteRLE.hh b/c++/src/ByteRLE.hh
index bd19f52ecc..bee064f666 100644
--- a/c++/src/ByteRLE.hh
+++ b/c++/src/ByteRLE.hh
@@ -59,6 +59,13 @@ namespace orc {
      * suppress the data and reset to initial state
      */
     virtual void suppress() = 0;
+
+    /**
+     * Finalize the encoding process. This function should be called after all data required for
+     * encoding has been added. It ensures that any remaining data is processed and the final state
+     * of the encoder is set.
+     */
+    virtual void finishEncode() = 0;
   };
 
   class ByteRleDecoder {
diff --git a/c++/src/CMakeLists.txt b/c++/src/CMakeLists.txt
index 33ad584840..e378429f1e 100644
--- a/c++/src/CMakeLists.txt
+++ b/c++/src/CMakeLists.txt
@@ -138,12 +138,6 @@ configure_file (
   "${CMAKE_CURRENT_BINARY_DIR}/Adaptor.hh"
   )
 
-include_directories (
-  ${CMAKE_CURRENT_SOURCE_DIR}
-  ${CMAKE_CURRENT_BINARY_DIR}
-  ${LIBHDFSPP_INCLUDE_DIR}
-  )
-
 add_custom_command(OUTPUT orc_proto.pb.h orc_proto.pb.cc
    COMMAND ${PROTOBUF_EXECUTABLE}
         -I ../../orc-format_ep-prefix/src/orc-format_ep/src/main/proto/orc/proto
@@ -156,6 +150,7 @@ set(SOURCE_FILES
   orc_proto.pb.h
   io/InputStream.cc
   io/OutputStream.cc
+  io/Cache.cc
   sargs/ExpressionTree.cc
   sargs/Literal.cc
   sargs/PredicateLeaf.cc
@@ -197,7 +192,6 @@ set(SOURCE_FILES
 
 if(BUILD_LIBHDFSPP)
   set(SOURCE_FILES ${SOURCE_FILES} OrcHdfsFile.cc)
-  add_definitions(-DBUILD_LIBHDFSPP)
 endif(BUILD_LIBHDFSPP)
 
 if(BUILD_ENABLE_AVX512)
@@ -209,14 +203,45 @@ endif(BUILD_ENABLE_AVX512)
 add_library (orc STATIC ${SOURCE_FILES})
 
 target_link_libraries (orc
-  orc::protobuf
-  orc::zlib
-  orc::snappy
-  orc::lz4
-  orc::zstd
-  ${LIBHDFSPP_LIBRARIES}
+  INTERFACE
+    ${ORC_INSTALL_INTERFACE_TARGETS}
+  PRIVATE
+    $<BUILD_INTERFACE:orc::protobuf>
+    $<BUILD_INTERFACE:orc::zlib>
+    $<BUILD_INTERFACE:orc::snappy>
+    $<BUILD_INTERFACE:orc::lz4>
+    $<BUILD_INTERFACE:orc::zstd>
+    $<BUILD_INTERFACE:${LIBHDFSPP_LIBRARIES}>
   )
 
+target_include_directories (orc
+  INTERFACE
+    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
+  PUBLIC
+    $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/c++/include>
+    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/c++/include>
+  PRIVATE
+    ${CMAKE_CURRENT_BINARY_DIR}
+    ${CMAKE_CURRENT_SOURCE_DIR}
+    ${LIBHDFSPP_INCLUDE_DIR}
+)
+
+if (BUILD_LIBHDFSPP)
+  target_compile_definitions(orc PUBLIC -DBUILD_LIBHDFSPP)
+endif (BUILD_LIBHDFSPP)
+
+if (BUILD_CPP_ENABLE_METRICS)
+  message(STATUS "Enable the metrics collection")
+  target_compile_definitions(orc PUBLIC ENABLE_METRICS=1)
+else ()
+  message(STATUS "Disable the metrics collection")
+  target_compile_definitions(orc PUBLIC ENABLE_METRICS=0)
+endif ()
+
 add_dependencies(orc orc-format_ep)
 
-install(TARGETS orc DESTINATION lib)
+install(TARGETS orc EXPORT orc_targets)
+install(EXPORT orc_targets
+  DESTINATION ${ORC_INSTALL_CMAKE_DIR}
+  NAMESPACE "orc::"
+  FILE "orcTargets.cmake")
diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc
index e70f916ffd..af434c37ca 100644
--- a/c++/src/ColumnReader.cc
+++ b/c++/src/ColumnReader.cc
@@ -395,7 +395,7 @@ namespace orc {
       int64_t bits = 0;
       if (bufferEnd_ - bufferPointer_ >= 8) {
         if (isLittleEndian) {
-          bits = *(reinterpret_cast<const int64_t*>(bufferPointer_));
+          memcpy(&bits, bufferPointer_, sizeof(bits));
         } else {
           bits = static_cast<int64_t>(static_cast<unsigned char>(bufferPointer_[0]));
           bits |= static_cast<int64_t>(static_cast<unsigned char>(bufferPointer_[1])) << 8;
@@ -509,8 +509,10 @@ namespace orc {
       bufferNum = std::min(numValues,
                            static_cast<uint64_t>(bufferEnd_ - bufferPointer_) / bytesPerValue_);
       uint64_t bufferBytes = bufferNum * bytesPerValue_;
-      memcpy(outArray, bufferPointer_, bufferBytes);
-      bufferPointer_ += bufferBytes;
+      if (bufferBytes > 0) {
+        memcpy(outArray, bufferPointer_, bufferBytes);
+        bufferPointer_ += bufferBytes;
+      }
     }
     for (size_t i = bufferNum; i < numValues; ++i) {
       outArray[i] = readDouble();
diff --git a/c++/src/ColumnWriter.cc b/c++/src/ColumnWriter.cc
index 86e30ce90d..d31b1c65d4 100644
--- a/c++/src/ColumnWriter.cc
+++ b/c++/src/ColumnWriter.cc
@@ -24,6 +24,7 @@
 #include "RLE.hh"
 #include "Statistics.hh"
 #include "Timezone.hh"
+#include "Utils.hh"
 
 namespace orc {
   StreamsFactory::~StreamsFactory() {
@@ -47,11 +48,11 @@ namespace orc {
     // In the future, we can decide compression strategy and modifier
     // based on stream kind. But for now we just use the setting from
     // WriterOption
-    return createCompressor(options_.getCompression(), outStream_,
-                            options_.getCompressionStrategy(),
-                            // BufferedOutputStream initial capacity
-                            options_.getOutputBufferCapacity(), options_.getCompressionBlockSize(),
-                            *options_.getMemoryPool(), options_.getWriterMetrics());
+    return createCompressor(
+        options_.getCompression(), outStream_, options_.getCompressionStrategy(),
+        // BufferedOutputStream initial capacity
+        options_.getOutputBufferCapacity(), options_.getCompressionBlockSize(),
+        options_.getMemoryBlockSize(), *options_.getMemoryPool(), options_.getWriterMetrics());
   }
 
   std::unique_ptr<StreamsFactory> createStreamsFactory(const WriterOptions& options,
@@ -253,6 +254,10 @@ namespace orc {
     // PASS
   }
 
+  void ColumnWriter::finishStreams() {
+    notNullEncoder->finishEncode();
+  }
+
   class StructColumnWriter : public ColumnWriter {
    public:
     StructColumnWriter(const Type& type, const StreamsFactory& factory,
@@ -282,6 +287,8 @@ namespace orc {
 
     virtual void reset() override;
 
+    virtual void finishStreams() override;
+
    private:
     std::vector<std::unique_ptr<ColumnWriter>> children_;
   };
@@ -415,6 +422,13 @@ namespace orc {
     }
   }
 
+  void StructColumnWriter::finishStreams() {
+    ColumnWriter::finishStreams();
+    for (uint32_t i = 0; i < children_.size(); ++i) {
+      children_[i]->finishStreams();
+    }
+  }
+
   template <typename T>
   class IntegerColumnWriter : public ColumnWriter {
    public:
@@ -432,6 +446,8 @@ namespace orc {
 
     virtual void recordPosition() const override;
 
+    virtual void finishStreams() override;
+
    protected:
     std::unique_ptr<RleEncoder> rleEncoder;
 
@@ -527,6 +543,12 @@ namespace orc {
     rleEncoder->recordPosition(rowIndexPosition.get());
   }
 
+  template <typename T>
+  void IntegerColumnWriter<T>::finishStreams() {
+    ColumnWriter::finishStreams();
+    rleEncoder->finishEncode();
+  }
+
   template <typename T>
   class ByteColumnWriter : public ColumnWriter {
    public:
@@ -543,6 +565,8 @@ namespace orc {
 
     virtual void recordPosition() const override;
 
+    virtual void finishStreams() override;
+
    private:
     std::unique_ptr<ByteRleEncoder> byteRleEncoder_;
   };
@@ -591,7 +615,7 @@ namespace orc {
       if (enableBloomFilter) {
         bloomFilter->addLong(data[i]);
       }
-      intStats->update(static_cast<int64_t>(byteData[i]), 1);
+      intStats->update(static_cast<int64_t>(static_cast<int8_t>(byteData[i])), 1);
     }
   }
   intStats->increase(count);
@@ -636,6 +660,12 @@ namespace orc {
     byteRleEncoder_->recordPosition(rowIndexPosition.get());
   }
 
+  template <typename T>
+  void ByteColumnWriter<T>::finishStreams() {
+    ColumnWriter::finishStreams();
+    byteRleEncoder_->finishEncode();
+  }
+
   template <typename T>
   class BooleanColumnWriter : public ColumnWriter {
    public:
@@ -653,6 +683,8 @@ namespace orc {
 
     virtual void recordPosition() const override;
 
+    virtual void finishStreams() override;
+
    private:
     std::unique_ptr<ByteRleEncoder> rleEncoder_;
   };
@@ -749,6 +781,12 @@ namespace orc {
     rleEncoder_->recordPosition(rowIndexPosition.get());
   }
 
+  template <typename T>
+  void BooleanColumnWriter<T>::finishStreams() {
+    ColumnWriter::finishStreams();
+    rleEncoder_->finishEncode();
+  }
+
   template <typename T>
   class FloatingColumnWriter : public ColumnWriter {
    public:
@@ -766,6 +804,8 @@ namespace orc {
 
     virtual void recordPosition() const override;
 
+    virtual void finishStreams() override;
+
    private:
     bool isFloat_;
     std::unique_ptr<AppendOnlyBufferedStream> dataStream_;
@@ -877,6 +917,12 @@ namespace orc {
     dataStream_->recordPosition(rowIndexPosition.get());
  }
 
+  template <typename T>
+  void FloatingColumnWriter<T>::finishStreams() {
+    ColumnWriter::finishStreams();
+    dataStream_->finishStream();
+  }
+
   /**
    * Implementation of increasing sorted string dictionary
    */
   class SortedStringDictionary {
    public:
     struct DictEntry {
       DictEntry(const char* str, size_t len) : data(str), length(len) {}
       const char* data;
       size_t length;
     };
 
+    struct DictEntryWithIndex {
+      DictEntryWithIndex(const char* str, size_t len, size_t index)
+          : entry(str, len), index(index) {}
+      DictEntry entry;
+      size_t index;
+    };
+
     SortedStringDictionary() : totalLength_(0) {}
 
     // insert a new string into dictionary, return its insertion order
-    size_t insert(const char* data, size_t len);
+    size_t insert(const char* str, size_t len);
 
     // write dictionary data & length to output buffer
     void flush(AppendOnlyBufferedStream* dataStream, RleEncoder* lengthEncoder) const;
@@ -912,7 +965,9 @@ namespace orc {
 
    private:
     struct LessThan {
-      bool operator()(const DictEntry& left, const DictEntry& right) const {
+      bool operator()(const DictEntryWithIndex& l, const DictEntryWithIndex& r) {
+        const auto& left = l.entry;
+        const auto& right = r.entry;
         int ret = memcmp(left.data, right.data, std::min(left.length, right.length));
         if (ret != 0) {
           return ret < 0;
@@ -921,8 +976,8 @@ namespace orc {
       }
     };
 
-    std::map<DictEntry, size_t, LessThan> dict_;
-    std::vector<std::vector<char>> data_;
+    mutable std::vector<DictEntryWithIndex> flatDict_;
+    std::unordered_map<std::string, size_t> keyToIndex_;
     uint64_t totalLength_;
 
     // use friend class here to avoid being bothered by const function calls
@@ -935,14 +990,10 @@ namespace orc {
 
   // insert a new string into dictionary, return its insertion order
   size_t SortedStringDictionary::insert(const char* str, size_t len) {
-    auto ret = dict_.insert({DictEntry(str, len), dict_.size()});
+    size_t index = flatDict_.size();
+    auto ret = keyToIndex_.emplace(std::string(str, len), index);
     if (ret.second) {
-      // make a copy to internal storage
-      data_.push_back(std::vector<char>(len));
-      memcpy(data_.back().data(), str, len);
-      // update dictionary entry to link pointer to internal storage
-      DictEntry* entry = const_cast<DictEntry*>(&(ret.first->first));
-      entry->data = data_.back().data();
+      flatDict_.emplace_back(ret.first->first.data(), ret.first->first.size(), index);
       totalLength_ += len;
     }
     return ret.first->second;
@@ -951,9 +1002,12 @@ namespace orc {
   // write dictionary data & length to output buffer
   void SortedStringDictionary::flush(AppendOnlyBufferedStream* dataStream,
                                      RleEncoder* lengthEncoder) const {
-    for (auto it = dict_.cbegin(); it != dict_.cend(); ++it) {
-      dataStream->write(it->first.data, it->first.length);
-      lengthEncoder->write(static_cast<int64_t>(it->first.length));
+    std::sort(flatDict_.begin(), flatDict_.end(), LessThan());
+
+    for (const auto& entryWithIndex : flatDict_) {
+      const auto& entry = entryWithIndex.entry;
+      dataStream->write(entry.data, entry.length);
+      lengthEncoder->write(static_cast<int64_t>(entry.length));
     }
   }
 
@@ -969,10 +1023,9 @@ namespace orc {
    */
   void SortedStringDictionary::reorder(std::vector<int64_t>& idxBuffer) const {
     // iterate the dictionary to get mapping from insertion order to value order
-    std::vector<size_t> mapping(dict_.size());
-    size_t dictIdx = 0;
-    for (auto it = dict_.cbegin(); it != dict_.cend(); ++it) {
-      mapping[it->second] = dictIdx++;
+    std::vector<size_t> mapping(flatDict_.size());
+    for (size_t i = 0; i < flatDict_.size(); ++i) {
+      mapping[flatDict_[i].index] = i;
     }
 
     // do the transformation
@@ -984,15 +1037,20 @@ namespace orc {
   // get dict entries in insertion order
   void SortedStringDictionary::getEntriesInInsertionOrder(
       std::vector<const DictEntry*>& entries) const {
-    entries.resize(dict_.size());
-    for (auto it = dict_.cbegin(); it != dict_.cend(); ++it) {
-      entries[it->second] = &(it->first);
+    std::sort(flatDict_.begin(), flatDict_.end(),
+              [](const DictEntryWithIndex& left, const DictEntryWithIndex& right) {
+                return left.index < right.index;
+              });
+
+    entries.resize(flatDict_.size());
+    for (size_t i = 0; i < flatDict_.size(); ++i) {
+      entries[i] = &(flatDict_[i].entry);
     }
   }
 
   // return count of entries
   size_t SortedStringDictionary::size() const {
-    return dict_.size();
+    return flatDict_.size();
   }
 
   // return total length of strings in the dictionary
@@ -1002,8 +1060,8 @@ namespace orc {
 
   void SortedStringDictionary::clear() {
     totalLength_ = 0;
-    data_.clear();
-    dict_.clear();
+    keyToIndex_.clear();
+    flatDict_.clear();
   }
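The flat-dictionary rewrite above trades the ordered `std::map` for a hash map plus a flat vector, deferring the lexicographic sort until the dictionary is actually written. A self-contained sketch of the same pattern with simplified types — this models the idea, it is not the ORC class itself:

```cpp
#include <algorithm>
#include <cstddef>
#include <string>
#include <unordered_map>
#include <vector>

// Inserts are O(1) average via the hash map; the sorted order is
// established once, when the dictionary is flushed.
class FlatDictionary {
 public:
  size_t insert(const std::string& key) {
    auto ret = keyToIndex_.emplace(key, entries_.size());
    if (ret.second) {
      entries_.push_back({key, entries_.size()});  // remember insertion order
    }
    return ret.first->second;  // insertion order of the (possibly old) entry
  }

  // Sort lazily, only when the dictionary is written out.
  std::vector<std::string> sortedKeys() {
    std::sort(entries_.begin(), entries_.end(),
              [](const Entry& a, const Entry& b) { return a.key < b.key; });
    std::vector<std::string> keys;
    for (const auto& e : entries_) keys.push_back(e.key);
    return keys;
  }

 private:
  struct Entry {
    std::string key;
    size_t index;  // insertion order, used to remap row data before writing
  };
  std::unordered_map<std::string, size_t> keyToIndex_;
  std::vector<Entry> entries_;
};
```

The design point: one O(n log n) sort at flush time replaces an O(log n) tree rebalance (and per-entry string copy bookkeeping) on every insert, which is a better fit for the write-heavy insert path.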
 
   class StringColumnWriter : public ColumnWriter {
@@ -1028,6 +1086,8 @@ namespace orc {
 
     virtual void reset() override;
 
+    virtual void finishStreams() override;
+
    private:
     /**
      * dictionary related functions
@@ -1221,6 +1281,14 @@ namespace orc {
     }
   }
 
+  void StringColumnWriter::finishStreams() {
+    ColumnWriter::finishStreams();
+    if (!useDictionary) {
+      directDataStream->finishStream();
+      directLengthEncoder->finishEncode();
+    }
+  }
+
   bool StringColumnWriter::checkDictionaryKeyRatio() {
     if (!doneDictionaryCheck) {
       useDictionary = dictionary.size() <=
@@ -1356,75 +1424,6 @@ namespace orc {
     deleteDictStreams();
   }
 
-  struct Utf8Utils {
-    /**
-     * Counts how many utf-8 chars of the input data
-     */
-    static uint64_t charLength(const char* data, uint64_t length) {
-      uint64_t chars = 0;
-      for (uint64_t i = 0; i < length; i++) {
-        if (isUtfStartByte(data[i])) {
-          chars++;
-        }
-      }
-      return chars;
-    }
-
-    /**
-     * Return the number of bytes required to read at most maxCharLength
-     * characters in full from a utf-8 encoded byte array provided
-     * by data. This does not validate utf-8 data, but
-     * operates correctly on already valid utf-8 data.
-     *
-     * @param maxCharLength number of characters required
-     * @param data the bytes of UTF-8
-     * @param length the length of data to truncate
-     */
-    static uint64_t truncateBytesTo(uint64_t maxCharLength, const char* data, uint64_t length) {
-      uint64_t chars = 0;
-      if (length <= maxCharLength) {
-        return length;
-      }
-      for (uint64_t i = 0; i < length; i++) {
-        if (isUtfStartByte(data[i])) {
-          chars++;
-        }
-        if (chars > maxCharLength) {
-          return i;
-        }
-      }
-      // everything fits
-      return length;
-    }
-
-    /**
-     * Checks if b is the first byte of a UTF-8 character.
-     */
-    inline static bool isUtfStartByte(char b) {
-      return (b & 0xC0) != 0x80;
-    }
-
-    /**
-     * Find the start of the last character that ends in the current string.
-     * @param text the bytes of the utf-8
-     * @param from the first byte location
-     * @param until the last byte location
-     * @return the index of the last character
-     */
-    static uint64_t findLastCharacter(const char* text, uint64_t from, uint64_t until) {
-      uint64_t posn = until;
-      /* we don't expect characters more than 5 bytes */
-      while (posn >= from) {
-        if (isUtfStartByte(text[posn])) {
-          return posn;
-        }
-        posn -= 1;
-      }
-      /* beginning of a valid char not found */
-      throw std::logic_error("Could not truncate string, beginning of a valid char not found");
-    }
-  };
-
   class CharColumnWriter : public StringColumnWriter {
    public:
     CharColumnWriter(const Type& type, const StreamsFactory& factory, const WriterOptions& options)
@@ -1639,6 +1638,8 @@ namespace orc {
 
     virtual void recordPosition() const override;
 
+    virtual void finishStreams() override;
+
    protected:
     std::unique_ptr<RleEncoder> secRleEncoder, nanoRleEncoder;
 
@@ -1779,6 +1780,12 @@ namespace orc {
     nanoRleEncoder->recordPosition(rowIndexPosition.get());
   }
 
+  void TimestampColumnWriter::finishStreams() {
+    ColumnWriter::finishStreams();
+    secRleEncoder->finishEncode();
+    nanoRleEncoder->finishEncode();
+  }
+
   class DateColumnWriter : public IntegerColumnWriter {
    public:
     DateColumnWriter(const Type& type, const StreamsFactory& factory, const WriterOptions& options);
@@ -1848,6 +1855,8 @@ namespace orc {
 
     virtual void recordPosition() const override;
 
+    virtual void finishStreams() override;
+
    protected:
     RleVersion rleVersion;
     uint64_t precision;
@@ -1966,6 +1975,12 @@ namespace orc {
     scaleEncoder->recordPosition(rowIndexPosition.get());
   }
 
+  void Decimal64ColumnWriter::finishStreams() {
+    ColumnWriter::finishStreams();
+    valueStream->finishStream();
+    scaleEncoder->finishEncode();
+  }
+
   class Decimal64ColumnWriterV2 : public ColumnWriter {
    public:
     Decimal64ColumnWriterV2(const Type& type, const StreamsFactory& factory,
@@ -1982,6 +1997,8 @@ namespace orc {
 
     virtual void recordPosition() const override;
 
+    virtual void finishStreams() override;
+
    protected:
     uint64_t precision;
     uint64_t scale;
@@ -2072,6 +2089,11 @@ namespace orc {
     valueEncoder->recordPosition(rowIndexPosition.get());
   }
 
+  void Decimal64ColumnWriterV2::finishStreams() {
+    ColumnWriter::finishStreams();
+    valueEncoder->finishEncode();
+  }
+
   class Decimal128ColumnWriter : public Decimal64ColumnWriter {
    public:
     Decimal128ColumnWriter(const Type& type, const StreamsFactory& factory,
@@ -2187,6 +2209,8 @@ namespace orc {
 
     virtual void reset() override;
 
+    virtual void finishStreams() override;
+
    private:
     std::unique_ptr<RleEncoder> lengthEncoder_;
     RleVersion rleVersion_;
@@ -2363,6 +2387,14 @@ namespace orc {
     }
   }
 
+  void ListColumnWriter::finishStreams() {
+    ColumnWriter::finishStreams();
+    lengthEncoder_->finishEncode();
+    if (child_) {
+      child_->finishStreams();
+    }
+  }
+
   class MapColumnWriter : public ColumnWriter {
    public:
     MapColumnWriter(const Type& type, const StreamsFactory& factory, const WriterOptions& options);
@@ -2395,6 +2427,8 @@ namespace orc {
 
     virtual void reset() override;
 
+    virtual void finishStreams() override;
+
    private:
     std::unique_ptr<ColumnWriter> keyWriter_;
     std::unique_ptr<ColumnWriter> elemWriter_;
@@ -2613,6 +2647,17 @@ namespace orc {
     }
   }
 
+  void MapColumnWriter::finishStreams() {
+    ColumnWriter::finishStreams();
+    lengthEncoder_->finishEncode();
+    if (keyWriter_) {
+      keyWriter_->finishStreams();
+    }
+    if (elemWriter_) {
+      elemWriter_->finishStreams();
+    }
+  }
+
   class UnionColumnWriter : public ColumnWriter {
    public:
     UnionColumnWriter(const Type& type, const StreamsFactory& factory,
@@ -2645,6 +2690,8 @@ namespace orc {
 
     virtual void reset() override;
 
+    virtual void finishStreams() override;
+
    private:
     std::unique_ptr<ByteRleEncoder> rleEncoder_;
     std::vector<std::unique_ptr<ColumnWriter>> children_;
@@ -2816,6 +2863,14 @@ namespace orc {
     }
   }
 
+  void UnionColumnWriter::finishStreams() {
+    ColumnWriter::finishStreams();
+    rleEncoder_->finishEncode();
+    for (uint32_t i = 0; i < children_.size(); ++i) {
+      children_[i]->finishStreams();
+    }
+  }
+
   std::unique_ptr<ColumnWriter> buildWriter(const Type& type, const StreamsFactory& factory,
                                             const WriterOptions& options) {
     switch (static_cast<int64_t>(type.getKind())) {
diff --git a/c++/src/ColumnWriter.hh b/c++/src/ColumnWriter.hh
index 8afd1eb72c..1c5e15d707 100644
--- a/c++/src/ColumnWriter.hh
+++ b/c++/src/ColumnWriter.hh
@@ -179,6 +179,18 @@ namespace orc {
      */
     virtual void writeDictionary();
 
+    /**
+     * Finalize the encoding and compressing process. This function should be
+     * called after all data required for encoding has been added. It ensures
+     * that any remaining data is processed and the final state of the streams
+     * is set.
+     * Note: the boolean type cannot cut off the current byte if it is not filled
+     * with 8 bits, otherwise Boolean RLE may incorrectly read the unfilled
+     * trailing bits. In this case, the last byte will be the head of the next
+     * compression block.
+     */
+    virtual void finishStreams();
+
    protected:
     /**
      * Utility function to translate ColumnStatistics into protobuf form and
     * add it to the
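Compression.cc below splits the old single buffer into a raw-input `BlockBuffer` (grown in `memoryBlockSize` steps) and a separate compressed-output buffer, compressing whenever the raw buffer reaches `compressionBlockSize`. A simplified sketch of that control flow — the names here are invented for illustration, not the real classes:

```cpp
#include <cstdint>
#include <vector>

// Toy model of the two-buffer scheme: callers append into `raw`; once it
// holds compressionBlockSize bytes, one compressed chunk is emitted.
struct ToyCompressionStream {
  uint64_t compressionBlockSize = 256 * 1024;  // max uncompressed chunk
  uint64_t memoryBlockSize = 64 * 1024;        // growth step of `raw`
  std::vector<uint8_t> raw;                    // uncompressed staging buffer

  void append(const uint8_t* data, uint64_t size) {
    for (uint64_t i = 0; i < size; ++i) {
      if (raw.size() == compressionBlockSize) {
        compressChunk();  // full block: write 3-byte header + codec output
      }
      if (raw.size() == raw.capacity()) {
        raw.reserve(raw.capacity() + memoryBlockSize);  // grow by one block
      }
      raw.push_back(data[i]);
    }
  }

  void finish() {  // mirrors finishStream(): flush the trailing partial block
    if (!raw.empty()) compressChunk();
  }

  void compressChunk() {
    // ... run the codec; if the output is not smaller than raw.size(), store
    // the block uncompressed with the "original" bit set in the header ...
    raw.clear();
  }
};
```

Decoupling the two sizes means a large compression block no longer forces an equally large upfront allocation; the raw buffer grows on demand in `memoryBlockSize` increments.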
outputSize(0), + compressionBlockSize(compressionBlockSize) { // init header pointer array header.fill(nullptr); } - void CompressionStreamBase::BackUp(int count) { - if (count > bufferSize) { - throw std::logic_error("Can't backup that much!"); - } - bufferSize -= count; - } - - uint64_t CompressionStreamBase::flush() { - void* data; - int size; - if (!Next(&data, &size)) { - throw std::runtime_error("Failed to flush compression buffer."); - } - BufferedOutputStream::BackUp(outputSize - outputPosition); - bufferSize = outputSize = outputPosition = 0; - return BufferedOutputStream::flush(); - } - - void CompressionStreamBase::suppress() { - outputBuffer = nullptr; - bufferSize = outputPosition = outputSize = 0; - BufferedOutputStream::suppress(); - } - uint64_t CompressionStreamBase::getSize() const { return BufferedOutputStream::getSize() - static_cast(outputSize - outputPosition); } @@ -149,12 +129,12 @@ namespace orc { while (offset < size) { if (outputPosition == outputSize) { if (!BufferedOutputStream::Next(reinterpret_cast(&outputBuffer), &outputSize)) { - throw std::runtime_error("Failed to get next output buffer from output stream."); + throw CompressionError("Failed to get next output buffer from output stream."); } outputPosition = 0; } else if (outputPosition > outputSize) { // for safety this will unlikely happen - throw std::logic_error("Write to an out-of-bound place during compression!"); + throw CompressionError("Write to an out-of-bound place during compression!"); } int currentSize = std::min(outputSize - outputPosition, size - offset); memcpy(outputBuffer + outputPosition, data + offset, static_cast(currentSize)); @@ -168,7 +148,7 @@ namespace orc { for (uint32_t i = 0; i < HEADER_SIZE; ++i) { if (outputPosition >= outputSize) { if (!BufferedOutputStream::Next(reinterpret_cast(&outputBuffer), &outputSize)) { - throw std::runtime_error("Failed to get next output buffer from output stream."); + throw CompressionError("Failed to get next output buffer from output stream."); } outputPosition = 0; } @@ -183,31 +163,74 @@ namespace orc { class CompressionStream : public CompressionStreamBase { public: CompressionStream(OutputStream* outStream, int compressionLevel, uint64_t capacity, - uint64_t blockSize, MemoryPool& pool, WriterMetrics* metrics); + uint64_t compressionBlockSize, uint64_t memoryBlockSize, MemoryPool& pool, + WriterMetrics* metrics); virtual bool Next(void** data, int* size) override; virtual std::string getName() const override = 0; + virtual void BackUp(int count) override; + virtual void suppress() override; + virtual uint64_t flush() override; + uint64_t getRawInputBufferSize() const override { + return rawInputBuffer.size(); + } + virtual void finishStream() override { + compressInternal(); + BufferedOutputStream::finishStream(); + } protected: // return total compressed size virtual uint64_t doStreamingCompression() = 0; + + // Buffer to hold uncompressed data until user calls Next() + BlockBuffer rawInputBuffer; + + void compressInternal(); }; + void CompressionStream::BackUp(int count) { + uint64_t backup = static_cast(count); + uint64_t currSize = rawInputBuffer.size(); + if (backup > currSize) { + throw CompressionError("Can't backup that much!"); + } + rawInputBuffer.resize(currSize - backup); + } + + uint64_t CompressionStream::flush() { + compressInternal(); + BufferedOutputStream::BackUp(outputSize - outputPosition); + rawInputBuffer.resize(0); + outputSize = outputPosition = 0; + return BufferedOutputStream::flush(); + } + + void 
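
CompressionStream now buffers raw input in a BlockBuffer instead of a contiguous DataBuffer, so BackUp() simply shrinks the logical size. The contract being preserved is the ZeroCopyOutputStream one: Next() hands the caller a writable block, and BackUp(n) returns the unused n bytes. A toy model of that contract (not ORC's BufferedOutputStream):

#include <cstring>
#include <vector>

struct ToyStream {
  std::vector<char> buffer;
  size_t used = 0;

  bool Next(void** data, int* size) {
    const size_t block = 64;  // hand out fixed 64-byte blocks
    buffer.resize(used + block);
    *data = buffer.data() + used;
    *size = static_cast<int>(block);
    used += block;
    return true;
  }
  // Returning unused bytes is just a size adjustment.
  void BackUp(int count) { used -= static_cast<size_t>(count); }
};

int main() {
  ToyStream s;
  void* data;
  int size;
  s.Next(&data, &size);
  std::memcpy(data, "hello", 5);
  s.BackUp(size - 5);  // only 5 of the 64 bytes were written
  return s.used == 5 ? 0 : 1;
}
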
CompressionStream::suppress() { + outputBuffer = nullptr; + outputPosition = outputSize = 0; + rawInputBuffer.resize(0); + BufferedOutputStream::suppress(); + } + CompressionStream::CompressionStream(OutputStream* outStream, int compressionLevel, - uint64_t capacity, uint64_t blockSize, MemoryPool& pool, + uint64_t capacity, uint64_t compressionBlockSize, + uint64_t memoryBlockSize, MemoryPool& pool, WriterMetrics* metrics) - : CompressionStreamBase(outStream, compressionLevel, capacity, blockSize, pool, metrics) { + : CompressionStreamBase(outStream, compressionLevel, capacity, compressionBlockSize, + memoryBlockSize, pool, metrics), + rawInputBuffer(pool, memoryBlockSize) { // PASS } - bool CompressionStream::Next(void** data, int* size) { - if (bufferSize != 0) { + void CompressionStream::compressInternal() { + if (rawInputBuffer.size() != 0) { ensureHeader(); uint64_t preSize = getSize(); uint64_t totalCompressedSize = doStreamingCompression(); - if (totalCompressedSize >= static_cast(bufferSize)) { - writeHeader(static_cast(bufferSize), true); + if (totalCompressedSize >= static_cast(rawInputBuffer.size())) { + writeHeader(static_cast(rawInputBuffer.size()), true); // reset output buffer outputBuffer = nullptr; outputPosition = outputSize = 0; @@ -215,23 +238,42 @@ namespace orc { BufferedOutputStream::BackUp(static_cast(backup)); // copy raw input buffer into block buffer - writeData(rawInputBuffer.data(), bufferSize); + uint64_t blockNumber = rawInputBuffer.getBlockNumber(); + for (uint64_t i = 0; i < blockNumber; ++i) { + auto block = rawInputBuffer.getBlock(i); + writeData(reinterpret_cast(block.data), block.size); + } } else { writeHeader(totalCompressedSize, false); } + rawInputBuffer.resize(0); } + } - *data = rawInputBuffer.data(); - *size = static_cast(rawInputBuffer.size()); - bufferSize = *size; + bool CompressionStream::Next(void** data, int* size) { + if (rawInputBuffer.size() > compressionBlockSize) { + std::stringstream ss; + ss << "uncompressed data size " << rawInputBuffer.size() + << " is larger than compression block size " << compressionBlockSize; + throw CompressionError(ss.str()); + } + + // compress data in the rawInputBuffer when it is full + if (rawInputBuffer.size() == compressionBlockSize) { + compressInternal(); + } + auto block = rawInputBuffer.getNextBlock(); + *data = block.data; + *size = static_cast(block.size); return true; } class ZlibCompressionStream : public CompressionStream { public: - ZlibCompressionStream(OutputStream* outStream, int compressionLevel, uint64_t capacity, - uint64_t blockSize, MemoryPool& pool, WriterMetrics* metrics); + ZlibCompressionStream(OutputStream* outStream, int compressionLevel, uint64_t bufferCapacity, + uint64_t compressionBlockSize, uint64_t memoryBlockSize, MemoryPool& pool, + WriterMetrics* metrics); virtual ~ZlibCompressionStream() override { end(); @@ -249,42 +291,57 @@ namespace orc { }; ZlibCompressionStream::ZlibCompressionStream(OutputStream* outStream, int compressionLevel, - uint64_t capacity, uint64_t blockSize, - MemoryPool& pool, WriterMetrics* metrics) - : CompressionStream(outStream, compressionLevel, capacity, blockSize, pool, metrics) { + uint64_t bufferCapacity, + uint64_t compressionBlockSize, + uint64_t memoryBlockSize, MemoryPool& pool, + WriterMetrics* metrics) + : CompressionStream(outStream, compressionLevel, bufferCapacity, compressionBlockSize, + memoryBlockSize, pool, metrics) { init(); } uint64_t ZlibCompressionStream::doStreamingCompression() { if (deflateReset(&strm_) != Z_OK) { - throw 
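
The new Next() decouples two sizes that used to be a single knob: memoryBlockSize sets the granularity of the blocks handed to callers, while compressionBlockSize is the threshold at which accumulated raw input is actually compressed. A toy model of that accumulate-then-compress policy, with compressOneChunk() standing in for doStreamingCompression():

#include <cstdint>
#include <string>
#include <vector>

class AccumulatingSink {
 public:
  explicit AccumulatingSink(uint64_t compressionBlockSize) : limit_(compressionBlockSize) {}

  void append(const std::string& bytes) {
    raw_.insert(raw_.end(), bytes.begin(), bytes.end());
    if (raw_.size() >= limit_) {
      compressOneChunk();  // placeholder for real codec work
      raw_.clear();
    }
  }
  uint64_t chunksCompressed() const { return chunks_; }

 private:
  void compressOneChunk() { ++chunks_; }
  std::vector<char> raw_;
  uint64_t limit_;
  uint64_t chunks_ = 0;
};

int main() {
  AccumulatingSink sink(8);
  for (int i = 0; i < 5; ++i) sink.append("abcd");  // 20 bytes, threshold 8
  return sink.chunksCompressed() == 2 ? 0 : 1;      // compressed at 8 and 16
}
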
std::runtime_error("Failed to reset inflate."); + throw CompressionError("Failed to reset inflate."); } - strm_.avail_in = static_cast(bufferSize); - strm_.next_in = rawInputBuffer.data(); + // iterate through all blocks + uint64_t blockId = 0; + bool finish = false; do { - if (outputPosition >= outputSize) { - if (!BufferedOutputStream::Next(reinterpret_cast(&outputBuffer), &outputSize)) { - throw std::runtime_error("Failed to get next output buffer from output stream."); - } - outputPosition = 0; + if (blockId == rawInputBuffer.getBlockNumber()) { + finish = true; + strm_.avail_in = 0; + strm_.next_in = nullptr; + } else { + auto block = rawInputBuffer.getBlock(blockId++); + strm_.avail_in = static_cast(block.size); + strm_.next_in = reinterpret_cast(block.data); } - strm_.next_out = reinterpret_cast(outputBuffer + outputPosition); - strm_.avail_out = static_cast(outputSize - outputPosition); - int ret = deflate(&strm_, Z_FINISH); - outputPosition = outputSize - static_cast(strm_.avail_out); + do { + if (outputPosition >= outputSize) { + if (!BufferedOutputStream::Next(reinterpret_cast(&outputBuffer), &outputSize)) { + throw CompressionError("Failed to get next output buffer from output stream."); + } + outputPosition = 0; + } + strm_.next_out = reinterpret_cast(outputBuffer + outputPosition); + strm_.avail_out = static_cast(outputSize - outputPosition); - if (ret == Z_STREAM_END) { - break; - } else if (ret == Z_OK) { - // needs more buffer so will continue the loop - } else { - throw std::runtime_error("Failed to deflate input data."); - } - } while (strm_.avail_out == 0); + int ret = deflate(&strm_, finish ? Z_FINISH : Z_NO_FLUSH); + outputPosition = outputSize - static_cast(strm_.avail_out); + if (ret == Z_STREAM_END) { + break; + } else if (ret == Z_OK) { + // needs more buffer so will continue the loop + } else { + throw CompressionError("Failed to deflate input data."); + } + } while (strm_.avail_out == 0); + } while (!finish); return strm_.total_out; } @@ -305,7 +362,7 @@ namespace orc { strm_.next_in = nullptr; if (deflateInit2(&strm_, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY) != Z_OK) { - throw std::runtime_error("Error while calling deflateInit2() for zlib."); + throw CompressionError("Error while calling deflateInit2() for zlib."); } } @@ -505,7 +562,7 @@ namespace orc { } else if (state == DECOMPRESS_START) { NextDecompress(data, size, availableSize); } else { - throw std::logic_error( + throw CompressionError( "Unknown compression state in " "DecompressionStream::Next"); } @@ -519,7 +576,7 @@ namespace orc { void DecompressionStream::BackUp(int count) { if (outputBuffer == nullptr || outputBufferLength != 0) { - throw std::logic_error("Backup without previous Next in " + getName()); + throw CompressionError("Backup without previous Next in " + getName()); } outputBuffer -= static_cast(count); outputBufferLength = static_cast(count); @@ -647,13 +704,17 @@ namespace orc { case Z_OK: break; case Z_MEM_ERROR: - throw std::logic_error("Memory error from inflateInit2"); + throw CompressionError( + "Memory error from ZlibDecompressionStream::ZlibDecompressionStream inflateInit2"); case Z_VERSION_ERROR: - throw std::logic_error("Version error from inflateInit2"); + throw CompressionError( + "Version error from ZlibDecompressionStream::ZlibDecompressionStream inflateInit2"); case Z_STREAM_ERROR: - throw std::logic_error("Stream error from inflateInit2"); + throw CompressionError( + "Stream error from ZlibDecompressionStream::ZlibDecompressionStream inflateInit2"); default: - 
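
The reworked deflate loop above feeds each BlockBuffer block with Z_NO_FLUSH and only passes Z_FINISH once every block has been consumed, so multiple discontiguous buffers compress into a single deflate stream. A standalone illustration of that zlib pattern with two input buffers:

#include <zlib.h>
#include <cstring>
#include <vector>

int main() {
  const char* blocks[2] = {"hello ", "world"};
  z_stream strm;
  std::memset(&strm, 0, sizeof(strm));
  // Raw deflate (windowBits -15), matching the writer's init() above.
  if (deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -15, 8,
                   Z_DEFAULT_STRATEGY) != Z_OK) {
    return 1;
  }

  std::vector<unsigned char> out(256);  // ample for this tiny input
  strm.next_out = out.data();
  strm.avail_out = static_cast<uInt>(out.size());

  for (int i = 0; i < 2; ++i) {
    strm.next_in = reinterpret_cast<Bytef*>(const_cast<char*>(blocks[i]));
    strm.avail_in = static_cast<uInt>(std::strlen(blocks[i]));
    const bool last = (i == 1);
    // Intermediate blocks: Z_NO_FLUSH. Final block: Z_FINISH.
    int ret = deflate(&strm, last ? Z_FINISH : Z_NO_FLUSH);
    if (ret != (last ? Z_STREAM_END : Z_OK)) return 1;
  }
  bool produced = strm.total_out > 0;
  deflateEnd(&strm);
  return produced ? 0 : 1;
}
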
throw std::logic_error("Unknown error from inflateInit2"); + throw CompressionError( + "Unknown error from ZlibDecompressionStream::ZlibDecompressionStream inflateInit2"); } } @@ -674,7 +735,7 @@ namespace orc { zstream_.next_out = reinterpret_cast(const_cast(outputBuffer)); zstream_.avail_out = static_cast(outputDataBuffer.capacity()); if (inflateReset(&zstream_) != Z_OK) { - throw std::logic_error( + throw CompressionError( "Bad inflateReset in " "ZlibDecompressionStream::NextDecompress"); } @@ -694,19 +755,19 @@ namespace orc { case Z_STREAM_END: break; case Z_BUF_ERROR: - throw std::logic_error( + throw CompressionError( "Buffer error in " "ZlibDecompressionStream::NextDecompress"); case Z_DATA_ERROR: - throw std::logic_error( + throw CompressionError( "Data error in " "ZlibDecompressionStream::NextDecompress"); case Z_STREAM_ERROR: - throw std::logic_error( + throw CompressionError( "Stream error in " "ZlibDecompressionStream::NextDecompress"); default: - throw std::logic_error( + throw CompressionError( "Unknown error in " "ZlibDecompressionStream::NextDecompress"); } @@ -812,7 +873,7 @@ namespace orc { } if (outLength > maxOutputLength) { - throw std::logic_error("Snappy length exceeds block size"); + throw CompressionError("Snappy length exceeds block size"); } if (!snappy::RawUncompress(input, length, output)) { @@ -881,14 +942,23 @@ namespace orc { public: BlockCompressionStream(OutputStream* outStream, int compressionLevel, uint64_t capacity, uint64_t blockSize, MemoryPool& pool, WriterMetrics* metrics) - : CompressionStreamBase(outStream, compressionLevel, capacity, blockSize, pool, metrics), - compressorBuffer(pool) { + : CompressionStreamBase(outStream, compressionLevel, capacity, blockSize, blockSize, pool, + metrics), + compressorBuffer(pool), + rawInputBuffer(pool, blockSize) { // PASS } virtual bool Next(void** data, int* size) override; virtual void suppress() override; + virtual void BackUp(int count) override; + virtual uint64_t flush() override; virtual std::string getName() const override = 0; + uint64_t getRawInputBufferSize() const override { + return bufferSize; + } + + virtual void finishStream() override; protected: // compresses a block and returns the compressed size @@ -900,8 +970,23 @@ namespace orc { // should allocate max possible compressed size DataBuffer compressorBuffer; + + // Buffer to hold uncompressed data until user calls Next() + DataBuffer rawInputBuffer; }; + void BlockCompressionStream::BackUp(int count) { + if (count > bufferSize) { + throw CompressionError("Can't backup that much!"); + } + bufferSize -= count; + } + + uint64_t BlockCompressionStream::flush() { + finishStream(); + return BufferedOutputStream::flush(); + } + bool BlockCompressionStream::Next(void** data, int* size) { if (bufferSize != 0) { ensureHeader(); @@ -935,7 +1020,19 @@ namespace orc { void BlockCompressionStream::suppress() { compressorBuffer.resize(0); - CompressionStreamBase::suppress(); + outputBuffer = nullptr; + bufferSize = outputPosition = outputSize = 0; + BufferedOutputStream::suppress(); + } + + void BlockCompressionStream::finishStream() { + void* data; + int size; + if (!Next(&data, &size)) { + throw CompressionError("Failed to flush compression buffer."); + } + BufferedOutputStream::BackUp(outputSize - outputPosition); + bufferSize = outputSize = outputPosition = 0; } /** @@ -976,7 +1073,7 @@ namespace orc { reinterpret_cast(compressorBuffer.data()), bufferSize, static_cast(compressorBuffer.size()), level); if (result == 0) { - throw 
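
The LZ4 path relies on the C API's error convention: a zero return value means failure, which the writer now surfaces as CompressionError rather than a generic runtime_error. A self-contained sketch of that convention using the public one-shot API:

#include <lz4.h>
#include <cstring>
#include <vector>

int main() {
  const char* input = "repetitive repetitive repetitive repetitive";
  const int inSize = static_cast<int>(std::strlen(input));
  // LZ4_compressBound() gives the worst-case compressed size.
  std::vector<char> out(LZ4_compressBound(inSize));
  int written = LZ4_compress_default(input, out.data(), inSize,
                                     static_cast<int>(out.size()));
  if (written == 0) {
    return 1;  // the point where the writer throws CompressionError
  }
  return 0;
}
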
std::runtime_error("Error during block compression using lz4."); + throw CompressionError("Error during block compression using lz4."); } return static_cast(result); } @@ -984,7 +1081,7 @@ namespace orc { void Lz4CompressionSteam::init() { state_ = LZ4_createStream(); if (!state_) { - throw std::runtime_error("Error while allocating state for lz4."); + throw CompressionError("Error while allocating state for lz4."); } } @@ -1072,7 +1169,7 @@ namespace orc { void ZSTDCompressionStream::init() { cctx_ = ZSTD_createCCtx(); if (!cctx_) { - throw std::runtime_error("Error while calling ZSTD_createCCtx() for zstd."); + throw CompressionError("Error while calling ZSTD_createCCtx() for zstd."); } } @@ -1129,7 +1226,7 @@ namespace orc { void ZSTDDecompressionStream::init() { dctx_ = ZSTD_createDCtx(); if (!dctx_) { - throw std::runtime_error("Error while calling ZSTD_createDCtx() for zstd."); + throw CompressionError("Error while calling ZSTD_createDCtx() for zstd."); } } @@ -1140,12 +1237,10 @@ namespace orc { DIAGNOSTIC_PUSH - std::unique_ptr createCompressor(CompressionKind kind, - OutputStream* outStream, - CompressionStrategy strategy, - uint64_t bufferCapacity, - uint64_t compressionBlockSize, - MemoryPool& pool, WriterMetrics* metrics) { + std::unique_ptr createCompressor( + CompressionKind kind, OutputStream* outStream, CompressionStrategy strategy, + uint64_t bufferCapacity, uint64_t compressionBlockSize, uint64_t memoryBlockSize, + MemoryPool& pool, WriterMetrics* metrics) { switch (static_cast(kind)) { case CompressionKind_NONE: { return std::make_unique(pool, outStream, bufferCapacity, @@ -1154,8 +1249,8 @@ namespace orc { case CompressionKind_ZLIB: { int level = (strategy == CompressionStrategy_SPEED) ? Z_BEST_SPEED + 1 : Z_DEFAULT_COMPRESSION; - return std::make_unique(outStream, level, bufferCapacity, - compressionBlockSize, pool, metrics); + return std::make_unique( + outStream, level, bufferCapacity, compressionBlockSize, memoryBlockSize, pool, metrics); } case CompressionKind_ZSTD: { int level = (strategy == CompressionStrategy_SPEED) ? 
1 : ZSTD_CLEVEL_DEFAULT; diff --git a/c++/src/Compression.hh b/c++/src/Compression.hh index 55b152dd63..24170c56b4 100644 --- a/c++/src/Compression.hh +++ b/c++/src/Compression.hh @@ -42,15 +42,16 @@ namespace orc { * @param outStream the output stream that is the underlying target * @param strategy compression strategy * @param bufferCapacity compression stream buffer total capacity - * @param compressionBlockSize compression buffer block size + * @param compressionBlockSize compression is triggered when the original input buffer size + * reaches this size + * @param memoryBlockSize the block size for original input buffer * @param pool the memory pool + * @param metrics the writer metrics */ - std::unique_ptr createCompressor(CompressionKind kind, - OutputStream* outStream, - CompressionStrategy strategy, - uint64_t bufferCapacity, - uint64_t compressionBlockSize, - MemoryPool& pool, WriterMetrics* metrics); + std::unique_ptr createCompressor( + CompressionKind kind, OutputStream* outStream, CompressionStrategy strategy, + uint64_t bufferCapacity, uint64_t compressionBlockSize, uint64_t memoryBlockSize, + MemoryPool& pool, WriterMetrics* metrics); } // namespace orc #endif diff --git a/c++/src/ConvertColumnReader.cc b/c++/src/ConvertColumnReader.cc index 67ee6d6c45..c0f88246e8 100644 --- a/c++/src/ConvertColumnReader.cc +++ b/c++/src/ConvertColumnReader.cc @@ -17,6 +17,9 @@ */ #include "ConvertColumnReader.hh" +#include "Utils.hh" + +#include namespace orc { @@ -72,6 +75,23 @@ namespace orc { } } + static inline void handleParseFromStringError(ColumnVectorBatch& dstBatch, uint64_t idx, + bool shouldThrow, const std::string& typeName, + const std::string& str, + const std::string& expectedFormat = "") { + if (!shouldThrow) { + dstBatch.notNull.data()[idx] = 0; + dstBatch.hasNulls = true; + } else { + std::ostringstream ss; + ss << "Failed to parse " << typeName << " from string:" << str; + if (expectedFormat != "") { + ss << " the following format \"" << expectedFormat << "\" is expected"; + } + throw SchemaEvolutionError(ss.str()); + } + } + // return false if overflow template static bool downCastToInteger(ReadType& dstValue, int64_t inputLong) { @@ -399,13 +419,14 @@ namespace orc { ConvertToTimestampColumnReader(const Type& readType, const Type& fileType, StripeStreams& stripe, bool throwOnOverflow) : ConvertColumnReader(readType, fileType, stripe, throwOnOverflow), - readerTimezone(readType.getKind() == TIMESTAMP_INSTANT ? &getTimezoneByName("GMT") - : &stripe.getReaderTimezone()), + isInstant(readType.getKind() == TIMESTAMP_INSTANT), + readerTimezone(isInstant ? 
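
handleParseFromStringError() centralizes the two failure policies for these string conversions: tolerate the bad cell (mark it null) or fail the read (throw). A self-contained sketch of the same split, with parseLong() standing in for a column conversion and std::runtime_error standing in for SchemaEvolutionError:

#include <optional>
#include <stdexcept>
#include <string>

std::optional<long> parseLong(const std::string& s, bool shouldThrow) {
  try {
    return std::stol(s);
  } catch (...) {
    if (shouldThrow) {
      throw std::runtime_error("Failed to parse Long from string:" + s);
    }
    return std::nullopt;  // caller marks notNull[idx] = 0, hasNulls = true
  }
}

int main() {
  if (parseLong("not-a-number", /*shouldThrow=*/false).has_value()) return 1;
  try {
    parseLong("not-a-number", /*shouldThrow=*/true);
    return 1;  // should not get here
  } catch (const std::runtime_error&) {
    return 0;  // expected
  }
}
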
&getTimezoneByName("GMT") : &stripe.getReaderTimezone()), needConvertTimezone(readerTimezone != &getTimezoneByName("GMT")) {} void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override; protected: + const bool isInstant; const orc::Timezone* readerTimezone; const bool needConvertTimezone; }; @@ -558,6 +579,8 @@ namespace orc { const auto& srcBatch = *SafeCastBatchTo(data.get()); auto& dstBatch = *SafeCastBatchTo(&rowBatch); + dstBatch.precision = toPrecision_; + dstBatch.scale = toScale_; for (uint64_t i = 0; i < numValues; ++i) { if (!rowBatch.hasNulls || rowBatch.notNull[i]) { convertDecimalToDecimal(dstBatch, i, srcBatch); @@ -694,6 +717,318 @@ namespace orc { const int32_t scale_; }; + template + class StringVariantToNumericColumnReader : public ConvertColumnReader { + public: + StringVariantToNumericColumnReader(const Type& readType, const Type& fileType, + StripeStreams& stripe, bool throwOnOverflow) + : ConvertColumnReader(readType, fileType, stripe, throwOnOverflow) {} + + void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override { + ConvertColumnReader::next(rowBatch, numValues, notNull); + + const auto& srcBatch = *SafeCastBatchTo(data.get()); + auto& dstBatch = *SafeCastBatchTo(&rowBatch); + for (uint64_t i = 0; i < numValues; ++i) { + if (!rowBatch.hasNulls || rowBatch.notNull[i]) { + if constexpr (std::is_floating_point_v) { + convertToDouble(dstBatch, srcBatch, i); + } else { + convertToInteger(dstBatch, srcBatch, i); + } + } + } + } + + private: + void convertToInteger(ReadTypeBatch& dstBatch, const StringVectorBatch& srcBatch, + uint64_t idx) { + int64_t longValue = 0; + const std::string longStr(srcBatch.data[idx], srcBatch.length[idx]); + try { + longValue = std::stoll(longStr); + } catch (...) { + handleParseFromStringError(dstBatch, idx, throwOnOverflow, "Long", longStr); + return; + } + if constexpr (std::is_same_v) { + dstBatch.data[idx] = longValue == 0 ? 0 : 1; + } else { + if (!downCastToInteger(dstBatch.data[idx], longValue)) { + handleOverflow(dstBatch, idx, throwOnOverflow); + } + } + } + + void convertToDouble(ReadTypeBatch& dstBatch, const StringVectorBatch& srcBatch, uint64_t idx) { + const std::string floatValue(srcBatch.data[idx], srcBatch.length[idx]); + try { + if constexpr (std::is_same_v) { + dstBatch.data[idx] = std::stof(floatValue); + } else { + dstBatch.data[idx] = std::stod(floatValue); + } + } catch (...) 
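
convertToInteger() above parses into int64_t first and only then narrows, so overflow is caught by a range check rather than by undefined narrowing behavior. A minimal standalone version of that stoll-then-downcast flow for an int16_t destination:

#include <cstdint>
#include <limits>
#include <string>

bool parseToInt16(const std::string& s, int16_t& out) {
  int64_t wide;
  try {
    wide = std::stoll(s);
  } catch (...) {
    return false;  // parse-error path
  }
  // Mirror of downCastToInteger(): reject values outside the target range.
  if (wide < std::numeric_limits<int16_t>::min() ||
      wide > std::numeric_limits<int16_t>::max()) {
    return false;  // overflow path
  }
  out = static_cast<int16_t>(wide);
  return true;
}

int main() {
  int16_t v = 0;
  bool okSmall = parseToInt16("1234", v);  // fits
  bool okBig = parseToInt16("70000", v);   // overflows int16_t
  return (okSmall && !okBig) ? 0 : 1;
}
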
{ + handleParseFromStringError(dstBatch, idx, throwOnOverflow, typeid(readType).name(), + floatValue); + } + } + }; + + class StringVariantConvertColumnReader : public ConvertToStringVariantColumnReader { + public: + StringVariantConvertColumnReader(const Type& readType, const Type& fileType, + StripeStreams& stripe, bool throwOnOverflow) + : ConvertToStringVariantColumnReader(readType, fileType, stripe, throwOnOverflow) {} + + uint64_t convertToStrBuffer(ColumnVectorBatch& rowBatch, uint64_t numValues) override { + uint64_t size = 0; + strBuffer.resize(numValues); + const auto& srcBatch = *SafeCastBatchTo(data.get()); + const auto maxLength = readType.getMaximumLength(); + if (readType.getKind() == STRING) { + for (uint64_t i = 0; i < numValues; ++i) { + if (!rowBatch.hasNulls || rowBatch.notNull[i]) { + strBuffer[i] = std::string(srcBatch.data[i], srcBatch.length[i]); + size += strBuffer[i].size(); + } + } + } else if (readType.getKind() == VARCHAR) { + for (uint64_t i = 0; i < numValues; ++i) { + if (!rowBatch.hasNulls || rowBatch.notNull[i]) { + const char* charData = srcBatch.data[i]; + uint64_t originLength = srcBatch.length[i]; + uint64_t itemLength = Utf8Utils::truncateBytesTo(maxLength, charData, originLength); + strBuffer[i] = std::string(charData, itemLength); + size += strBuffer[i].length(); + } + } + } else if (readType.getKind() == CHAR) { + for (uint64_t i = 0; i < numValues; ++i) { + if (!rowBatch.hasNulls || rowBatch.notNull[i]) { + const char* charData = srcBatch.data[i]; + uint64_t originLength = srcBatch.length[i]; + uint64_t charLength = Utf8Utils::charLength(charData, originLength); + auto itemLength = Utf8Utils::truncateBytesTo(maxLength, charData, originLength); + strBuffer[i] = std::string(srcBatch.data[i], itemLength); + // the padding is exactly 1 byte per char + if (charLength < maxLength) { + strBuffer[i].resize(itemLength + maxLength - charLength, ' '); + } + size += strBuffer[i].length(); + } + } + } else { + throw SchemaEvolutionError("Invalid type for numeric to string conversion: " + + readType.toString()); + } + return size; + } + }; + + class StringVariantToTimestampColumnReader : public ConvertToTimestampColumnReader { + public: + StringVariantToTimestampColumnReader(const Type& readType, const Type& fileType, + StripeStreams& stripe, bool throwOnOverflow) + : ConvertToTimestampColumnReader(readType, fileType, stripe, throwOnOverflow) {} + + void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override { + ConvertToTimestampColumnReader::next(rowBatch, numValues, notNull); + + const auto& srcBatch = *SafeCastBatchTo(data.get()); + auto& dstBatch = *SafeCastBatchTo(&rowBatch); + + for (uint64_t i = 0; i < numValues; ++i) { + if (!rowBatch.hasNulls || rowBatch.notNull[i]) { + convertToTimestamp(dstBatch, i, std::string(srcBatch.data[i], srcBatch.length[i])); + } + } + } + + private: + // Algorithm: http://howardhinnant.github.io/date_algorithms.html + // The algorithm implements a proleptic Gregorian calendar. + int64_t daysFromProlepticGregorianCalendar(int32_t y, int32_t m, int32_t d) { + y -= m <= 2; + int32_t era = y / 400; + int32_t yoe = y - era * 400; // [0, 399] + int32_t doy = (153 * (m + (m > 2 ? 
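
The CHAR branch above truncates to at most maxLength characters and then right-pads with single-byte spaces until the value holds exactly maxLength characters. A simplified ASCII-only sketch of that rule (the real code counts UTF-8 characters via Utf8Utils, not bytes):

#include <string>

std::string padChar(std::string value, size_t n) {
  if (value.size() > n) value.resize(n);       // truncate to n chars
  if (value.size() < n) value.resize(n, ' ');  // pad with spaces to n chars
  return value;
}

int main() { return padChar("ab", 5) == "ab   " ? 0 : 1; }
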
-3 : 9)) + 2) / 5 + d - 1; // [0, 365] + int32_t doe = yoe * 365 + yoe / 4 - yoe / 100 + doy; // [0, 146096] + return 1ll * era * 146097 + doe - 719468; + } + + std::optional> tryBestToParseFromString( + const std::string& timeStr) { + int32_t year, month, day, hour, min, sec, nanos = 0; + int32_t matched = std::sscanf(timeStr.c_str(), "%4d-%2d-%2d %2d:%2d:%2d.%d", &year, &month, + &day, &hour, &min, &sec, &nanos); + if (matched != 6 && matched != 7) { + return std::nullopt; + } + if (nanos) { + if (nanos < 0 || nanos >= 1e9) { + return std::nullopt; + } + while (nanos < static_cast(1e8)) { + nanos *= 10; + } + } + int64_t daysSinceEpoch = daysFromProlepticGregorianCalendar(year, month, day); + int64_t secondSinceEpoch = 60ll * (60 * (24L * daysSinceEpoch + hour) + min) + sec; + return std::make_optional(std::pair{secondSinceEpoch, nanos}); + } + + void convertToTimestamp(TimestampVectorBatch& dstBatch, uint64_t idx, + const std::string& timeStr) { + // Expected timestamp_instant format string : yyyy-mm-dd hh:mm:ss[.xxx] timezone + // Eg. "2019-07-09 13:11:00 America/Los_Angeles" + // Expected timestamp format string : yyyy-mm-dd hh:mm:ss[.xxx] + // Eg. "2019-07-09 13:11:00" + static std::string expectedTimestampInstantFormat = "yyyy-mm-dd hh:mm:ss[.xxx] timezone"; + static std::string expectedTimestampFormat = "yyyy-mm-dd hh:mm:ss[.xxx]"; + auto timestamp = tryBestToParseFromString(timeStr); + if (!timestamp.has_value()) { + if (!isInstant) { + handleParseFromStringError(dstBatch, idx, throwOnOverflow, "Timestamp", timeStr, + expectedTimestampFormat); + return; + } + handleParseFromStringError(dstBatch, idx, throwOnOverflow, "Timestamp_Instant", timeStr, + expectedTimestampInstantFormat); + return; + } + + auto& [second, nanos] = timestamp.value(); + + if (isInstant) { + size_t pos = 0; // get the name of timezone + pos = timeStr.find(' ', pos) + 1; + pos = timeStr.find(' ', pos); + if (pos == std::string::npos) { + handleParseFromStringError(dstBatch, idx, throwOnOverflow, "Timestamp_Instant", timeStr, + expectedTimestampInstantFormat); + return; + } + pos += 1; + size_t subStrLength = timeStr.length() - pos; + try { + second = getTimezoneByName(timeStr.substr(pos, subStrLength)).convertFromUTC(second); + } catch (const TimezoneError&) { + handleParseFromStringError(dstBatch, idx, throwOnOverflow, "Timestamp_Instant", timeStr, + expectedTimestampInstantFormat); + return; + } + } else { + if (needConvertTimezone) { + second = readerTimezone->convertFromUTC(second); + } + } + dstBatch.data[idx] = second; + dstBatch.nanoseconds[idx] = nanos; + } + }; + + template + class StringVariantToDecimalColumnReader : public ConvertColumnReader { + public: + StringVariantToDecimalColumnReader(const Type& readType, const Type& fileType, + StripeStreams& stripe, bool throwOnOverflow) + : ConvertColumnReader(readType, fileType, stripe, throwOnOverflow), + precision_(static_cast(readType.getPrecision())), + scale_(static_cast(readType.getScale())) {} + + void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override { + ConvertColumnReader::next(rowBatch, numValues, notNull); + + const auto& srcBatch = *SafeCastBatchTo(data.get()); + auto& dstBatch = *SafeCastBatchTo(&rowBatch); + for (uint64_t i = 0; i < numValues; ++i) { + if (!rowBatch.hasNulls || rowBatch.notNull[i]) { + convertToDecimal(dstBatch, i, std::string(srcBatch.data[i], srcBatch.length[i])); + } + } + } + + private: + void convertToDecimal(ReadTypeBatch& dstBatch, uint64_t idx, const std::string& decimalStr) { + constexpr 
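
The day count above is Howard Hinnant's civil-date algorithm, and the fractional-seconds loop scales a short fraction like ".5" up to nanoseconds. A standalone self-check combining the sscanf format, the day count (written here to also handle negative years; the version above assumes year >= 0), and the nanos scaling:

#include <cstdint>
#include <cstdio>

int64_t daysFromCivil(int32_t y, int32_t m, int32_t d) {
  y -= m <= 2;
  const int32_t era = (y >= 0 ? y : y - 399) / 400;
  const int32_t yoe = y - era * 400;                                   // [0, 399]
  const int32_t doy = (153 * (m + (m > 2 ? -3 : 9)) + 2) / 5 + d - 1;  // [0, 365]
  const int32_t doe = yoe * 365 + yoe / 4 - yoe / 100 + doy;           // [0, 146096]
  return static_cast<int64_t>(era) * 146097 + doe - 719468;
}

int main() {
  int32_t year, month, day, hour, min, sec, nanos = 0;
  int matched = std::sscanf("2019-07-09 13:11:00.5", "%4d-%2d-%2d %2d:%2d:%2d.%d",
                            &year, &month, &day, &hour, &min, &sec, &nanos);
  if (matched != 7) return 1;
  while (nanos < 100000000) nanos *= 10;  // ".5" scales to 500000000 ns
  int64_t days = daysFromCivil(year, month, day);
  int64_t seconds = 60LL * (60 * (24LL * days + hour) + min) + sec;
  // Epoch maps to day 0; 2019-07-09 is 18086 days after it.
  return (daysFromCivil(1970, 1, 1) == 0 && days == 18086 &&
          nanos == 500000000 && seconds > 0) ? 0 : 1;
}
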
int32_t MAX_PRECISION_128 = 38; + int32_t fromPrecision = 0; + int32_t fromScale = 0; + uint32_t start = 0; + bool negative = false; + if (decimalStr.empty()) { + handleParseFromStringError(dstBatch, idx, throwOnOverflow, "Decimal", decimalStr); + return; + } + auto dotPos = decimalStr.find('.'); + if (dotPos == std::string::npos) { + fromScale = 0; + fromPrecision = decimalStr.length(); + dotPos = decimalStr.length(); + } else { + if (dotPos + 1 == decimalStr.length()) { + handleParseFromStringError(dstBatch, idx, throwOnOverflow, "Decimal", decimalStr); + return; + } + fromPrecision = decimalStr.length() - 1; + fromScale = decimalStr.length() - dotPos - 1; + } + if (decimalStr.front() == '-') { + negative = true; + start++; + fromPrecision--; + } + const std::string integerPortion = decimalStr.substr(start, dotPos - start); + if (dotPos == start || fromPrecision > MAX_PRECISION_128 || fromPrecision <= 0 || + !std::all_of(integerPortion.begin(), integerPortion.end(), ::isdigit)) { + handleParseFromStringError(dstBatch, idx, throwOnOverflow, "Decimal", decimalStr); + return; + } + + Int128 i128; + try { + bool overflow = false; + i128 = Int128(integerPortion); + // overflow won't happen + i128 *= scaleUpInt128ByPowerOfTen(Int128(1), fromScale, overflow); + } catch (const std::exception& e) { + handleParseFromStringError(dstBatch, idx, throwOnOverflow, "Decimal", decimalStr); + return; + } + if (dotPos + 1 < decimalStr.length()) { + const std::string fractionPortion = decimalStr.substr(dotPos + 1, fromScale); + if (!std::all_of(fractionPortion.begin(), fractionPortion.end(), ::isdigit)) { + handleOverflow(dstBatch, idx, throwOnOverflow); + return; + } + i128 += Int128(fractionPortion); + } + + auto [overflow, result] = convertDecimal(i128, fromScale, precision_, scale_); + if (overflow) { + handleOverflow(dstBatch, idx, throwOnOverflow); + return; + } + if (negative) { + result.negate(); + } + + if constexpr (std::is_same_v) { + dstBatch.values[idx] = result; + } else { + if (!result.fitsInLong()) { + handleOverflow(dstBatch, idx, + throwOnOverflow); + } else { + dstBatch.values[idx] = result.toLong(); + } + } + } + + const int32_t precision_; + const int32_t scale_; + }; + #define DEFINE_NUMERIC_CONVERT_READER(FROM, TO, TYPE) \ using FROM##To##TO##ColumnReader = \ NumericConvertColumnReader; @@ -730,6 +1065,18 @@ namespace orc { using Decimal64To##TO##ColumnReader = DecimalToStringVariantColumnReader; \ using Decimal128To##TO##ColumnReader = DecimalToStringVariantColumnReader; +#define DEFINE_STRING_VARIANT_CONVERT_TO_NUMERIC_READER(FROM, TO, TYPE) \ + using FROM##To##TO##ColumnReader = StringVariantToNumericColumnReader; + +#define DEFINE_STRING_VARIANT_CONVERT_READER(FROM, TO) \ + using FROM##To##TO##ColumnReader = StringVariantConvertColumnReader; + +#define DEFINE_STRING_VARIANT_CONVERT_TO_TIMESTAMP_READER(FROM, TO) \ + using FROM##To##TO##ColumnReader = StringVariantToTimestampColumnReader; + +#define DEFINE_STRING_VARIANT_CONVERT_CONVERT_TO_DECIMAL_READER(FROM, TO) \ + using FROM##To##TO##ColumnReader = StringVariantToDecimalColumnReader; + DEFINE_NUMERIC_CONVERT_READER(Boolean, Byte, int8_t) DEFINE_NUMERIC_CONVERT_READER(Boolean, Short, int16_t) DEFINE_NUMERIC_CONVERT_READER(Boolean, Int, int32_t) @@ -834,8 +1181,57 @@ namespace orc { DEFINE_DECIMAL_CONVERT_TO_STRING_VARINT_READER(Char) DEFINE_DECIMAL_CONVERT_TO_STRING_VARINT_READER(Varchar) + // String variant to numeric + DEFINE_STRING_VARIANT_CONVERT_TO_NUMERIC_READER(String, Boolean, bool) + 
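
The decimal conversion above builds a scaled integer: the digits before the dot are scaled up by 10^scale and the fraction digits are added on top. A sketch of the same idea using int64_t in place of Int128, and requiring the fraction to already carry exactly `scale` digits (the real reader infers the source scale and rescales through convertDecimal(), and range-checks against 38 digits of precision):

#include <cctype>
#include <cstdint>
#include <optional>
#include <string>

std::optional<int64_t> parseDecimal(const std::string& s, int32_t scale) {
  if (s.empty()) return std::nullopt;
  size_t start = (s.front() == '-') ? 1 : 0;
  size_t dot = s.find('.');
  std::string intPart = (dot == std::string::npos)
                            ? s.substr(start)
                            : s.substr(start, dot - start);
  std::string fracPart = (dot == std::string::npos) ? "" : s.substr(dot + 1);
  auto allDigits = [](const std::string& p) {
    for (char c : p)
      if (!std::isdigit(static_cast<unsigned char>(c))) return false;
    return true;
  };
  if (intPart.empty() || static_cast<int32_t>(fracPart.size()) != scale ||
      !allDigits(intPart) || !allDigits(fracPart)) {
    return std::nullopt;
  }
  int64_t value = std::stoll(intPart);
  for (int32_t i = 0; i < scale; ++i) value *= 10;  // scale up by 10^scale
  if (!fracPart.empty()) value += std::stoll(fracPart);
  return (start == 1) ? -value : value;
}

int main() {
  // "-12.34" at scale 2 becomes the unscaled value -1234.
  return (parseDecimal("-12.34", 2).value_or(0) == -1234 &&
          !parseDecimal("1x.34", 2).has_value()) ? 0 : 1;
}
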
DEFINE_STRING_VARIANT_CONVERT_TO_NUMERIC_READER(String, Byte, int8_t) + DEFINE_STRING_VARIANT_CONVERT_TO_NUMERIC_READER(String, Short, int16_t) + DEFINE_STRING_VARIANT_CONVERT_TO_NUMERIC_READER(String, Int, int32_t) + DEFINE_STRING_VARIANT_CONVERT_TO_NUMERIC_READER(String, Long, int64_t) + DEFINE_STRING_VARIANT_CONVERT_TO_NUMERIC_READER(String, Float, float) + DEFINE_STRING_VARIANT_CONVERT_TO_NUMERIC_READER(String, Double, double) + + DEFINE_STRING_VARIANT_CONVERT_TO_NUMERIC_READER(Char, Boolean, bool) + DEFINE_STRING_VARIANT_CONVERT_TO_NUMERIC_READER(Char, Byte, int8_t) + DEFINE_STRING_VARIANT_CONVERT_TO_NUMERIC_READER(Char, Short, int16_t) + DEFINE_STRING_VARIANT_CONVERT_TO_NUMERIC_READER(Char, Int, int32_t) + DEFINE_STRING_VARIANT_CONVERT_TO_NUMERIC_READER(Char, Long, int64_t) + DEFINE_STRING_VARIANT_CONVERT_TO_NUMERIC_READER(Char, Float, float) + DEFINE_STRING_VARIANT_CONVERT_TO_NUMERIC_READER(Char, Double, double) + + DEFINE_STRING_VARIANT_CONVERT_TO_NUMERIC_READER(Varchar, Boolean, bool) + DEFINE_STRING_VARIANT_CONVERT_TO_NUMERIC_READER(Varchar, Byte, int8_t) + DEFINE_STRING_VARIANT_CONVERT_TO_NUMERIC_READER(Varchar, Short, int16_t) + DEFINE_STRING_VARIANT_CONVERT_TO_NUMERIC_READER(Varchar, Int, int32_t) + DEFINE_STRING_VARIANT_CONVERT_TO_NUMERIC_READER(Varchar, Long, int64_t) + DEFINE_STRING_VARIANT_CONVERT_TO_NUMERIC_READER(Varchar, Float, float) + DEFINE_STRING_VARIANT_CONVERT_TO_NUMERIC_READER(Varchar, Double, double) + + // String variant to string variant + DEFINE_STRING_VARIANT_CONVERT_READER(String, String) + DEFINE_STRING_VARIANT_CONVERT_READER(String, Char) + DEFINE_STRING_VARIANT_CONVERT_READER(String, Varchar) + DEFINE_STRING_VARIANT_CONVERT_READER(Char, Char) + DEFINE_STRING_VARIANT_CONVERT_READER(Char, String) + DEFINE_STRING_VARIANT_CONVERT_READER(Char, Varchar) + DEFINE_STRING_VARIANT_CONVERT_READER(Varchar, String) + DEFINE_STRING_VARIANT_CONVERT_READER(Varchar, Char) + DEFINE_STRING_VARIANT_CONVERT_READER(Varchar, Varchar) + + // String variant to timestamp + DEFINE_STRING_VARIANT_CONVERT_TO_TIMESTAMP_READER(String, Timestamp) + DEFINE_STRING_VARIANT_CONVERT_TO_TIMESTAMP_READER(Char, Timestamp) + DEFINE_STRING_VARIANT_CONVERT_TO_TIMESTAMP_READER(Varchar, Timestamp) + + // String variant to decimal + DEFINE_STRING_VARIANT_CONVERT_CONVERT_TO_DECIMAL_READER(String, Decimal64) + DEFINE_STRING_VARIANT_CONVERT_CONVERT_TO_DECIMAL_READER(String, Decimal128) + DEFINE_STRING_VARIANT_CONVERT_CONVERT_TO_DECIMAL_READER(Char, Decimal64) + DEFINE_STRING_VARIANT_CONVERT_CONVERT_TO_DECIMAL_READER(Char, Decimal128) + DEFINE_STRING_VARIANT_CONVERT_CONVERT_TO_DECIMAL_READER(Varchar, Decimal64) + DEFINE_STRING_VARIANT_CONVERT_CONVERT_TO_DECIMAL_READER(Varchar, Decimal128) + #define CREATE_READER(NAME) \ - return std::make_unique(_readType, fileType, stripe, throwOnOverflow); + return std::make_unique(readType, fileType, stripe, throwOnOverflow); #define CASE_CREATE_READER(TYPE, CONVERT) \ case TYPE: \ @@ -858,7 +1254,7 @@ namespace orc { #define CASE_CREATE_DECIMAL_READER(FROM) \ case DECIMAL: { \ - if (isDecimal64(_readType)) { \ + if (isDecimal64(readType)) { \ CREATE_READER(FROM##ToDecimal64ColumnReader) \ } else { \ CREATE_READER(FROM##ToDecimal128ColumnReader) \ @@ -868,7 +1264,7 @@ namespace orc { #define CASE_EXCEPTION \ default: \ throw SchemaEvolutionError("Cannot convert from " + fileType.toString() + " to " + \ - _readType.toString()); + readType.toString()); std::unique_ptr buildConvertReader(const Type& fileType, StripeStreams& stripe, bool useTightNumericVector, @@ -878,11 
+1274,11 @@ namespace orc { "SchemaEvolution only support tight vector, please create ColumnVectorBatch with " "option useTightNumericVector"); } - const auto& _readType = *stripe.getSchemaEvolution()->getReadType(fileType); + const auto& readType = *stripe.getSchemaEvolution()->getReadType(fileType); switch (fileType.getKind()) { case BOOLEAN: { - switch (_readType.getKind()) { + switch (readType.getKind()) { CASE_CREATE_READER(BYTE, BooleanToByte) CASE_CREATE_READER(SHORT, BooleanToShort) CASE_CREATE_READER(INT, BooleanToInt) @@ -906,7 +1302,7 @@ namespace orc { } } case BYTE: { - switch (_readType.getKind()) { + switch (readType.getKind()) { CASE_CREATE_READER(BOOLEAN, ByteToBoolean) CASE_CREATE_READER(SHORT, ByteToShort) CASE_CREATE_READER(INT, ByteToInt) @@ -930,7 +1326,7 @@ namespace orc { } } case SHORT: { - switch (_readType.getKind()) { + switch (readType.getKind()) { CASE_CREATE_READER(BOOLEAN, ShortToBoolean) CASE_CREATE_READER(BYTE, ShortToByte) CASE_CREATE_READER(INT, ShortToInt) @@ -954,7 +1350,7 @@ namespace orc { } } case INT: { - switch (_readType.getKind()) { + switch (readType.getKind()) { CASE_CREATE_READER(BOOLEAN, IntToBoolean) CASE_CREATE_READER(BYTE, IntToByte) CASE_CREATE_READER(SHORT, IntToShort) @@ -978,7 +1374,7 @@ namespace orc { } } case LONG: { - switch (_readType.getKind()) { + switch (readType.getKind()) { CASE_CREATE_READER(BOOLEAN, LongToBoolean) CASE_CREATE_READER(BYTE, LongToByte) CASE_CREATE_READER(SHORT, LongToShort) @@ -1002,7 +1398,7 @@ namespace orc { } } case FLOAT: { - switch (_readType.getKind()) { + switch (readType.getKind()) { CASE_CREATE_READER(BOOLEAN, FloatToBoolean) CASE_CREATE_READER(BYTE, FloatToByte) CASE_CREATE_READER(SHORT, FloatToShort) @@ -1026,7 +1422,7 @@ namespace orc { } } case DOUBLE: { - switch (_readType.getKind()) { + switch (readType.getKind()) { CASE_CREATE_READER(BOOLEAN, DoubleToBoolean) CASE_CREATE_READER(BYTE, DoubleToByte) CASE_CREATE_READER(SHORT, DoubleToShort) @@ -1050,7 +1446,7 @@ namespace orc { } } case DECIMAL: { - switch (_readType.getKind()) { + switch (readType.getKind()) { CASE_CREATE_FROM_DECIMAL_READER(BOOLEAN, Boolean) CASE_CREATE_FROM_DECIMAL_READER(BYTE, Byte) CASE_CREATE_FROM_DECIMAL_READER(SHORT, Short) @@ -1065,13 +1461,13 @@ namespace orc { CASE_CREATE_FROM_DECIMAL_READER(TIMESTAMP_INSTANT, Timestamp) case DECIMAL: { if (isDecimal64(fileType)) { - if (isDecimal64(_readType)) { + if (isDecimal64(readType)) { CREATE_READER(Decimal64ToDecimal64ColumnReader) } else { CREATE_READER(Decimal64ToDecimal128ColumnReader) } } else { - if (isDecimal64(_readType)) { + if (isDecimal64(readType)) { CREATE_READER(Decimal128ToDecimal64ColumnReader) } else { CREATE_READER(Decimal128ToDecimal128ColumnReader) @@ -1087,7 +1483,96 @@ namespace orc { CASE_EXCEPTION } } - case STRING: + case STRING: { + switch (readType.getKind()) { + CASE_CREATE_READER(BOOLEAN, StringToBoolean) + CASE_CREATE_READER(BYTE, StringToByte) + CASE_CREATE_READER(SHORT, StringToShort) + CASE_CREATE_READER(INT, StringToInt) + CASE_CREATE_READER(LONG, StringToLong) + CASE_CREATE_READER(FLOAT, StringToFloat) + CASE_CREATE_READER(DOUBLE, StringToDouble) + CASE_CREATE_READER(STRING, StringToString) + CASE_CREATE_READER(CHAR, StringToChar) + CASE_CREATE_READER(VARCHAR, StringToVarchar) + CASE_CREATE_READER(TIMESTAMP, StringToTimestamp) + CASE_CREATE_READER(TIMESTAMP_INSTANT, StringToTimestamp) + case DECIMAL: { + if (isDecimal64(readType)) { + CREATE_READER(StringToDecimal64ColumnReader) + } else { + CREATE_READER(StringToDecimal128ColumnReader) + } 
+ } + case BINARY: + case LIST: + case MAP: + case STRUCT: + case UNION: + case DATE: + CASE_EXCEPTION + } + } + case CHAR: { + switch (readType.getKind()) { + CASE_CREATE_READER(BOOLEAN, CharToBoolean) + CASE_CREATE_READER(BYTE, CharToByte) + CASE_CREATE_READER(SHORT, CharToShort) + CASE_CREATE_READER(INT, CharToInt) + CASE_CREATE_READER(LONG, CharToLong) + CASE_CREATE_READER(FLOAT, CharToFloat) + CASE_CREATE_READER(DOUBLE, CharToDouble) + CASE_CREATE_READER(STRING, CharToString) + CASE_CREATE_READER(CHAR, CharToChar) + CASE_CREATE_READER(VARCHAR, CharToVarchar) + CASE_CREATE_READER(TIMESTAMP, CharToTimestamp) + CASE_CREATE_READER(TIMESTAMP_INSTANT, CharToTimestamp) + case DECIMAL: { + if (isDecimal64(readType)) { + CREATE_READER(CharToDecimal64ColumnReader) + } else { + CREATE_READER(CharToDecimal128ColumnReader) + } + } + case BINARY: + case LIST: + case MAP: + case STRUCT: + case UNION: + case DATE: + CASE_EXCEPTION + } + } + case VARCHAR: { + switch (readType.getKind()) { + CASE_CREATE_READER(BOOLEAN, VarcharToBoolean) + CASE_CREATE_READER(BYTE, VarcharToByte) + CASE_CREATE_READER(SHORT, VarcharToShort) + CASE_CREATE_READER(INT, VarcharToInt) + CASE_CREATE_READER(LONG, VarcharToLong) + CASE_CREATE_READER(FLOAT, VarcharToFloat) + CASE_CREATE_READER(DOUBLE, VarcharToDouble) + CASE_CREATE_READER(STRING, VarcharToString) + CASE_CREATE_READER(CHAR, VarcharToChar) + CASE_CREATE_READER(VARCHAR, VarcharToVarchar) + CASE_CREATE_READER(TIMESTAMP, VarcharToTimestamp) + CASE_CREATE_READER(TIMESTAMP_INSTANT, VarcharToTimestamp) + case DECIMAL: { + if (isDecimal64(readType)) { + CREATE_READER(VarcharToDecimal64ColumnReader) + } else { + CREATE_READER(VarcharToDecimal128ColumnReader) + } + } + case BINARY: + case LIST: + case MAP: + case STRUCT: + case UNION: + case DATE: + CASE_EXCEPTION + } + } case BINARY: case TIMESTAMP: case LIST: @@ -1095,21 +1580,9 @@ namespace orc { case STRUCT: case UNION: case DATE: - case VARCHAR: - case CHAR: case TIMESTAMP_INSTANT: CASE_EXCEPTION } } -#undef DEFINE_NUMERIC_CONVERT_READER -#undef DEFINE_NUMERIC_CONVERT_TO_STRING_VARINT_READER -#undef DEFINE_NUMERIC_CONVERT_TO_DECIMAL_READER -#undef DEFINE_NUMERIC_CONVERT_TO_TIMESTAMP_READER -#undef DEFINE_DECIMAL_CONVERT_TO_NUMERIC_READER -#undef DEFINE_DECIMAL_CONVERT_TO_DECIMAL_READER -#undef CASE_CREATE_FROM_DECIMAL_READER -#undef CASE_CREATE_READER -#undef CASE_EXCEPTION - } // namespace orc diff --git a/c++/src/CpuInfoUtil.cc b/c++/src/CpuInfoUtil.cc index 82669de20a..588f8dc96a 100644 --- a/c++/src/CpuInfoUtil.cc +++ b/c++/src/CpuInfoUtil.cc @@ -74,7 +74,7 @@ namespace orc { #if defined(_WIN32) //------------------------------ WINDOWS ------------------------------// - void OsRetrieveCacheSize(std::array* cache_sizes) { + void OsRetrieveCacheSize(std::array* cacheSizes) { PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = nullptr; PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer_position = nullptr; DWORD buffer_size = 0; @@ -108,8 +108,8 @@ namespace orc { if (RelationCache == buffer_position->Relationship) { PCACHE_DESCRIPTOR cache = &buffer_position->Cache; if (cache->Level >= 1 && cache->Level <= kCacheLevels) { - const int64_t current = (*cache_sizes)[cache->Level - 1]; - (*cache_sizes)[cache->Level - 1] = std::max(current, cache->Size); + const int64_t current = (*cacheSizes)[cache->Level - 1]; + (*cacheSizes)[cache->Level - 1] = std::max(current, cache->Size); } } offset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); @@ -136,23 +136,22 @@ namespace orc { } #endif // MINGW - void OsRetrieveCpuInfo(int64_t* 
hardware_flags, CpuInfo::Vendor* vendor, - std::string* model_name) { + void OsRetrieveCpuInfo(int64_t* hardwareFlags, CpuInfo::Vendor* vendor, + std::string* modelName) { int register_EAX_id = 1; int highest_valid_id = 0; int highest_extended_valid_id = 0; std::bitset<32> features_ECX; - std::array cpu_info; + std::array cpuInfo; // Get highest valid id - __cpuid(cpu_info.data(), 0); - highest_valid_id = cpu_info[0]; + __cpuid(cpuInfo.data(), 0); + highest_valid_id = cpuInfo[0]; // HEX of "GenuineIntel": 47656E75 696E6549 6E74656C // HEX of "AuthenticAMD": 41757468 656E7469 63414D44 - if (cpu_info[1] == 0x756e6547 && cpu_info[3] == 0x49656e69 && cpu_info[2] == 0x6c65746e) { + if (cpuInfo[1] == 0x756e6547 && cpuInfo[3] == 0x49656e69 && cpuInfo[2] == 0x6c65746e) { *vendor = CpuInfo::Vendor::Intel; - } else if (cpu_info[1] == 0x68747541 && cpu_info[3] == 0x69746e65 && - cpu_info[2] == 0x444d4163) { + } else if (cpuInfo[1] == 0x68747541 && cpuInfo[3] == 0x69746e65 && cpuInfo[2] == 0x444d4163) { *vendor = CpuInfo::Vendor::AMD; } @@ -161,19 +160,19 @@ namespace orc { } // EAX=1: Processor Info and Feature Bits - __cpuidex(cpu_info.data(), register_EAX_id, 0); - features_ECX = cpu_info[2]; + __cpuidex(cpuInfo.data(), register_EAX_id, 0); + features_ECX = cpuInfo[2]; // Get highest extended id - __cpuid(cpu_info.data(), 0x80000000); - highest_extended_valid_id = cpu_info[0]; + __cpuid(cpuInfo.data(), 0x80000000); + highest_extended_valid_id = cpuInfo[0]; // Retrieve CPU model name if (highest_extended_valid_id >= static_cast(0x80000004)) { - model_name->clear(); + modelName->clear(); for (int i = 0x80000002; i <= static_cast(0x80000004); ++i) { - __cpuidex(cpu_info.data(), i, 0); - *model_name += std::string(reinterpret_cast(cpu_info.data()), sizeof(cpu_info)); + __cpuidex(cpuInfo.data(), i, 0); + *modelName += std::string(reinterpret_cast(cpuInfo.data()), sizeof(cpuInfo)); } } @@ -184,37 +183,37 @@ namespace orc { zmm_enabled = (xcr0 & 0xE0) == 0xE0; } - if (features_ECX[9]) *hardware_flags |= CpuInfo::SSSE3; - if (features_ECX[19]) *hardware_flags |= CpuInfo::SSE4_1; - if (features_ECX[20]) *hardware_flags |= CpuInfo::SSE4_2; - if (features_ECX[23]) *hardware_flags |= CpuInfo::POPCNT; - if (features_ECX[28]) *hardware_flags |= CpuInfo::AVX; + if (features_ECX[9]) *hardwareFlags |= CpuInfo::SSSE3; + if (features_ECX[19]) *hardwareFlags |= CpuInfo::SSE4_1; + if (features_ECX[20]) *hardwareFlags |= CpuInfo::SSE4_2; + if (features_ECX[23]) *hardwareFlags |= CpuInfo::POPCNT; + if (features_ECX[28]) *hardwareFlags |= CpuInfo::AVX; // cpuid with EAX=7, ECX=0: Extended Features register_EAX_id = 7; if (highest_valid_id > register_EAX_id) { - __cpuidex(cpu_info.data(), register_EAX_id, 0); - std::bitset<32> features_EBX = cpu_info[1]; + __cpuidex(cpuInfo.data(), register_EAX_id, 0); + std::bitset<32> features_EBX = cpuInfo[1]; - if (features_EBX[3]) *hardware_flags |= CpuInfo::BMI1; - if (features_EBX[5]) *hardware_flags |= CpuInfo::AVX2; - if (features_EBX[8]) *hardware_flags |= CpuInfo::BMI2; + if (features_EBX[3]) *hardwareFlags |= CpuInfo::BMI1; + if (features_EBX[5]) *hardwareFlags |= CpuInfo::AVX2; + if (features_EBX[8]) *hardwareFlags |= CpuInfo::BMI2; if (zmm_enabled) { - if (features_EBX[16]) *hardware_flags |= CpuInfo::AVX512F; - if (features_EBX[17]) *hardware_flags |= CpuInfo::AVX512DQ; - if (features_EBX[28]) *hardware_flags |= CpuInfo::AVX512CD; - if (features_EBX[30]) *hardware_flags |= CpuInfo::AVX512BW; - if (features_EBX[31]) *hardware_flags |= CpuInfo::AVX512VL; + if 
(features_EBX[16]) *hardwareFlags |= CpuInfo::AVX512F; + if (features_EBX[17]) *hardwareFlags |= CpuInfo::AVX512DQ; + if (features_EBX[28]) *hardwareFlags |= CpuInfo::AVX512CD; + if (features_EBX[30]) *hardwareFlags |= CpuInfo::AVX512BW; + if (features_EBX[31]) *hardwareFlags |= CpuInfo::AVX512VL; } } } #elif defined(CPUINFO_ARCH_ARM) // Windows on Arm - void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor, - std::string* model_name) { - *hardware_flags |= CpuInfo::ASIMD; - // TODO: vendor, model_name + void OsRetrieveCpuInfo(int64_t* hardwareFlags, CpuInfo::Vendor* vendor, + std::string* modelName) { + *hardwareFlags |= CpuInfo::ASIMD; + // TODO: vendor, modelName } #endif @@ -236,25 +235,25 @@ namespace orc { return std::nullopt; } - void OsRetrieveCacheSize(std::array* cache_sizes) { + void OsRetrieveCacheSize(std::array* cacheSizes) { static_assert(kCacheLevels >= 3, ""); auto c = IntegerSysCtlByName("hw.l1dcachesize"); if (c.has_value()) { - (*cache_sizes)[0] = *c; + (*cacheSizes)[0] = *c; } c = IntegerSysCtlByName("hw.l2cachesize"); if (c.has_value()) { - (*cache_sizes)[1] = *c; + (*cacheSizes)[1] = *c; } c = IntegerSysCtlByName("hw.l3cachesize"); if (c.has_value()) { - (*cache_sizes)[2] = *c; + (*cacheSizes)[2] = *c; } } - void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor, - std::string* model_name) { - // hardware_flags + void OsRetrieveCpuInfo(int64_t* hardwareFlags, CpuInfo::Vendor* vendor, + std::string* modelName) { + // hardwareFlags struct SysCtlCpuFeature { const char* name; int64_t flag; @@ -280,13 +279,13 @@ namespace orc { for (const auto& feature : features) { auto v = IntegerSysCtlByName(feature.name); if (v.value_or(0)) { - *hardware_flags |= feature.flag; + *hardwareFlags |= feature.flag; } } - // TODO: vendor, model_name + // TODO: vendor, modelName *vendor = CpuInfo::Vendor::Unknown; - *model_name = "Unknown"; + *modelName = "Unknown"; } #else @@ -345,7 +344,7 @@ namespace orc { const struct { std::string name; int64_t flag; - } flag_mappings[] = { + } flagMappings[] = { #if defined(CPUINFO_ARCH_X86) {"ssse3", CpuInfo::SSSE3}, {"sse4_1", CpuInfo::SSE4_1}, @@ -364,12 +363,12 @@ namespace orc { {"asimd", CpuInfo::ASIMD}, #endif }; - const int64_t num_flags = sizeof(flag_mappings) / sizeof(flag_mappings[0]); + const int64_t num_flags = sizeof(flagMappings) / sizeof(flagMappings[0]); int64_t flags = 0; for (int i = 0; i < num_flags; ++i) { - if (values.find(flag_mappings[i].name) != std::string::npos) { - flags |= flag_mappings[i].flag; + if (values.find(flagMappings[i].name) != std::string::npos) { + flags |= flagMappings[i].flag; } } return flags; @@ -469,9 +468,9 @@ namespace orc { #elif defined(CPUINFO_ARCH_ARM) //------------------------------ AARCH64 ------------------------------// - bool ArchParseUserSimdLevel(const std::string& simd_level, int64_t* hardware_flags) { - if (simd_level == "NONE") { - *hardware_flags &= ~CpuInfo::ASIMD; + bool ArchParseUserSimdLevel(const std::string& simdLevel, int64_t* hardwareFlags) { + if (simdLevel == "NONE") { + *hardwareFlags &= ~CpuInfo::ASIMD; return true; } return false; @@ -485,7 +484,7 @@ namespace orc { #else //------------------------------ PPC, ... 
------------------------------// - bool ArchParseUserSimdLevel(const std::string& simd_level, int64_t* hardware_flags) { + bool ArchParseUserSimdLevel(const std::string& simdLevel, int64_t* hardwareFlags) { return true; } @@ -496,17 +495,17 @@ namespace orc { } // namespace struct CpuInfo::Impl { - int64_t hardware_flags = 0; + int64_t hardwareFlags = 0; int numCores = 0; - int64_t original_hardware_flags = 0; + int64_t originalHardwareFlags = 0; Vendor vendor = Vendor::Unknown; - std::string model_name = "Unknown"; - std::array cache_sizes{}; + std::string modelName = "Unknown"; + std::array cacheSizes{}; Impl() { - OsRetrieveCacheSize(&cache_sizes); - OsRetrieveCpuInfo(&hardware_flags, &vendor, &model_name); - original_hardware_flags = hardware_flags; + OsRetrieveCacheSize(&cacheSizes); + OsRetrieveCpuInfo(&hardwareFlags, &vendor, &modelName); + originalHardwareFlags = hardwareFlags; numCores = std::max(static_cast(std::thread::hardware_concurrency()), 1); // parse user simd level @@ -514,7 +513,7 @@ namespace orc { std::string userSimdLevel = maybe_env_var == nullptr ? "NONE" : std::string(maybe_env_var); std::transform(userSimdLevel.begin(), userSimdLevel.end(), userSimdLevel.begin(), [](unsigned char c) { return std::toupper(c); }); - if (!ArchParseUserSimdLevel(userSimdLevel, &hardware_flags)) { + if (!ArchParseUserSimdLevel(userSimdLevel, &hardwareFlags)) { throw ParseError("Invalid value for ORC_USER_SIMD_LEVEL: " + userSimdLevel); } } @@ -530,8 +529,8 @@ namespace orc { #endif const CpuInfo* CpuInfo::getInstance() { - static CpuInfo cpu_info; - return &cpu_info; + static CpuInfo cpuInfo; + return &cpuInfo; } #ifdef __clang__ @@ -539,7 +538,7 @@ namespace orc { #endif int64_t CpuInfo::hardwareFlags() const { - return impl_->hardware_flags; + return impl_->hardwareFlags; } int CpuInfo::numCores() const { @@ -551,7 +550,7 @@ namespace orc { } const std::string& CpuInfo::modelName() const { - return impl_->model_name; + return impl_->modelName; } int64_t CpuInfo::cacheSize(CacheLevel level) const { @@ -564,18 +563,18 @@ namespace orc { static_assert(static_cast(CacheLevel::L1) == 0, ""); const int i = static_cast(level); - if (impl_->cache_sizes[i] > 0) return impl_->cache_sizes[i]; + if (impl_->cacheSizes[i] > 0) return impl_->cacheSizes[i]; if (i == 0) return kDefaultCacheSizes[0]; // l3 may be not available, return maximum of l2 or default size - return std::max(kDefaultCacheSizes[i], impl_->cache_sizes[i - 1]); + return std::max(kDefaultCacheSizes[i], impl_->cacheSizes[i - 1]); } bool CpuInfo::isSupported(int64_t flags) const { - return (impl_->hardware_flags & flags) == flags; + return (impl_->hardwareFlags & flags) == flags; } bool CpuInfo::isDetected(int64_t flags) const { - return (impl_->original_hardware_flags & flags) == flags; + return (impl_->originalHardwareFlags & flags) == flags; } void CpuInfo::verifyCpuRequirements() const { diff --git a/c++/src/Exceptions.cc b/c++/src/Exceptions.cc index 30ecf7dc7c..2ba1ab404c 100644 --- a/c++/src/Exceptions.cc +++ b/c++/src/Exceptions.cc @@ -84,4 +84,20 @@ namespace orc { SchemaEvolutionError::~SchemaEvolutionError() noexcept { // PASS } + + CompressionError::CompressionError(const std::string& whatArg) : runtime_error(whatArg) { + // PASS + } + + CompressionError::CompressionError(const char* whatArg) : runtime_error(whatArg) { + // PASS + } + + CompressionError::CompressionError(const CompressionError& error) : runtime_error(error) { + // PASS + } + + CompressionError::~CompressionError() noexcept { + // PASS + } } // namespace 
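
With the rename done, the CpuInfo surface stays the same: isDetected() reflects the raw cpuid result, while isSupported() reflects the flags after any ORC_USER_SIMD_LEVEL override. A hedged usage sketch, assuming the internal CpuInfoUtil.hh header is reachable from the caller:

#include <iostream>

#include "CpuInfoUtil.hh"  // assumed include path; this is an internal header

int main() {
  const orc::CpuInfo* info = orc::CpuInfo::getInstance();
  std::cout << "model: " << info->modelName() << "\n";
#if defined(CPUINFO_ARCH_X86)
  // Detected by hardware but masked by the user override.
  if (info->isDetected(orc::CpuInfo::AVX2) && !info->isSupported(orc::CpuInfo::AVX2)) {
    std::cout << "AVX2 present but disabled via ORC_USER_SIMD_LEVEL\n";
  }
#endif
  return 0;
}
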
orc diff --git a/c++/src/Int128.cc b/c++/src/Int128.cc index 4a1d0b763a..1e059fd4e2 100644 --- a/c++/src/Int128.cc +++ b/c++/src/Int128.cc @@ -27,7 +27,7 @@ namespace orc { Int128 Int128::maximumValue() { - return Int128(0x7fffffffffffffff, 0xfffffffffffffff); + return Int128(0x7fffffffffffffff, 0xffffffffffffffff); } Int128 Int128::minimumValue() { diff --git a/c++/src/LzoDecompressor.cc b/c++/src/LzoDecompressor.cc index f494f4b651..68e25425c2 100644 --- a/c++/src/LzoDecompressor.cc +++ b/c++/src/LzoDecompressor.cc @@ -342,7 +342,7 @@ namespace orc { char* literalOutputLimit = output + literalLength; if (literalOutputLimit > fastOutputLimit || input + literalLength > inputLimit - SIZE_OF_LONG) { - if (literalOutputLimit > outputLimit) { + if (literalOutputLimit > outputLimit || input + literalLength > inputLimit) { throw MalformedInputException(input - inputAddress); } diff --git a/c++/src/Options.hh b/c++/src/Options.hh index daf9d52e1c..0a4bd56d8f 100644 --- a/c++/src/Options.hh +++ b/c++/src/Options.hh @@ -23,6 +23,8 @@ #include "orc/OrcFile.hh" #include "orc/Reader.hh" +#include "io/Cache.hh" + #include namespace orc { @@ -43,6 +45,7 @@ namespace orc { MemoryPool* memoryPool; std::string serializedTail; ReaderMetrics* metrics; + CacheOptions cacheOptions; ReaderOptionsPrivate() { tailLocation = std::numeric_limits::max(); @@ -122,6 +125,15 @@ namespace orc { return privateBits_->errorStream; } + ReaderOptions& ReaderOptions::setCacheOptions(const CacheOptions& cacheOptions) { + privateBits_->cacheOptions = cacheOptions; + return *this; + } + + const CacheOptions& ReaderOptions::getCacheOptions() const { + return privateBits_->cacheOptions; + } + /** * RowReaderOptions Implementation */ diff --git a/c++/src/OrcFile.cc b/c++/src/OrcFile.cc index 8899299d3d..be86724329 100644 --- a/c++/src/OrcFile.cc +++ b/c++/src/OrcFile.cc @@ -79,7 +79,7 @@ namespace orc { } void read(void* buf, uint64_t length, uint64_t offset) override { - SCOPED_STOPWATCH(metrics, IOBlockingLatencyUs, IOCount); + SCOPED_STOPWATCH(metrics_, IOBlockingLatencyUs, IOCount); if (!buf) { throw ParseError("Buffer is null"); } diff --git a/c++/src/OrcHdfsFile.cc b/c++/src/OrcHdfsFile.cc index 09ff71a0e9..d878e276cb 100644 --- a/c++/src/OrcHdfsFile.cc +++ b/c++/src/OrcHdfsFile.cc @@ -42,23 +42,23 @@ namespace orc { class HdfsFileInputStream : public InputStream { private: - std::string filename; - std::unique_ptr file; - std::unique_ptr file_system; - uint64_t totalLength; - const uint64_t READ_SIZE = 1024 * 1024; // 1 MB - ReaderMetrics* metrics; + std::string filename_; + std::unique_ptr file_; + std::unique_ptr fileSystem_; + uint64_t totalLength_; + const uint64_t readSize_ = 1024 * 1024; // 1 MB + ReaderMetrics* metrics_; public: - HdfsFileInputStream(std::string _filename, ReaderMetrics* _metrics) : metrics(_metrics) { - filename = _filename; + HdfsFileInputStream(std::string filename, ReaderMetrics* metrics) : metrics_(metrics) { + filename_ = filename; // Building a URI object from the given uri_path hdfs::URI uri; try { - uri = hdfs::URI::parse_from_string(filename); + uri = hdfs::URI::parse_from_string(filename_); } catch (const hdfs::uri_parse_error&) { - throw ParseError("Malformed URI: " + filename); + throw ParseError("Malformed URI: " + filename_); } // This sets conf path to default "$HADOOP_CONF_DIR" or "/etc/hadoop/conf" @@ -82,9 +82,9 @@ namespace orc { } hdfs::IoService* io_service = hdfs::IoService::New(); // Wrapping file_system into a unique pointer to guarantee deletion - file_system = + fileSystem_ 
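
The one-character Int128 change above is a real bug fix, not a style edit: the old literal 0xfffffffffffffff has only 15 hex digits, i.e. only the low 60 bits set, so maximumValue() fell short of 2^127 - 1. A quick check of the gap:

#include <cstdint>

int main() {
  uint64_t oldLow = 0xfffffffffffffff;   // 15 'f' digits: 2^60 - 1
  uint64_t newLow = 0xffffffffffffffff;  // 16 'f' digits: 2^64 - 1
  // The old maximum was short by exactly 15 * 2^60 in the low word.
  return ((newLow - oldLow) == (15ULL << 60)) ? 0 : 1;
}
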
= std::unique_ptr(hdfs::FileSystem::New(io_service, "", options)); - if (file_system.get() == nullptr) { + if (fileSystem_.get() == nullptr) { throw ParseError("Can't create FileSystem object. "); } hdfs::Status status; @@ -92,13 +92,13 @@ namespace orc { if (!uri.get_host().empty()) { // Using port if supplied, otherwise using "" to look up port in configs std::string port = uri.has_port() ? std::to_string(uri.get_port()) : ""; - status = file_system->Connect(uri.get_host(), port); + status = fileSystem_->Connect(uri.get_host(), port); if (!status.ok()) { throw ParseError("Can't connect to " + uri.get_host() + ":" + port + ". " + status.ToString()); } } else { - status = file_system->ConnectToDefaultFs(); + status = fileSystem_->ConnectToDefaultFs(); if (!status.ok()) { if (!options.defaultFS.get_host().empty()) { throw ParseError("Error connecting to " + options.defaultFS.str() + ". " + @@ -110,32 +110,32 @@ namespace orc { } } - if (file_system.get() == nullptr) { + if (fileSystem_.get() == nullptr) { throw ParseError("Can't connect the file system. "); } hdfs::FileHandle* file_raw = nullptr; - status = file_system->Open(uri.get_path(true), &file_raw); + status = fileSystem_->Open(uri.get_path(true), &file_raw); if (!status.ok()) { throw ParseError("Can't open " + uri.get_path(true) + ". " + status.ToString()); } // Wrapping file_raw into a unique pointer to guarantee deletion - file.reset(file_raw); + file_.reset(file_raw); hdfs::StatInfo stat_info; - status = file_system->GetFileInfo(uri.get_path(true), stat_info); + status = fileSystem_->GetFileInfo(uri.get_path(true), stat_info); if (!status.ok()) { throw ParseError("Can't stat " + uri.get_path(true) + ". " + status.ToString()); } - totalLength = stat_info.length; + totalLength_ = stat_info.length; } uint64_t getLength() const override { - return totalLength; + return totalLength_; } uint64_t getNaturalReadSize() const override { - return READ_SIZE; + return readSize_; } void read(void* buf, uint64_t length, uint64_t offset) override { @@ -151,8 +151,8 @@ namespace orc { do { status = - file->PositionRead(buf_ptr, static_cast(length) - total_bytes_read, - static_cast(offset + total_bytes_read), &last_bytes_read); + file_->PositionRead(buf_ptr, static_cast(length) - total_bytes_read, + static_cast(offset + total_bytes_read), &last_bytes_read); if (!status.ok()) { throw ParseError("Error reading the file: " + status.ToString()); } @@ -162,7 +162,7 @@ namespace orc { } const std::string& getName() const override { - return filename; + return filename_; } ~HdfsFileInputStream() override; diff --git a/c++/src/RLE.cc b/c++/src/RLE.cc index 89aca6a10e..cb831c80f7 100644 --- a/c++/src/RLE.cc +++ b/c++/src/RLE.cc @@ -108,15 +108,23 @@ namespace orc { void RleEncoder::recordPosition(PositionRecorder* recorder) const { uint64_t flushedSize = outputStream->getSize(); - uint64_t unflushedSize = static_cast(bufferPosition); + uint64_t unusedBufferSize = static_cast(bufferLength - bufferPosition); if (outputStream->isCompressed()) { recorder->add(flushedSize); - recorder->add(unflushedSize); + // There are multiple blocks in the input buffer, but bufferPosition only records the + // effective length of the last block. We need rawInputBufferSize to record the total length + // of all variable blocks. 
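
The PositionRead loop above exists because positional reads may return fewer bytes than requested, so the caller must loop until the buffer is full. A generic standalone form of that read-fully pattern, where `pread` is any callable with pread(2)-style semantics:

#include <cstdint>
#include <cstring>
#include <functional>
#include <stdexcept>

void readFully(const std::function<int64_t(void*, uint64_t, uint64_t)>& pread,
               void* buf, uint64_t length, uint64_t offset) {
  uint64_t total = 0;
  while (total < length) {
    int64_t n = pread(static_cast<char*>(buf) + total, length - total,
                      offset + total);
    if (n <= 0) {
      throw std::runtime_error("Error reading the file: short read");
    }
    total += static_cast<uint64_t>(n);
  }
}

int main() {
  const char src[] = "0123456789";
  // A deliberately stingy reader that returns at most 3 bytes per call.
  auto stingy = [&](void* dst, uint64_t len, uint64_t off) -> int64_t {
    uint64_t n = len < 3 ? len : 3;
    std::memcpy(dst, src + off, n);
    return static_cast<int64_t>(n);
  };
  char out[10];
  readFully(stingy, out, sizeof(out), 0);
  return std::memcmp(out, src, 10) == 0 ? 0 : 1;
}
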
+ recorder->add(outputStream->getRawInputBufferSize() - unusedBufferSize); } else { - flushedSize -= static_cast(bufferLength); - recorder->add(flushedSize + unflushedSize); + recorder->add(flushedSize - unusedBufferSize); } recorder->add(static_cast(numLiterals)); } + void RleEncoder::finishEncode() { + outputStream->BackUp(static_cast(bufferLength - bufferPosition)); + outputStream->finishStream(); + bufferLength = bufferPosition = 0; + } + } // namespace orc diff --git a/c++/src/RLE.hh b/c++/src/RLE.hh index a45b4056bc..e46504e885 100644 --- a/c++/src/RLE.hh +++ b/c++/src/RLE.hh @@ -84,6 +84,13 @@ namespace orc { virtual void write(int64_t val) = 0; + /** + * Finalize the encoding process. This function should be called after all data required for + * encoding has been added. It ensures that any remaining data is processed and the final state + * of the encoder is set. + */ + virtual void finishEncode(); + protected: std::unique_ptr outputStream; size_t bufferPosition; diff --git a/c++/src/RLEv1.cc b/c++/src/RLEv1.cc index 5d6f600669..72c555e610 100644 --- a/c++/src/RLEv1.cc +++ b/c++/src/RLEv1.cc @@ -74,10 +74,8 @@ namespace orc { } uint64_t RleEncoderV1::flush() { - writeValues(); - outputStream->BackUp(static_cast(bufferLength - bufferPosition)); + finishEncode(); uint64_t dataSize = outputStream->flush(); - bufferLength = bufferPosition = 0; return dataSize; } @@ -135,6 +133,11 @@ namespace orc { } } + void RleEncoderV1::finishEncode() { + writeValues(); + RleEncoder::finishEncode(); + } + signed char RleDecoderV1::readByte() { SCOPED_MINUS_STOPWATCH(metrics, DecodingLatencyUs); if (bufferStart_ == bufferEnd_) { diff --git a/c++/src/RLEv1.hh b/c++/src/RLEv1.hh index a2a00c9305..024b1e5e97 100644 --- a/c++/src/RLEv1.hh +++ b/c++/src/RLEv1.hh @@ -38,6 +38,8 @@ namespace orc { void write(int64_t val) override; + void finishEncode() override; + private: int64_t delta_; bool repeat_; diff --git a/c++/src/RLEv2.hh b/c++/src/RLEv2.hh index a8e0340e7e..8ceb7f125b 100644 --- a/c++/src/RLEv2.hh +++ b/c++/src/RLEv2.hh @@ -108,6 +108,8 @@ namespace orc { void write(int64_t val) override; + void finishEncode() override; + private: const bool alignedBitPacking_; uint32_t fixedRunLength_; diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc index 8a43818a53..17bf835203 100644 --- a/c++/src/Reader.cc +++ b/c++/src/Reader.cc @@ -751,27 +751,35 @@ namespace orc { return *(contents_->schema.get()); } - std::unique_ptr ReaderImpl::getStripeStatistics(uint64_t stripeIndex) const { + std::unique_ptr ReaderImpl::getStripeStatistics(uint64_t stripeIndex, + bool includeRowIndex) const { if (!isMetadataLoaded_) { readMetadata(); } if (contents_->metadata == nullptr) { throw std::logic_error("No stripe statistics in file"); } - size_t num_cols = static_cast( - contents_->metadata->stripe_stats(static_cast(stripeIndex)).col_stats_size()); - std::vector> indexStats(num_cols); proto::StripeInformation currentStripeInfo = footer_->stripes(static_cast(stripeIndex)); proto::StripeFooter currentStripeFooter = getStripeFooter(currentStripeInfo, *contents_.get()); - getRowIndexStatistics(currentStripeInfo, stripeIndex, currentStripeFooter, &indexStats); - const Timezone& writerTZ = currentStripeFooter.has_writer_timezone() ? 
getTimezoneByName(currentStripeFooter.writer_timezone()) : getLocalTimezone(); StatContext statContext(hasCorrectStatistics(), &writerTZ); - return std::make_unique( + + if (!includeRowIndex) { + return std::make_unique( + contents_->metadata->stripe_stats(static_cast(stripeIndex)), statContext); + } + + size_t num_cols = static_cast( + contents_->metadata->stripe_stats(static_cast(stripeIndex)).col_stats_size()); + std::vector> indexStats(num_cols); + + getRowIndexStatistics(currentStripeInfo, stripeIndex, currentStripeFooter, &indexStats); + + return std::make_unique( contents_->metadata->stripe_stats(static_cast(stripeIndex)), indexStats, statContext); } @@ -1117,7 +1125,7 @@ namespace orc { } bool RowReaderImpl::next(ColumnVectorBatch& data) { - SCOPED_STOPWATCH(contents->readerMetrics, ReaderInclusiveLatencyUs, ReaderCall); + SCOPED_STOPWATCH(contents_->readerMetrics, ReaderInclusiveLatencyUs, ReaderCall); if (currentStripe_ >= lastStripe_) { data.numElements = 0; markEndOfFile(); @@ -1426,17 +1434,10 @@ namespace orc { uint32_t stripeIndex, const std::set& included) const { std::map ret; - // find stripe info - if (stripeIndex >= static_cast(footer_->stripes_size())) { - throw std::logic_error("Illegal stripe index: " + - to_string(static_cast(stripeIndex))); - } - const proto::StripeInformation currentStripeInfo = - footer_->stripes(static_cast(stripeIndex)); - const proto::StripeFooter currentStripeFooter = getStripeFooter(currentStripeInfo, *contents_); + uint64_t offset; + auto currentStripeFooter = loadCurrentStripeFooter(stripeIndex, offset); // iterate stripe footer to get stream of bloom_filter - uint64_t offset = static_cast(currentStripeInfo.offset()); for (int i = 0; i < currentStripeFooter.streams_size(); i++) { const proto::Stream& stream = currentStripeFooter.streams(i); uint32_t column = static_cast(stream.column()); @@ -1474,6 +1475,150 @@ namespace orc { return ret; } + proto::StripeFooter ReaderImpl::loadCurrentStripeFooter(uint32_t stripeIndex, + uint64_t& offset) const { + // find stripe info + if (stripeIndex >= static_cast(footer_->stripes_size())) { + throw std::logic_error("Illegal stripe index: " + + to_string(static_cast(stripeIndex))); + } + const proto::StripeInformation currentStripeInfo = + footer_->stripes(static_cast(stripeIndex)); + offset = static_cast(currentStripeInfo.offset()); + return getStripeFooter(currentStripeInfo, *contents_); + } + + std::map ReaderImpl::getRowGroupIndex( + uint32_t stripeIndex, const std::set& included) const { + std::map ret; + uint64_t offset; + auto currentStripeFooter = loadCurrentStripeFooter(stripeIndex, offset); + + // iterate stripe footer to get stream of row_index + for (int i = 0; i < currentStripeFooter.streams_size(); i++) { + const proto::Stream& stream = currentStripeFooter.streams(i); + uint32_t column = static_cast(stream.column()); + uint64_t length = static_cast(stream.length()); + RowGroupIndex& rowGroupIndex = ret[column]; + + if (stream.kind() == proto::Stream_Kind_ROW_INDEX && + (included.empty() || included.find(column) != included.end())) { + std::unique_ptr pbStream = + createDecompressor(contents_->compression, + std::make_unique( + contents_->stream.get(), offset, length, *contents_->pool), + contents_->blockSize, *(contents_->pool), contents_->readerMetrics); + + proto::RowIndex pbRowIndex; + if (!pbRowIndex.ParseFromZeroCopyStream(pbStream.get())) { + std::stringstream errMsgBuffer; + errMsgBuffer << "Failed to parse RowIndex at column " << column << " in stripe " + << stripeIndex; + throw 
ParseError(errMsgBuffer.str()); + } + + // add rowGroupIndex to result for one column + for (auto& rowIndexEntry : pbRowIndex.entry()) { + std::vector posVector; + for (auto& position : rowIndexEntry.positions()) { + posVector.push_back(position); + } + rowGroupIndex.positions.push_back(posVector); + } + } + offset += length; + } + return ret; + } + + void ReaderImpl::releaseBuffer(uint64_t boundary) { + std::lock_guard lock(contents_->readCacheMutex); + + if (contents_->readCache) { + contents_->readCache->evictEntriesBefore(boundary); + } + } + + void ReaderImpl::preBuffer(const std::vector& stripes, + const std::list& includeTypes) { + std::vector newStripes; + for (auto stripe : stripes) { + if (stripe < static_cast(footer_->stripes_size())) newStripes.push_back(stripe); + } + + std::list newIncludeTypes; + for (auto type : includeTypes) { + if (type < static_cast(footer_->types_size())) newIncludeTypes.push_back(type); + } + + if (newStripes.empty() || newIncludeTypes.empty()) { + return; + } + + orc::RowReaderOptions rowReaderOptions; + rowReaderOptions.includeTypes(newIncludeTypes); + ColumnSelector columnSelector(contents_.get()); + std::vector selectedColumns; + columnSelector.updateSelected(selectedColumns, rowReaderOptions); + + std::vector ranges; + ranges.reserve(newIncludeTypes.size()); + for (auto stripe : newStripes) { + // get stripe information + const auto& stripeInfo = footer_->stripes(stripe); + uint64_t stripeFooterStart = + stripeInfo.offset() + stripeInfo.index_length() + stripeInfo.data_length(); + uint64_t stripeFooterLength = stripeInfo.footer_length(); + + // get stripe footer + std::unique_ptr pbStream = createDecompressor( + contents_->compression, + std::make_unique(contents_->stream.get(), stripeFooterStart, + stripeFooterLength, *contents_->pool), + contents_->blockSize, *contents_->pool, contents_->readerMetrics); + proto::StripeFooter stripeFooter; + if (!stripeFooter.ParseFromZeroCopyStream(pbStream.get())) { + throw ParseError(std::string("bad StripeFooter from ") + pbStream->getName()); + } + + // traverse all streams in stripe footer, choose selected streams to prebuffer + uint64_t offset = stripeInfo.offset(); + for (int i = 0; i < stripeFooter.streams_size(); i++) { + const proto::Stream& stream = stripeFooter.streams(i); + if (offset + stream.length() > stripeFooterStart) { + std::stringstream msg; + msg << "Malformed stream meta at stream index " << i << " in stripe " << stripe + << ": streamOffset=" << offset << ", streamLength=" << stream.length() + << ", stripeOffset=" << stripeInfo.offset() + << ", stripeIndexLength=" << stripeInfo.index_length() + << ", stripeDataLength=" << stripeInfo.data_length(); + throw ParseError(msg.str()); + } + + if (stream.has_kind() && selectedColumns[stream.column()]) { + const auto& kind = stream.kind(); + if (kind == proto::Stream_Kind_DATA || kind == proto::Stream_Kind_DICTIONARY_DATA || + kind == proto::Stream_Kind_PRESENT || kind == proto::Stream_Kind_LENGTH || + kind == proto::Stream_Kind_SECONDARY) { + ranges.emplace_back(offset, stream.length()); + } + } + + offset += stream.length(); + } + + { + std::lock_guard lock(contents_->readCacheMutex); + + if (!contents_->readCache) { + contents_->readCache = std::make_shared( + getStream(), options_.getCacheOptions(), contents_->pool, contents_->readerMetrics); + } + contents_->readCache->cache(std::move(ranges)); + } + } + } + RowReader::~RowReader() { // PASS } diff --git a/c++/src/Reader.hh b/c++/src/Reader.hh index 630d812c38..3d81d26920 100644 --- 
a/c++/src/Reader.hh +++ b/c++/src/Reader.hh @@ -26,6 +26,8 @@ #include "ColumnReader.hh" #include "RLE.hh" +#include "io/Cache.hh" + #include "SchemaEvolution.hh" #include "TypeImpl.hh" #include "sargs/SargsApplier.hh" @@ -70,6 +72,11 @@ namespace orc { bool isDecimalAsLong; std::unique_ptr metadata; ReaderMetrics* readerMetrics; + + // mutex to protect readCache_ from concurrent access + std::mutex readCacheMutex; + // cached io ranges. only valid when preBuffer is invoked. + std::shared_ptr readCache; }; proto::StripeFooter getStripeFooter(const proto::StripeInformation& info, @@ -245,6 +252,10 @@ namespace orc { const SchemaEvolution* getSchemaEvolution() const { return &schemaEvolution_; } + + std::shared_ptr getReadCache() const { + return contents_->readCache; + } }; class ReaderImpl : public Reader { @@ -260,15 +271,16 @@ namespace orc { // footer proto::Footer* footer_; uint64_t numberOfStripes_; + uint64_t getMemoryUse(int stripeIx, std::vector& selectedColumns); // internal methods void readMetadata() const; void checkOrcVersion(); - void getRowIndexStatistics( - const proto::StripeInformation& stripeInfo, uint64_t stripeIndex, - const proto::StripeFooter& currentStripeFooter, - std::vector >* indexStats) const; + void getRowIndexStatistics(const proto::StripeInformation& stripeInfo, uint64_t stripeIndex, + const proto::StripeFooter& currentStripeFooter, + std::vector>* indexStats) const; + proto::StripeFooter loadCurrentStripeFooter(uint32_t stripeIndex, uint64_t& offset) const; // metadata mutable bool isMetadataLoaded_; @@ -318,7 +330,8 @@ namespace orc { const std::string& getStreamName() const override; - std::unique_ptr getStripeStatistics(uint64_t stripeIndex) const override; + std::unique_ptr getStripeStatistics( + uint64_t stripeIndex, bool includeRowIndex = true) const override; std::unique_ptr createRowReader() const override; @@ -374,6 +387,13 @@ namespace orc { std::map getBloomFilters( uint32_t stripeIndex, const std::set& included) const override; + + void preBuffer(const std::vector& stripes, + const std::list& includeTypes) override; + void releaseBuffer(uint64_t boundary) override; + + std::map getRowGroupIndex( + uint32_t stripeIndex, const std::set& included) const override; }; } // namespace orc diff --git a/c++/src/RleEncoderV2.cc b/c++/src/RleEncoderV2.cc index 18c5200254..1cda9ee91e 100644 --- a/c++/src/RleEncoderV2.cc +++ b/c++/src/RleEncoderV2.cc @@ -440,31 +440,8 @@ namespace orc { } uint64_t RleEncoderV2::flush() { - if (numLiterals != 0) { - EncodingOption option = {}; - if (variableRunLength_ != 0) { - determineEncoding(option); - writeValues(option); - } else if (fixedRunLength_ != 0) { - if (fixedRunLength_ < MIN_REPEAT) { - variableRunLength_ = fixedRunLength_; - fixedRunLength_ = 0; - determineEncoding(option); - writeValues(option); - } else if (fixedRunLength_ >= MIN_REPEAT && fixedRunLength_ <= MAX_SHORT_REPEAT_LENGTH) { - option.encoding = SHORT_REPEAT; - writeValues(option); - } else { - option.encoding = DELTA; - option.isFixedDelta = true; - writeValues(option); - } - } - } - - outputStream->BackUp(static_cast(bufferLength - bufferPosition)); + finishEncode(); uint64_t dataSize = outputStream->flush(); - bufferLength = bufferPosition = 0; return dataSize; } @@ -779,4 +756,30 @@ namespace orc { fixedRunLength_ = 1; variableRunLength_ = 1; } + + void RleEncoderV2::finishEncode() { + if (numLiterals != 0) { + EncodingOption option = {}; + if (variableRunLength_ != 0) { + determineEncoding(option); + writeValues(option); + } else if 
(fixedRunLength_ != 0) { + if (fixedRunLength_ < MIN_REPEAT) { + variableRunLength_ = fixedRunLength_; + fixedRunLength_ = 0; + determineEncoding(option); + writeValues(option); + } else if (fixedRunLength_ >= MIN_REPEAT && fixedRunLength_ <= MAX_SHORT_REPEAT_LENGTH) { + option.encoding = SHORT_REPEAT; + writeValues(option); + } else { + option.encoding = DELTA; + option.isFixedDelta = true; + writeValues(option); + } + } + } + + RleEncoder::finishEncode(); + } } // namespace orc diff --git a/c++/src/SchemaEvolution.cc b/c++/src/SchemaEvolution.cc index 4099818ff9..7cf3b5c512 100644 --- a/c++/src/SchemaEvolution.cc +++ b/c++/src/SchemaEvolution.cc @@ -80,7 +80,7 @@ namespace orc { if (readType.getKind() == fileType.getKind()) { ret.isValid = true; if (fileType.getKind() == CHAR || fileType.getKind() == VARCHAR) { - ret.isValid = readType.getMaximumLength() == fileType.getMaximumLength(); + ret.needConvert = readType.getMaximumLength() != fileType.getMaximumLength(); } else if (fileType.getKind() == DECIMAL) { ret.needConvert = readType.getPrecision() != fileType.getPrecision() || readType.getScale() != fileType.getScale(); @@ -105,7 +105,11 @@ namespace orc { } case STRING: case CHAR: - case VARCHAR: + case VARCHAR: { + ret.isValid = ret.needConvert = isStringVariant(readType) || isNumeric(readType) || + isTimestamp(readType) || isDecimal(readType); + break; + } case TIMESTAMP: case TIMESTAMP_INSTANT: case DATE: diff --git a/c++/src/Statistics.cc b/c++/src/Statistics.cc index f9581215b3..c1a23cad16 100644 --- a/c++/src/Statistics.cc +++ b/c++/src/Statistics.cc @@ -81,11 +81,20 @@ namespace orc { // PASS } - StripeStatisticsImpl::StripeStatisticsImpl( + StripeStatisticsImpl::StripeStatisticsImpl(const proto::StripeStatistics& stripeStats, + const StatContext& statContext) { + columnStats_ = std::make_unique(stripeStats, statContext); + } + + StripeStatisticsWithRowGroupIndexImpl::~StripeStatisticsWithRowGroupIndexImpl() { + // PASS + } + + StripeStatisticsWithRowGroupIndexImpl::StripeStatisticsWithRowGroupIndexImpl( const proto::StripeStatistics& stripeStats, std::vector >& indexStats, - const StatContext& statContext) { - columnStats_ = std::make_unique(stripeStats, statContext); + const StatContext& statContext) + : StripeStatisticsImpl(stripeStats, statContext) { rowIndexStats_.resize(indexStats.size()); for (size_t i = 0; i < rowIndexStats_.size(); i++) { for (size_t j = 0; j < indexStats[i].size(); j++) { @@ -181,13 +190,13 @@ namespace orc { ColumnStatisticsImpl::ColumnStatisticsImpl(const proto::ColumnStatistics& pb) { stats_.setNumberOfValues(pb.number_of_values()); - stats_.setHasNull(pb.has_null()); + stats_.setHasNull(pb.has_has_null() ? pb.has_null() : true); } BinaryColumnStatisticsImpl::BinaryColumnStatisticsImpl(const proto::ColumnStatistics& pb, const StatContext& statContext) { stats_.setNumberOfValues(pb.number_of_values()); - stats_.setHasNull(pb.has_null()); + stats_.setHasNull(pb.has_has_null() ? pb.has_null() : true); if (pb.has_binary_statistics() && statContext.correctStats) { stats_.setHasTotalLength(pb.binary_statistics().has_sum()); stats_.setTotalLength(static_cast(pb.binary_statistics().sum())); @@ -197,7 +206,7 @@ namespace orc { BooleanColumnStatisticsImpl::BooleanColumnStatisticsImpl(const proto::ColumnStatistics& pb, const StatContext& statContext) { stats_.setNumberOfValues(pb.number_of_values()); - stats_.setHasNull(pb.has_null()); + stats_.setHasNull(pb.has_has_null() ? 
pb.has_null() : true); if (pb.has_bucket_statistics() && statContext.correctStats) { hasCount_ = true; trueCount_ = pb.bucket_statistics().count(0); @@ -210,7 +219,7 @@ namespace orc { DateColumnStatisticsImpl::DateColumnStatisticsImpl(const proto::ColumnStatistics& pb, const StatContext& statContext) { stats_.setNumberOfValues(pb.number_of_values()); - stats_.setHasNull(pb.has_null()); + stats_.setHasNull(pb.has_has_null() ? pb.has_null() : true); if (!pb.has_date_statistics() || !statContext.correctStats) { // hasMinimum_ is false by default; // hasMaximum_ is false by default; @@ -227,7 +236,7 @@ namespace orc { DecimalColumnStatisticsImpl::DecimalColumnStatisticsImpl(const proto::ColumnStatistics& pb, const StatContext& statContext) { stats_.setNumberOfValues(pb.number_of_values()); - stats_.setHasNull(pb.has_null()); + stats_.setHasNull(pb.has_has_null() ? pb.has_null() : true); if (pb.has_decimal_statistics() && statContext.correctStats) { const proto::DecimalStatistics& stats = pb.decimal_statistics(); stats_.setHasMinimum(stats.has_minimum()); @@ -242,7 +251,7 @@ namespace orc { DoubleColumnStatisticsImpl::DoubleColumnStatisticsImpl(const proto::ColumnStatistics& pb) { stats_.setNumberOfValues(pb.number_of_values()); - stats_.setHasNull(pb.has_null()); + stats_.setHasNull(pb.has_has_null() ? pb.has_null() : true); if (!pb.has_double_statistics()) { stats_.setMinimum(0); stats_.setMaximum(0); @@ -261,7 +270,7 @@ namespace orc { IntegerColumnStatisticsImpl::IntegerColumnStatisticsImpl(const proto::ColumnStatistics& pb) { stats_.setNumberOfValues(pb.number_of_values()); - stats_.setHasNull(pb.has_null()); + stats_.setHasNull(pb.has_has_null() ? pb.has_null() : true); if (!pb.has_int_statistics()) { stats_.setMinimum(0); stats_.setMaximum(0); @@ -281,7 +290,7 @@ namespace orc { StringColumnStatisticsImpl::StringColumnStatisticsImpl(const proto::ColumnStatistics& pb, const StatContext& statContext) { stats_.setNumberOfValues(pb.number_of_values()); - stats_.setHasNull(pb.has_null()); + stats_.setHasNull(pb.has_has_null() ? pb.has_null() : true); if (!pb.has_string_statistics() || !statContext.correctStats) { stats_.setTotalLength(0); } else { @@ -299,7 +308,7 @@ namespace orc { TimestampColumnStatisticsImpl::TimestampColumnStatisticsImpl(const proto::ColumnStatistics& pb, const StatContext& statContext) { stats_.setNumberOfValues(pb.number_of_values()); - stats_.setHasNull(pb.has_null()); + stats_.setHasNull(pb.has_has_null() ? pb.has_null() : true); if (!pb.has_timestamp_statistics() || !statContext.correctStats) { stats_.setMinimum(0); stats_.setMaximum(0); @@ -365,7 +374,7 @@ namespace orc { CollectionColumnStatisticsImpl::CollectionColumnStatisticsImpl( const proto::ColumnStatistics& pb) { stats_.setNumberOfValues(pb.number_of_values()); - stats_.setHasNull(pb.has_null()); + stats_.setHasNull(pb.has_has_null() ? 
pb.has_null() : true); if (!pb.has_collection_statistics()) { stats_.setMinimum(0); stats_.setMaximum(0); diff --git a/c++/src/Statistics.hh b/c++/src/Statistics.hh index 6f212c15cc..b7ed5d1e56 100644 --- a/c++/src/Statistics.hh +++ b/c++/src/Statistics.hh @@ -1713,7 +1713,6 @@ namespace orc { class StripeStatisticsImpl : public StripeStatistics { private: std::unique_ptr columnStats_; - std::vector > > rowIndexStats_; // DELIBERATELY NOT IMPLEMENTED StripeStatisticsImpl(const StripeStatisticsImpl&); @@ -1721,7 +1720,6 @@ namespace orc { public: StripeStatisticsImpl(const proto::StripeStatistics& stripeStats, - std::vector >& indexStats, const StatContext& statContext); virtual const ColumnStatistics* getColumnStatistics(uint32_t columnId) const override { @@ -1732,13 +1730,38 @@ namespace orc { return columnStats_->getNumberOfColumns(); } + virtual const ColumnStatistics* getRowIndexStatistics(uint32_t, uint32_t) const override { + throw NotImplementedYet("set includeRowIndex true to get row index stats"); + } + + virtual ~StripeStatisticsImpl() override; + + virtual uint32_t getNumberOfRowIndexStats(uint32_t) const override { + throw NotImplementedYet("set includeRowIndex true to get row index stats"); + } + }; + + class StripeStatisticsWithRowGroupIndexImpl : public StripeStatisticsImpl { + private: + std::vector > > rowIndexStats_; + + // DELIBERATELY NOT IMPLEMENTED + StripeStatisticsWithRowGroupIndexImpl(const StripeStatisticsWithRowGroupIndexImpl&); + StripeStatisticsWithRowGroupIndexImpl& operator=(const StripeStatisticsWithRowGroupIndexImpl&); + + public: + StripeStatisticsWithRowGroupIndexImpl( + const proto::StripeStatistics& stripeStats, + std::vector >& indexStats, + const StatContext& statContext); + virtual const ColumnStatistics* getRowIndexStatistics(uint32_t columnId, uint32_t rowIndex) const override { // check id indices are valid return rowIndexStats_[columnId][rowIndex].get(); } - virtual ~StripeStatisticsImpl() override; + virtual ~StripeStatisticsWithRowGroupIndexImpl() override; uint32_t getNumberOfRowIndexStats(uint32_t columnId) const override { return static_cast(rowIndexStats_[columnId].size()); diff --git a/c++/src/StripeStream.cc b/c++/src/StripeStream.cc index f4345c0871..a5609f7629 100644 --- a/c++/src/StripeStream.cc +++ b/c++/src/StripeStream.cc @@ -19,6 +19,7 @@ #include "StripeStream.hh" #include "RLE.hh" #include "Reader.hh" +#include "io/Cache.hh" #include "orc/Exceptions.hh" #include "wrap/coded-stream-wrapper.h" @@ -37,7 +38,8 @@ namespace orc { stripeStart_(stripeStart), input_(input), writerTimezone_(writerTimezone), - readerTimezone_(readerTimezone) { + readerTimezone_(readerTimezone), + readCache_(reader.getReadCache()) { // PASS } @@ -89,7 +91,6 @@ namespace orc { if (stream.has_kind() && stream.kind() == kind && stream.column() == static_cast(columnId)) { uint64_t streamLength = stream.length(); - uint64_t myBlock = shouldStream ? input_.getNaturalReadSize() : streamLength; if (offset + streamLength > dataEnd) { std::stringstream msg; msg << "Malformed stream meta at stream index " << i << " in stripe " << stripeIndex_ @@ -99,9 +100,23 @@ namespace orc { << ", stripeDataLength=" << stripeInfo_.data_length(); throw ParseError(msg.str()); } - return createDecompressor(reader_.getCompression(), - std::make_unique( - &input_, offset, stream.length(), *pool, myBlock), + + BufferSlice slice; + if (readCache_) { + ReadRange range{offset, streamLength}; + slice = readCache_->read(range); + } + + uint64_t myBlock = shouldStream ? 
input_.getNaturalReadSize() : streamLength; + std::unique_ptr seekableInput; + if (slice.buffer) { + seekableInput = std::make_unique( + slice.buffer->data() + slice.offset, slice.length); + } else { + seekableInput = std::make_unique(&input_, offset, streamLength, + *pool, myBlock); + } + return createDecompressor(reader_.getCompression(), std::move(seekableInput), reader_.getCompressionSize(), *pool, reader_.getFileContents().readerMetrics); } diff --git a/c++/src/StripeStream.hh b/c++/src/StripeStream.hh index ad82d472c2..2d26f8575e 100644 --- a/c++/src/StripeStream.hh +++ b/c++/src/StripeStream.hh @@ -30,6 +30,7 @@ namespace orc { class RowReaderImpl; + class ReadRangeCache; /** * StripeStream Implementation @@ -45,6 +46,7 @@ namespace orc { InputStream& input_; const Timezone& writerTimezone_; const Timezone& readerTimezone_; + std::shared_ptr readCache_; public: StripeStreamsImpl(const RowReaderImpl& reader, uint64_t index, diff --git a/c++/src/Timezone.cc b/c++/src/Timezone.cc index 32276a850d..384f8ea99f 100644 --- a/c++/src/Timezone.cc +++ b/c++/src/Timezone.cc @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -655,25 +656,24 @@ namespace orc { epoch_ = utcEpoch - getVariant(utcEpoch).gmtOffset; } - const char* getTimezoneDirectory() { + std::string getTimezoneDirectory() { const char* dir = getenv("TZDIR"); if (!dir) { - dir = DEFAULT_TZDIR; + // this is present if we're in an activated conda environment + const char* condaPrefix = getenv("CONDA_PREFIX"); + if (condaPrefix) { + std::string condaDir(condaPrefix); + condaDir += "/share/zoneinfo"; + return condaDir; + } else { + dir = DEFAULT_TZDIR; + } } return dir; } - /** - * Get a timezone by absolute filename. - * Results are cached. - */ - const Timezone& getTimezoneByFilename(const std::string& filename) { - // ORC-110 - std::lock_guard timezone_lock(timezone_mutex); - std::map >::iterator itr = timezoneCache.find(filename); - if (itr != timezoneCache.end()) { - return *(itr->second).get(); - } + static std::vector loadTZDB(const std::string& filename) { + std::vector buffer; if (!fileExists(filename.c_str())) { std::stringstream ss; ss << "Time zone file " << filename << " does not exist." 
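The getTimezoneDirectory() change above resolves the zoneinfo directory in a fixed order: an explicit TZDIR always wins, an activated conda environment's bundled database comes next, and the compiled-in default is the last resort. A minimal standalone sketch of that order (kDefaultTzDir below is only an illustrative stand-in for the build-time DEFAULT_TZDIR macro):

#include <cstdlib>
#include <string>

// Illustrative stand-in for the DEFAULT_TZDIR macro defined at build time.
static const char* kDefaultTzDir = "/usr/share/zoneinfo";

std::string resolveTimezoneDirectory() {
  if (const char* dir = std::getenv("TZDIR")) {
    return dir;  // explicit override always wins
  }
  if (const char* condaPrefix = std::getenv("CONDA_PREFIX")) {
    // an activated conda environment ships its own zoneinfo copy
    return std::string(condaPrefix) + "/share/zoneinfo";
  }
  return kDefaultTzDir;  // fall back to the system database
}

Combined with the LazyTimezone class introduced below, no timezone file is opened or parsed until a reader actually asks for a conversion.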
@@ -683,12 +683,65 @@ try { std::unique_ptr<InputStream> file = readFile(filename); size_t size = static_cast<size_t>(file->getLength()); - std::vector<char> buffer(size); + buffer.resize(size); file->read(&buffer[0], size, 0); - timezoneCache[filename] = std::make_shared<TimezoneImpl>(filename, buffer); } catch (ParseError& err) { throw TimezoneError(err.what()); } + return buffer; + } + + class LazyTimezone : public Timezone { + private: + std::string filename_; + mutable std::unique_ptr<TimezoneImpl> impl_; + mutable std::once_flag initialized_; + + TimezoneImpl* getImpl() const { + std::call_once(initialized_, [&]() { + auto buffer = loadTZDB(filename_); + impl_ = std::make_unique<TimezoneImpl>(filename_, std::move(buffer)); + }); + return impl_.get(); + } + + public: + LazyTimezone(const std::string& filename) : filename_(filename) {} + + const TimezoneVariant& getVariant(int64_t clk) const override { + return getImpl()->getVariant(clk); + } + int64_t getEpoch() const override { + return getImpl()->getEpoch(); + } + void print(std::ostream& os) const override { + return getImpl()->print(os); + } + uint64_t getVersion() const override { + return getImpl()->getVersion(); + } + + int64_t convertToUTC(int64_t clk) const override { + return getImpl()->convertToUTC(clk); + } + + int64_t convertFromUTC(int64_t clk) const override { + return getImpl()->convertFromUTC(clk); + } + }; + + /** + * Get a timezone by absolute filename. + * Results are cached. + */ + const Timezone& getTimezoneByFilename(const std::string& filename) { + // ORC-110 + std::lock_guard<std::mutex> timezone_lock(timezone_mutex); + std::map<std::string, std::shared_ptr<Timezone> >::iterator itr = timezoneCache.find(filename); + if (itr != timezoneCache.end()) { + return *(itr->second).get(); + } + timezoneCache[filename] = std::make_shared<LazyTimezone>(filename); return *timezoneCache[filename].get(); } diff --git a/c++/src/TypeImpl.cc b/c++/src/TypeImpl.cc index c7b073c713..cbc7b82796 100644 --- a/c++/src/TypeImpl.cc +++ b/c++/src/TypeImpl.cc @@ -660,7 +660,8 @@ namespace orc { std::pair<std::string, size_t> nameRes = parseName(input, pos, end); pos = nameRes.second; if (input[pos] != ':') { - throw std::logic_error("Invalid struct type. No field name set."); + throw std::logic_error("Invalid struct type. Field name cannot contain '" + + std::string(1, input[pos]) + "'."); } std::pair<std::unique_ptr<Type>, size_t> typeRes = TypeImpl::parseType(input, ++pos, end); result->addStructField(nameRes.first, std::move(typeRes.first)); diff --git a/c++/src/Utils.hh b/c++/src/Utils.hh index 4a609788f9..851d0af15c 100644 --- a/c++/src/Utils.hh +++ b/c++/src/Utils.hh @@ -21,6 +21,7 @@ #include #include +#include namespace orc { @@ -70,6 +71,75 @@ namespace orc { #define SCOPED_MINUS_STOPWATCH(METRICS_PTR, LATENCY_VAR) #endif + struct Utf8Utils { + /** + * Counts how many UTF-8 characters are in the input data + */ + static uint64_t charLength(const char* data, uint64_t length) { + uint64_t chars = 0; + for (uint64_t i = 0; i < length; i++) { + if (isUtfStartByte(data[i])) { + chars++; + } + } + return chars; + } + + /** + * Return the number of bytes required to read at most maxCharLength + * characters in full from a utf-8 encoded byte array provided + * by data. This does not validate utf-8 data, but + * operates correctly on already valid utf-8 data.
+ * + * @param maxCharLength number of characters required + * @param data the bytes of UTF-8 + * @param length the length of data to truncate + */ + static uint64_t truncateBytesTo(uint64_t maxCharLength, const char* data, uint64_t length) { + uint64_t chars = 0; + if (length <= maxCharLength) { + return length; + } + for (uint64_t i = 0; i < length; i++) { + if (isUtfStartByte(data[i])) { + chars++; + } + if (chars > maxCharLength) { + return i; + } + } + // everything fits + return length; + } + + /** + * Checks if b is the first byte of a UTF-8 character. + */ + inline static bool isUtfStartByte(char b) { + return (b & 0xC0) != 0x80; + } + + /** + * Find the start of the last character that ends in the current string. + * @param text the bytes of the utf-8 + * @param from the first byte location + * @param until the last byte location + * @return the index of the last character + */ + static uint64_t findLastCharacter(const char* text, uint64_t from, uint64_t until) { + uint64_t posn = until; + /* we don't expect characters more than 5 bytes */ + while (posn >= from) { + if (isUtfStartByte(text[posn])) { + return posn; + } + posn -= 1; + } + /* beginning of a valid char not found */ + throw std::logic_error("Could not truncate string, beginning of a valid char not found"); + } + }; + } // namespace orc #endif diff --git a/c++/src/Vector.cc b/c++/src/Vector.cc index bc44469959..49f47aeb03 100644 --- a/c++/src/Vector.cc +++ b/c++/src/Vector.cc @@ -34,6 +34,7 @@ namespace orc { notNull(pool, cap), hasNulls(false), isEncoded(false), + dictionaryDecoded(false), memoryPool(pool) { std::memset(notNull.data(), 1, capacity); } @@ -61,6 +62,13 @@ namespace orc { return false; } + void ColumnVectorBatch::decodeDictionary() { + if (dictionaryDecoded) return; + + decodeDictionaryImpl(); + dictionaryDecoded = true; + } + StringDictionary::StringDictionary(MemoryPool& pool) : dictionaryBlob(pool), dictionaryOffset(pool) { // PASS @@ -88,6 +96,17 @@ namespace orc { } } + void EncodedStringVectorBatch::decodeDictionaryImpl() { + size_t n = index.size(); + resize(n); + + for (size_t i = 0; i < n; ++i) { + if (!hasNulls || notNull[i]) { + dictionary->getValueByIndex(index[i], data[i], length[i]); + } + } + } + StringVectorBatch::StringVectorBatch(uint64_t capacity, MemoryPool& pool) : ColumnVectorBatch(capacity, pool), data(pool, capacity), @@ -174,6 +193,12 @@ namespace orc { return false; } + void StructVectorBatch::decodeDictionaryImpl() { + for (const auto& field : fields) { + field->decodeDictionary(); + } + } + ListVectorBatch::ListVectorBatch(uint64_t cap, MemoryPool& pool) : ColumnVectorBatch(cap, pool), offsets(pool, cap + 1) { offsets.zeroOut(); @@ -211,6 +236,10 @@ namespace orc { return true; } + void ListVectorBatch::decodeDictionaryImpl() { + elements->decodeDictionary(); + } + MapVectorBatch::MapVectorBatch(uint64_t cap, MemoryPool& pool) : ColumnVectorBatch(cap, pool), offsets(pool, cap + 1) { offsets.zeroOut(); @@ -251,6 +280,16 @@ namespace orc { return true; } + void MapVectorBatch::decodeDictionaryImpl() { + if (keys) { + keys->decodeDictionary(); + } + + if (elements) { + elements->decodeDictionary(); + } + } + UnionVectorBatch::UnionVectorBatch(uint64_t cap, MemoryPool& pool) : ColumnVectorBatch(cap, pool), tags(pool, cap), offsets(pool, cap) { tags.zeroOut(); @@ -310,6 +349,12 @@ namespace orc { return false; } + void UnionVectorBatch::decodeDictionaryImpl() { + for (const auto& child : children) { + child->decodeDictionary(); + } + } + 
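The decodeDictionary() machinery added to Vector.cc above is a memoized template method: the public entry point runs the type-specific decode at most once per batch, EncodedStringVectorBatch materializes its values from the dictionary, and container batches recurse into their children. A condensed sketch of the pattern, with simplified stand-in types (not the real ColumnVectorBatch hierarchy):

#include <memory>
#include <vector>

struct BatchSketch {
  bool dictionaryDecoded = false;

  // Public entry point: idempotent, so callers may invoke it freely.
  void decodeDictionary() {
    if (dictionaryDecoded) return;
    decodeDictionaryImpl();  // type-specific work, may recurse
    dictionaryDecoded = true;
  }

  virtual void decodeDictionaryImpl() {}  // leaf types override this
  virtual ~BatchSketch() = default;
};

struct StructSketch : BatchSketch {
  std::vector<std::unique_ptr<BatchSketch>> fields;

  // Container types only propagate the decode to their children.
  void decodeDictionaryImpl() override {
    for (const auto& field : fields) {
      field->decodeDictionary();
    }
  }
};

Because the flag lives in the base class, repeated calls on the same batch tree are cheap no-ops.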
Decimal64VectorBatch::Decimal64VectorBatch(uint64_t cap, MemoryPool& pool) : ColumnVectorBatch(cap, pool), precision(0), diff --git a/c++/src/Writer.cc b/c++/src/Writer.cc index fceac7c2fb..775e6d2452 100644 --- a/c++/src/Writer.cc +++ b/c++/src/Writer.cc @@ -46,6 +46,8 @@ namespace orc { WriterMetrics* metrics; bool useTightNumericVector; uint64_t outputBufferCapacity; + uint64_t memoryBlockSize; + bool alignBlockBoundToRowGroup; WriterOptionsPrivate() : fileVersion(FileVersion::v_0_12()) { // default to Hive_0_12 stripeSize = 64 * 1024 * 1024; // 64M @@ -67,6 +69,8 @@ namespace orc { metrics = nullptr; useTightNumericVector = false; outputBufferCapacity = 1024 * 1024; + memoryBlockSize = 64 * 1024; // 64K + alignBlockBoundToRowGroup = false; } }; @@ -287,6 +291,24 @@ namespace orc { return privateBits_->outputBufferCapacity; } + WriterOptions& WriterOptions::setMemoryBlockSize(uint64_t capacity) { + privateBits_->memoryBlockSize = capacity; + return *this; + } + + uint64_t WriterOptions::getMemoryBlockSize() const { + return privateBits_->memoryBlockSize; + } + + WriterOptions& WriterOptions::setAlignBlockBoundToRowGroup(bool alignBlockBoundToRowGroup) { + privateBits_->alignBlockBoundToRowGroup = alignBlockBoundToRowGroup; + return *this; + } + + bool WriterOptions::getAlignBlockBoundToRowGroup() const { + return privateBits_->alignBlockBoundToRowGroup; + } + Writer::~Writer() { // PASS } @@ -352,11 +374,16 @@ namespace orc { useTightNumericVector_ = opts.getUseTightNumericVector(); + if (options_.getCompressionBlockSize() % options_.getMemoryBlockSize() != 0) { + throw std::invalid_argument( + "Compression block size must be a multiple of memory block size."); + } + // compression stream for stripe footer, file footer and metadata - compressionStream_ = - createCompressor(options_.getCompression(), outStream_, options_.getCompressionStrategy(), - options_.getOutputBufferCapacity(), options_.getCompressionBlockSize(), - *options_.getMemoryPool(), options_.getWriterMetrics()); + compressionStream_ = createCompressor( + options_.getCompression(), outStream_, options_.getCompressionStrategy(), + options_.getOutputBufferCapacity(), options_.getCompressionBlockSize(), + options_.getMemoryBlockSize(), *options_.getMemoryPool(), options_.getWriterMetrics()); // uncompressed stream for post script bufferedStream_.reset(new BufferedOutputStream(*options_.getMemoryPool(), outStream_, @@ -385,6 +412,9 @@ namespace orc { stripeRows_ += chunkSize; if (indexRows_ >= rowIndexStride) { + if (options_.getAlignBlockBoundToRowGroup()) { + columnWriter_->finishStreams(); + } columnWriter_->createRowIndexEntry(); indexRows_ = 0; } @@ -437,7 +467,7 @@ namespace orc { // Write file header const static size_t magicIdLength = strlen(WriterImpl::magicId); { - SCOPED_STOPWATCH(options.getWriterMetrics(), IOBlockingLatencyUs, IOCount); + SCOPED_STOPWATCH(options_.getWriterMetrics(), IOBlockingLatencyUs, IOCount); outStream_->write(WriterImpl::magicId, magicIdLength); } currentOffset_ += magicIdLength; @@ -585,7 +615,7 @@ namespace orc { throw std::logic_error("Failed to write post script."); } unsigned char psLength = static_cast(bufferedStream_->flush()); - SCOPED_STOPWATCH(options.getWriterMetrics(), IOBlockingLatencyUs, IOCount); + SCOPED_STOPWATCH(options_.getWriterMetrics(), IOBlockingLatencyUs, IOCount); outStream_->write(&psLength, sizeof(unsigned char)); } diff --git a/c++/src/io/Cache.cc b/c++/src/io/Cache.cc new file mode 100644 index 0000000000..39f63fdd2b --- /dev/null +++ b/c++/src/io/Cache.cc @@ -0,0 
+1,171 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "Cache.hh" + +namespace orc { + + std::vector ReadRangeCombiner::coalesce(std::vector ranges) const { + if (ranges.empty()) { + return ranges; + } + + // Remove zero-sized ranges + auto end = std::remove_if(ranges.begin(), ranges.end(), + [](const ReadRange& range) { return range.length == 0; }); + // Sort in position order + std::sort(ranges.begin(), end, [](const ReadRange& a, const ReadRange& b) { + return a.offset != b.offset ? a.offset < b.offset : a.length > b.length; + }); + + // Remove ranges that overlap 100% + std::vector uniqueRanges; + uniqueRanges.reserve(ranges.size()); + for (auto it = ranges.begin(); it != end; ++it) { + if (uniqueRanges.empty() || !uniqueRanges.back().contains(*it)) { + uniqueRanges.push_back(*it); + } + } + ranges = std::move(uniqueRanges); + + // Skip further processing if ranges is empty after removing zero-sized ranges. + if (ranges.empty()) { + return ranges; + } + +#ifndef NDEBUG + for (size_t i = 0; i < ranges.size() - 1; ++i) { + const auto& left = ranges[i]; + const auto& right = ranges[i + 1]; + assert(left.offset < right.offset); + assert(!left.contains(right)); + } +#endif + + std::vector coalesced; + auto itr = ranges.begin(); + + // Start of the current coalesced range and end (exclusive) of previous range. + // Both are initialized with the start of first range which is a placeholder value. + uint64_t coalescedStart = itr->offset; + uint64_t coalescedEnd = coalescedStart + itr->length; + + for (++itr; itr < ranges.end(); ++itr) { + const uint64_t currentRangeStart = itr->offset; + const uint64_t currentRangeEnd = currentRangeStart + itr->length; + + assert(coalescedStart < coalescedEnd); + assert(currentRangeStart < currentRangeEnd); + + // At this point, the coalesced range is [coalesced_start, prev_range_end). + // Stop coalescing if: + // - coalesced range is too large, or + // - distance (hole/gap) between consecutive ranges is too large. + if ((currentRangeEnd - coalescedStart > rangeSizeLimit) || + (currentRangeStart > coalescedEnd + holeSizeLimit)) { + coalesced.push_back({coalescedStart, coalescedEnd - coalescedStart}); + coalescedStart = currentRangeStart; + } + + // Update the prev_range_end with the current range. 
+ coalescedEnd = currentRangeEnd; + } + coalesced.push_back({coalescedStart, coalescedEnd - coalescedStart}); + + assert(coalesced.front().offset == ranges.front().offset); + assert(coalesced.back().offset + coalesced.back().length == + ranges.back().offset + ranges.back().length); + return coalesced; + } + + std::vector ReadRangeCombiner::coalesceReadRanges(std::vector ranges, + uint64_t holeSizeLimit, + uint64_t rangeSizeLimit) { + assert(rangeSizeLimit > holeSizeLimit); + + ReadRangeCombiner combiner{holeSizeLimit, rangeSizeLimit}; + return combiner.coalesce(std::move(ranges)); + } + + void ReadRangeCache::cache(std::vector ranges) { + ranges = ReadRangeCombiner::coalesceReadRanges(std::move(ranges), options_.holeSizeLimit, + options_.rangeSizeLimit); + + std::vector newEntries = makeCacheEntries(ranges); + // Add new entries, themselves ordered by offset + if (entries_.size() > 0) { + std::vector merged(entries_.size() + newEntries.size()); + std::merge(entries_.begin(), entries_.end(), newEntries.begin(), newEntries.end(), + merged.begin()); + entries_ = std::move(merged); + } else { + entries_ = std::move(newEntries); + } + } + + BufferSlice ReadRangeCache::read(const ReadRange& range) { + if (range.length == 0) { + return {std::make_shared(*memoryPool_, 0), 0, 0}; + } + + const auto it = std::lower_bound(entries_.begin(), entries_.end(), range, + [](const RangeCacheEntry& entry, const ReadRange& range) { + return entry.range.offset + entry.range.length < + range.offset + range.length; + }); + + BufferSlice result{}; + bool hit_cache = false; + if (it != entries_.end() && it->range.contains(range)) { + hit_cache = it->future.valid(); + it->future.get(); + result = BufferSlice{it->buffer, range.offset - it->range.offset, range.length}; + } + + if (metrics_) { + if (hit_cache) + metrics_->ReadRangeCacheHits.fetch_add(1); + else + metrics_->ReadRangeCacheMisses.fetch_add(1); + } + return result; + } + + void ReadRangeCache::evictEntriesBefore(uint64_t boundary) { + auto it = std::lower_bound(entries_.begin(), entries_.end(), boundary, + [](const RangeCacheEntry& entry, uint64_t offset) { + return entry.range.offset + entry.range.length <= offset; + }); + entries_.erase(entries_.begin(), it); + } + + std::vector ReadRangeCache::makeCacheEntries( + const std::vector& ranges) const { + std::vector newEntries; + newEntries.reserve(ranges.size()); + for (const auto& range : ranges) { + BufferPtr buffer = std::make_shared(*memoryPool_, range.length); + std::future future = stream_->readAsync(buffer->data(), buffer->size(), range.offset); + newEntries.emplace_back(range, std::move(buffer), std::move(future)); + } + return newEntries; + } + +} // namespace orc diff --git a/c++/src/io/Cache.hh b/c++/src/io/Cache.hh new file mode 100644 index 0000000000..7fc79718aa --- /dev/null +++ b/c++/src/io/Cache.hh @@ -0,0 +1,122 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "orc/MemoryPool.hh" +#include "orc/OrcFile.hh" + +#include +#include +#include +#include +#include +#include + +namespace orc { + + struct ReadRange { + uint64_t offset; + uint64_t length; + + ReadRange() = default; + ReadRange(uint64_t offset, uint64_t length) : offset(offset), length(length) {} + + friend bool operator==(const ReadRange& left, const ReadRange& right) { + return (left.offset == right.offset && left.length == right.length); + } + friend bool operator!=(const ReadRange& left, const ReadRange& right) { + return !(left == right); + } + + bool contains(const ReadRange& other) const { + return (offset <= other.offset && offset + length >= other.offset + other.length); + } + }; + + struct ReadRangeCombiner { + const uint64_t holeSizeLimit; + const uint64_t rangeSizeLimit; + + std::vector coalesce(std::vector ranges) const; + + static std::vector coalesceReadRanges(std::vector ranges, + uint64_t holeSizeLimit, + uint64_t rangeSizeLimit); + }; + + using Buffer = DataBuffer; + using BufferPtr = std::shared_ptr; + + struct RangeCacheEntry { + ReadRange range; + BufferPtr buffer; + std::shared_future future; // use shared_future in case of multiple get calls + + RangeCacheEntry() = default; + RangeCacheEntry(const ReadRange& range, BufferPtr buffer, std::future future) + : range(range), buffer(std::move(buffer)), future(std::move(future).share()) {} + + friend bool operator<(const RangeCacheEntry& left, const RangeCacheEntry& right) { + return left.range.offset < right.range.offset; + } + }; + + struct BufferSlice { + BufferPtr buffer = nullptr; + uint64_t offset = 0; + uint64_t length = 0; + }; + + /// A read cache designed to hide IO latencies when reading. + class ReadRangeCache { + public: + /// Construct a read cache with given options + explicit ReadRangeCache(InputStream* stream, CacheOptions options, MemoryPool* memoryPool, + ReaderMetrics* metrics = nullptr) + : stream_(stream), + options_(std::move(options)), + memoryPool_(memoryPool), + metrics_(metrics) {} + + ~ReadRangeCache() = default; + + /// Cache the given ranges in the background. + /// + /// The caller must ensure that the ranges do not overlap with each other, + /// nor with previously cached ranges. Otherwise, behaviour will be undefined. + void cache(std::vector ranges); + + /// Read a range previously given to Cache(). + BufferSlice read(const ReadRange& range); + + /// Evict cache entries with its range before given boundary. 
+ void evictEntriesBefore(uint64_t boundary); + + private: + std::vector makeCacheEntries(const std::vector& ranges) const; + + InputStream* stream_; + CacheOptions options_; + // Ordered by offset (so as to find a matching region by binary search) + std::vector entries_; + MemoryPool* memoryPool_; + ReaderMetrics* metrics_; + }; + +} // namespace orc diff --git a/c++/src/io/OutputStream.cc b/c++/src/io/OutputStream.cc index 6fc68e262f..fbf1ca61dd 100644 --- a/c++/src/io/OutputStream.cc +++ b/c++/src/io/OutputStream.cc @@ -61,6 +61,10 @@ namespace orc { } } + void BufferedOutputStream::finishStream() { + // PASS + } + google::protobuf::int64 BufferedOutputStream::ByteCount() const { return static_cast(dataBuffer_->size()); } @@ -87,7 +91,7 @@ namespace orc { uint64_t dataSize = dataBuffer_->size(); // flush data buffer into outputStream if (dataSize > 0) { - SCOPED_STOPWATCH(metrics, IOBlockingLatencyUs, IOCount); + SCOPED_STOPWATCH(metrics_, IOBlockingLatencyUs, IOCount); dataBuffer_->writeTo(outputStream_, metrics_); } dataBuffer_->resize(0); @@ -98,6 +102,10 @@ namespace orc { dataBuffer_->resize(0); } + uint64_t BufferedOutputStream::getRawInputBufferSize() const { + throw std::logic_error("getRawInputBufferSize is not supported."); + } + void AppendOnlyBufferedStream::write(const char* data, size_t size) { size_t dataOffset = 0; while (size > 0) { @@ -120,25 +128,31 @@ namespace orc { } uint64_t AppendOnlyBufferedStream::flush() { - outStream_->BackUp(bufferLength_ - bufferOffset_); - bufferOffset_ = bufferLength_ = 0; - buffer_ = nullptr; + finishStream(); return outStream_->flush(); } void AppendOnlyBufferedStream::recordPosition(PositionRecorder* recorder) const { uint64_t flushedSize = outStream_->getSize(); - uint64_t unflushedSize = static_cast(bufferOffset_); + uint64_t unusedBufferSize = static_cast(bufferLength_ - bufferOffset_); if (outStream_->isCompressed()) { // start of the compression chunk in the stream recorder->add(flushedSize); - // number of decompressed bytes that need to be consumed - recorder->add(unflushedSize); + // There are multiple blocks in the input buffer, but bufferPosition only records the + // effective length of the last block. We need rawInputBufferSize to record the total length + // of all variable blocks. 
+ recorder->add(outStream_->getRawInputBufferSize() - unusedBufferSize); } else { - flushedSize -= static_cast<uint64_t>(bufferLength_); // byte offset of the start location - recorder->add(flushedSize + unflushedSize); + recorder->add(flushedSize - unusedBufferSize); } } + void AppendOnlyBufferedStream::finishStream() { + outStream_->BackUp(bufferLength_ - bufferOffset_); + outStream_->finishStream(); + bufferOffset_ = bufferLength_ = 0; + buffer_ = nullptr; + } + } // namespace orc diff --git a/c++/src/io/OutputStream.hh b/c++/src/io/OutputStream.hh index c63bc805bb..6319de96d6 100644 --- a/c++/src/io/OutputStream.hh +++ b/c++/src/io/OutputStream.hh @@ -69,10 +69,12 @@ namespace orc { virtual uint64_t getSize() const; virtual uint64_t flush(); virtual void suppress(); + virtual uint64_t getRawInputBufferSize() const; virtual bool isCompressed() const { return false; } + virtual void finishStream(); }; DIAGNOSTIC_POP @@ -98,6 +100,7 @@ namespace orc { void write(const char* data, size_t size); uint64_t getSize() const; uint64_t flush(); + void finishStream(); void recordPosition(PositionRecorder* recorder) const; }; diff --git a/c++/src/sargs/ExpressionTree.cc b/c++/src/sargs/ExpressionTree.cc index e49bca4b77..58dd13817d 100644 --- a/c++/src/sargs/ExpressionTree.cc +++ b/c++/src/sargs/ExpressionTree.cc @@ -110,6 +110,9 @@ namespace orc { return result; } case Operator::NOT: + if (children_.size() != 1) { + throw std::invalid_argument("NOT operator must have exactly one child"); + } return !children_.at(0)->evaluate(leaves); case Operator::LEAF: return leaves[leaf_]; @@ -159,6 +162,9 @@ namespace orc { sstream << ')'; break; case Operator::NOT: + if (children_.size() != 1) { + throw std::invalid_argument("NOT operator must have exactly one child"); + } sstream << "(not " << children_.at(0)->toString() << ')'; break; case Operator::LEAF: diff --git a/c++/src/sargs/PredicateLeaf.cc b/c++/src/sargs/PredicateLeaf.cc index d9df1c5d5c..5c77616836 100644 --- a/c++/src/sargs/PredicateLeaf.cc +++ b/c++/src/sargs/PredicateLeaf.cc @@ -701,6 +701,9 @@ namespace orc { } } + // Files written by Trino may lack the has_null field. + if (!colStats.has_has_null()) return TruthValue::YES_NO_NULL; + bool allNull = colStats.has_null() && colStats.number_of_values() == 0; if (operator_ == Operator::IS_NULL || ((operator_ == Operator::EQUALS || operator_ == Operator::NULL_SAFE_EQUALS) && diff --git a/c++/test/CMakeLists.txt b/c++/test/CMakeLists.txt index b04055366c..f7328abb32 100644 --- a/c++/test/CMakeLists.txt +++ b/c++/test/CMakeLists.txt @@ -15,14 +15,15 @@ # specific language governing permissions and limitations # under the License.
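The has_has_null() checks added throughout Statistics.cc and the early return in PredicateLeaf.cc above enforce one conservative rule: when a writer (Trino, for example) never recorded the optional has_null field, assume nulls may be present and report YES_NO_NULL so that statistics-based pruning never skips a row group incorrectly. A small sketch of the shared idea; the helper names are hypothetical and the include path is assumed:

#include "wrap/orc-proto-wrapper.hh"  // assumed path for the generated proto types

namespace orc {
  // Statistics side: a missing has_null field reads as "may contain nulls".
  bool mayHaveNulls(const proto::ColumnStatistics& pb) {
    return pb.has_has_null() ? pb.has_null() : true;
  }

  // Predicate side: pruning is only allowed when the field was recorded;
  // otherwise evaluation short-circuits to TruthValue::YES_NO_NULL and
  // the row group is kept.
  bool canPruneWithNullStats(const proto::ColumnStatistics& colStats) {
    return colStats.has_has_null();
  }
}  // namespace orc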
-include_directories( - ${PROJECT_SOURCE_DIR}/c++/src +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX17_FLAGS} ${WARN_FLAGS}") + +add_library (orc-test-include INTERFACE) +target_include_directories (orc-test-include INTERFACE ${PROJECT_BINARY_DIR}/c++/include ${PROJECT_BINARY_DIR}/c++/src + ${PROJECT_SOURCE_DIR}/c++/src ) -set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX17_FLAGS} ${WARN_FLAGS}") - if(BUILD_ENABLE_AVX512) set(SIMD_TEST_SRCS TestRleVectorDecoder.cc) endif(BUILD_ENABLE_AVX512) @@ -62,6 +63,7 @@ add_executable (orc-test TestTimezone.cc TestType.cc TestWriter.cc + TestCache.cc ${SIMD_TEST_SRCS} ) @@ -73,6 +75,7 @@ target_link_libraries (orc-test orc::zlib orc::gtest orc::gmock + orc-test-include ) add_executable (create-test-files @@ -82,6 +85,7 @@ add_executable (create-test-files target_link_libraries (create-test-files orc orc::protobuf + orc-test-include ) if (TEST_VALGRIND_MEMCHECK) diff --git a/c++/test/MemoryInputStream.hh b/c++/test/MemoryInputStream.hh index e6ef55b6de..31333ae430 100644 --- a/c++/test/MemoryInputStream.hh +++ b/c++/test/MemoryInputStream.hh @@ -22,8 +22,6 @@ #include "io/InputStream.hh" #include "orc/OrcFile.hh" -#include - namespace orc { class MemoryInputStream : public InputStream { public: @@ -44,6 +42,11 @@ namespace orc { memcpy(buf, buffer_ + offset, length); } + std::future readAsync(void* buf, uint64_t length, uint64_t offset) override { + return std::async(std::launch::async, + [this, buf, length, offset] { this->read(buf, length, offset); }); + } + virtual const std::string& getName() const override { return name_; } diff --git a/c++/test/TestByteRle.cc b/c++/test/TestByteRle.cc index a822a61d6b..7717eab387 100644 --- a/c++/test/TestByteRle.cc +++ b/c++/test/TestByteRle.cc @@ -1263,7 +1263,7 @@ namespace orc { MemoryOutputStream memStream(capacity); std::unique_ptr encoder = createBooleanRleEncoder( createCompressor(CompressionKind_ZSTD, &memStream, CompressionStrategy_COMPRESSION, - capacity, blockSize, *getDefaultPool(), nullptr)); + capacity, blockSize, blockSize, *getDefaultPool(), nullptr)); encoder->add(data, numValues, nullptr); encoder->flush(); diff --git a/c++/test/TestCache.cc b/c++/test/TestCache.cc new file mode 100644 index 0000000000..496ba3ec90 --- /dev/null +++ b/c++/test/TestCache.cc @@ -0,0 +1,142 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include "MemoryInputStream.hh" +#include "io/Cache.hh" + +#include "wrap/gmock.h" +#include "wrap/gtest-wrapper.h" + +namespace orc { + + TEST(TestReadRangeCombiner, testBasics) { + ReadRangeCombiner combinator{0, 100}; + /// Ranges with partial overlap and identical offsets + std::vector ranges{{0, 15}, {5, 11}, {5, 15}}; + std::vector result = combinator.coalesce(std::move(ranges)); + std::vector expect{{0, 20}}; + ASSERT_EQ(result, expect); + } + + TEST(TestCoalesceReadRanges, testBasics) { + auto check = [](std::vector ranges, std::vector expected) -> void { + const uint64_t holeSizeLimit = 9; + const uint64_t rangeSizeLimit = 99; + auto coalesced = ReadRangeCombiner::coalesceReadRanges(ranges, holeSizeLimit, rangeSizeLimit); + ASSERT_EQ(coalesced, expected); + }; + + check({}, {}); + // Zero sized range that ends up in empty list + check({{110, 0}}, {}); + // Combination on 1 zero sized range and 1 non-zero sized range + check({{110, 10}, {120, 0}}, {{110, 10}}); + // 1 non-zero sized range + check({{110, 10}}, {{110, 10}}); + // No holes + unordered ranges + check({{130, 10}, {110, 10}, {120, 10}}, {{110, 30}}); + // No holes + check({{110, 10}, {120, 10}, {130, 10}}, {{110, 30}}); + // Small holes only + check({{110, 11}, {130, 11}, {150, 11}}, {{110, 51}}); + // Large holes + check({{110, 10}, {130, 10}}, {{110, 10}, {130, 10}}); + check({{110, 11}, {130, 11}, {150, 10}, {170, 11}, {190, 11}}, {{110, 50}, {170, 31}}); + + // With zero-sized ranges + check({{110, 11}, {130, 0}, {130, 11}, {145, 0}, {150, 11}, {200, 0}}, {{110, 51}}); + + // No holes but large ranges + check({{110, 100}, {210, 100}}, {{110, 100}, {210, 100}}); + // Small holes and large range in the middle (*) + check({{110, 10}, {120, 11}, {140, 100}, {240, 11}, {260, 11}}, + {{110, 21}, {140, 100}, {240, 31}}); + // Mid-size ranges that would turn large after coalescing + check({{100, 50}, {150, 50}}, {{100, 50}, {150, 50}}); + check({{100, 30}, {130, 30}, {160, 30}, {190, 30}, {220, 30}}, {{100, 90}, {190, 60}}); + + // Same as (*) but unsorted + check({{140, 100}, {120, 11}, {240, 11}, {110, 10}, {260, 11}}, + {{110, 21}, {140, 100}, {240, 31}}); + + // Completely overlapping ranges should be eliminated + check({{20, 5}, {20, 5}, {21, 2}}, {{20, 5}}); + } + + TEST(TestReadRangeCache, testBasics) { + std::string data = "abcdefghijklmnopqrstuvwxyz"; + + CacheOptions options; + options.holeSizeLimit = 2; + options.rangeSizeLimit = 10; + + auto file = std::make_shared(data.data(), data.size()); + ReadRangeCache cache(file.get(), options, getDefaultPool()); + + cache.cache({{1, 2}, {3, 2}, {8, 2}, {20, 2}, {25, 0}}); + cache.cache({{10, 4}, {14, 0}, {15, 4}}); + + auto assert_slice_equal = [](const BufferSlice& slice, const std::string& expected) { + ASSERT_TRUE(slice.buffer); + ASSERT_EQ(expected, std::string_view(slice.buffer->data() + slice.offset, slice.length)); + }; + + BufferSlice slice; + + slice = cache.read({20, 2}); + assert_slice_equal(slice, "uv"); + + slice = cache.read({1, 2}); + assert_slice_equal(slice, "bc"); + + slice = cache.read({3, 2}); + assert_slice_equal(slice, "de"); + + slice = cache.read({8, 2}); + assert_slice_equal(slice, "ij"); + + slice = cache.read({10, 4}); + assert_slice_equal(slice, "klmn"); + + slice = cache.read({15, 4}); + assert_slice_equal(slice, "pqrs"); + + // Zero-sized + slice = cache.read({14, 0}); + assert_slice_equal(slice, ""); + slice = cache.read({25, 0}); + assert_slice_equal(slice, ""); + + // Non-cached ranges + ASSERT_FALSE(cache.read({20, 
+ ASSERT_FALSE(cache.read({20, 3}).buffer); + ASSERT_FALSE(cache.read({19, 3}).buffer); + ASSERT_FALSE(cache.read({0, 3}).buffer); + ASSERT_FALSE(cache.read({25, 2}).buffer); + + // Release cache entries before 15. After that cache entries would be: {10, 9}, {20, 2} + cache.evictEntriesBefore(15); + ASSERT_FALSE(cache.read({1, 2}).buffer); + ASSERT_FALSE(cache.read({8, 2}).buffer); + slice = cache.read({10, 4}); + assert_slice_equal(slice, "klmn"); + slice = cache.read({20, 2}); + assert_slice_equal(slice, "uv"); + } +} // namespace orc diff --git a/c++/test/TestCompression.cc b/c++/test/TestCompression.cc index a77800a3dd..e95a6f0169 100644 --- a/c++/test/TestCompression.cc +++ b/c++/test/TestCompression.cc @@ -42,12 +42,12 @@ namespace orc { } void decompressAndVerify(const MemoryOutputStream& memStream, CompressionKind kind, - const char* data, size_t size, MemoryPool& pool) { + const char* data, size_t size, MemoryPool& pool, uint64_t capacity) { auto inputStream = std::make_unique<SeekableArrayInputStream>(memStream.getData(), memStream.getLength()); std::unique_ptr<SeekableInputStream> decompressStream = - createDecompressor(kind, std::move(inputStream), 1024, pool, getDefaultReaderMetrics()); + createDecompressor(kind, std::move(inputStream), capacity, pool, getDefaultReaderMetrics()); const char* decompressedBuffer; int decompressedSize; @@ -66,7 +66,7 @@ namespace orc { CompressionStrategy strategy, uint64_t capacity, uint64_t block, MemoryPool& pool, const char* data, size_t dataSize) { std::unique_ptr<BufferedOutputStream> compressStream = - createCompressor(kind, outStream, strategy, capacity, block, pool, nullptr); + createCompressor(kind, outStream, strategy, capacity, block, block, pool, nullptr); size_t pos = 0; char* compressBuffer; @@ -99,7 +99,7 @@ namespace orc { char testData[] = "hello world!"; compressAndVerify(kind, &memStream, CompressionStrategy_SPEED, capacity, block, *pool, testData, sizeof(testData)); - decompressAndVerify(memStream, kind, testData, sizeof(testData), *pool); + decompressAndVerify(memStream, kind, testData, sizeof(testData), *pool, capacity); } TEST(TestCompression, zlib_compress_original_string) { @@ -117,7 +117,7 @@ namespace orc { char testData[] = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; compressAndVerify(kind, &memStream, CompressionStrategy_SPEED, capacity, block, *pool, testData, sizeof(testData)); - decompressAndVerify(memStream, kind, testData, sizeof(testData), *pool); + decompressAndVerify(memStream, kind, testData, sizeof(testData), *pool, capacity); } TEST(TestCompression, compress_simple_repeated_string) { @@ -138,7 +138,7 @@ namespace orc { } compressAndVerify(kind, &memStream, CompressionStrategy_SPEED, capacity, block, *pool, testData, 170); - decompressAndVerify(memStream, kind, testData, 170, *pool); + decompressAndVerify(memStream, kind, testData, 170, *pool, capacity); } TEST(TestCompression, zlib_compress_two_blocks) { @@ -158,7 +158,7 @@ namespace orc { generateRandomData(testData, dataSize, true); compressAndVerify(kind, &memStream, CompressionStrategy_SPEED, capacity, block, *pool, testData, dataSize); - decompressAndVerify(memStream, kind, testData, dataSize, *pool); + decompressAndVerify(memStream, kind, testData, dataSize, *pool, capacity); delete[] testData; } @@ -179,7 +179,7 @@ namespace orc { generateRandomData(testData, dataSize, false); compressAndVerify(kind, &memStream, CompressionStrategy_SPEED, capacity, block, *pool, testData, dataSize); - decompressAndVerify(memStream, kind, testData, dataSize, *pool); + decompressAndVerify(memStream, kind, testData, dataSize, *pool, capacity); delete[]
testData; } @@ -205,7 +205,7 @@ namespace orc { } std::unique_ptr<BufferedOutputStream> compressStream = createCompressor( - kind, &memStream, CompressionStrategy_SPEED, capacity, block, *pool, nullptr); + kind, &memStream, CompressionStrategy_SPEED, capacity, block, block, *pool, nullptr); EXPECT_TRUE(ps.SerializeToZeroCopyStream(compressStream.get())); compressStream->flush(); @@ -213,8 +213,8 @@ auto inputStream = std::make_unique<SeekableArrayInputStream>(memStream.getData(), memStream.getLength()); - std::unique_ptr<SeekableInputStream> decompressStream = - createDecompressor(kind, std::move(inputStream), 1024, *pool, getDefaultReaderMetrics()); + std::unique_ptr<SeekableInputStream> decompressStream = createDecompressor( + kind, std::move(inputStream), capacity, *pool, getDefaultReaderMetrics()); proto::PostScript ps2; ps2.ParseFromZeroCopyStream(decompressStream.get()); @@ -312,7 +312,7 @@ namespace orc { uint64_t batchSize = 1024, blockSize = 256; AppendOnlyBufferedStream outStream(createCompressor( - kind, &memStream, strategy, DEFAULT_MEM_STREAM_SIZE, blockSize, *pool, nullptr)); + kind, &memStream, strategy, DEFAULT_MEM_STREAM_SIZE, blockSize, blockSize, *pool, nullptr)); // write 3 batches of data and record positions between every batch size_t row = 0; @@ -335,7 +335,7 @@ namespace orc { auto inputStream = std::make_unique<SeekableArrayInputStream>(memStream.getData(), memStream.getLength()); std::unique_ptr<SeekableInputStream> decompressStream = createDecompressor( - kind, std::move(inputStream), blockSize, *pool, getDefaultReaderMetrics()); + kind, std::move(inputStream), DEFAULT_MEM_STREAM_SIZE, *pool, getDefaultReaderMetrics()); // prepare positions to seek to EXPECT_EQ(rowIndexEntry1.positions_size(), rowIndexEntry2.positions_size()); diff --git a/c++/test/TestConvertColumnReader.cc b/c++/test/TestConvertColumnReader.cc index 83798289db..6096fe4573 100644 --- a/c++/test/TestConvertColumnReader.cc +++ b/c++/test/TestConvertColumnReader.cc @@ -27,6 +27,7 @@ #include "ConvertColumnReader.hh" #include "MemoryInputStream.hh" #include "MemoryOutputStream.hh" +#include namespace orc { @@ -650,6 +651,10 @@ namespace orc { auto& readC2 = dynamic_cast<Decimal128VectorBatch&>(*readStructBatch.fields[1]); auto& readC3 = dynamic_cast<Decimal64VectorBatch&>(*readStructBatch.fields[2]); auto& readC4 = dynamic_cast<Decimal128VectorBatch&>(*readStructBatch.fields[3]); + EXPECT_TRUE(9 == readC1.precision && 5 == readC1.scale); + EXPECT_TRUE(20 == readC2.precision && 5 == readC2.scale); + EXPECT_TRUE(10 == readC3.precision && 3 == readC3.scale); + EXPECT_TRUE(19 == readC4.precision && 3 == readC4.scale); EXPECT_EQ(TEST_CASES, readBatch->numElements); for (int i = 0; i < TEST_CASES / 2; i++) { size_t idx = static_cast<size_t>(i); @@ -815,4 +820,411 @@ namespace orc { } } + TEST(ConvertColumnReader, TestConvertStringVariantToNumeric) { + constexpr int DEFAULT_MEM_STREAM_SIZE = 10 * 1024 * 1024; + constexpr int TEST_CASES = 6; + MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE); + std::unique_ptr<Type> fileType( + Type::buildTypeFromString("struct")); + std::shared_ptr<Type> readType(Type::buildTypeFromString("struct")); + WriterOptions options; + auto writer = createWriter(*fileType, &memStream, options); + auto batch = writer->createRowBatch(TEST_CASES); + auto structBatch = dynamic_cast<StructVectorBatch*>(batch.get()); + auto& c1 = dynamic_cast<StringVectorBatch&>(*structBatch->fields[0]); + auto& c2 = dynamic_cast<StringVectorBatch&>(*structBatch->fields[1]); + auto& c3 = dynamic_cast<StringVectorBatch&>(*structBatch->fields[2]); + std::vector<std::string> raw1{"", "123456", "0", "-1234567890", "999999999999999999999999", + "error"}; + std::vector<std::string> raw2{"", "123456", "0", "-1234567890", "999999999999999999999999", + "error"}; + std::vector<std::string> raw3{ + "", "123456", "-0.0",
"-123456789.0123", "1000000000000000000000000000000000000000", + "error"}; + + c1.notNull[0] = c2.notNull[0] = c3.notNull[0] = false; + for (int i = 1; i < TEST_CASES; i++) { + c1.data[i] = raw1[i].data(); + c1.length[i] = raw1[i].length(); + c1.notNull[i] = true; + + c2.data[i] = raw2[i].data(); + c2.length[i] = raw2[i].length(); + c2.notNull[i] = true; + + c3.data[i] = raw3[i].data(); + c3.length[i] = raw3[i].length(); + c3.notNull[i] = true; + } + + structBatch->numElements = c1.numElements = c2.numElements = c3.numElements = TEST_CASES; + structBatch->hasNulls = c1.hasNulls = c2.hasNulls = c3.hasNulls = true; + writer->add(*batch); + writer->close(); + auto inStream = std::make_unique(memStream.getData(), memStream.getLength()); + auto pool = getDefaultPool(); + auto reader = createReader(*pool, std::move(inStream)); + RowReaderOptions rowReaderOptions; + rowReaderOptions.setUseTightNumericVector(true); + rowReaderOptions.setReadType(readType); + auto rowReader = reader->createRowReader(rowReaderOptions); + auto readBatch = rowReader->createRowBatch(TEST_CASES); + EXPECT_EQ(true, rowReader->next(*readBatch)); + + auto& readSturctBatch = dynamic_cast(*readBatch); + auto& readC1 = dynamic_cast(*readSturctBatch.fields[0]); + auto& readC2 = dynamic_cast(*readSturctBatch.fields[1]); + auto& readC3 = dynamic_cast(*readSturctBatch.fields[2]); + + EXPECT_FALSE(readC1.notNull[0]); + EXPECT_FALSE(readC2.notNull[0]); + EXPECT_FALSE(readC3.notNull[0]); + + for (int i = 1; i < 4; i++) { + EXPECT_TRUE(readC1.notNull[i]); + EXPECT_TRUE(readC2.notNull[i]); + EXPECT_TRUE(readC3.notNull[i]); + } + + for (int i = 4; i <= 5; i++) { + EXPECT_FALSE(readC1.notNull[i]) << i; + EXPECT_FALSE(readC2.notNull[i]) << i; + EXPECT_FALSE(readC3.notNull[i]) << i; + } + + EXPECT_EQ(readC1.data[1], 1); + EXPECT_EQ(readC2.data[1], 123456); + EXPECT_FLOAT_EQ(readC3.data[1], 123456); + + EXPECT_EQ(readC1.data[2], 0); + EXPECT_EQ(readC2.data[2], 0); + EXPECT_FLOAT_EQ(readC3.data[2], -0.0); + + EXPECT_EQ(readC1.data[3], 1); + EXPECT_EQ(readC2.data[3], -1234567890); + EXPECT_FLOAT_EQ(readC3.data[3], -123456789.0123); + } + + TEST(ConvertColumnReader, TestConvertStringVariant) { + constexpr int DEFAULT_MEM_STREAM_SIZE = 10 * 1024 * 1024; + constexpr int TEST_CASES = 4; + MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE); + std::unique_ptr fileType( + Type::buildTypeFromString("struct")); + std::shared_ptr readType( + Type::buildTypeFromString("struct")); + WriterOptions options; + auto writer = createWriter(*fileType, &memStream, options); + auto batch = writer->createRowBatch(TEST_CASES); + auto structBatch = dynamic_cast(batch.get()); + auto& c1 = dynamic_cast(*structBatch->fields[0]); + auto& c2 = dynamic_cast(*structBatch->fields[1]); + auto& c3 = dynamic_cast(*structBatch->fields[2]); + + std::vector raw1{"", "12345", "1", "1234"}; + std::vector raw2{"", "12345", "1", "1234"}; + std::vector raw3{"", "12345", "1", "1234"}; + + c1.notNull[0] = c2.notNull[0] = c3.notNull[0] = false; + for (int i = 1; i < TEST_CASES; i++) { + c1.data[i] = raw1[i].data(); + c1.length[i] = raw1[i].length(); + c1.notNull[i] = true; + + c2.data[i] = raw2[i].data(); + c2.length[i] = raw2[i].length(); + c2.notNull[i] = true; + + c3.data[i] = raw3[i].data(); + c3.length[i] = raw3[i].length(); + c3.notNull[i] = true; + } + structBatch->numElements = c1.numElements = c2.numElements = c3.numElements = TEST_CASES; + structBatch->hasNulls = c1.hasNulls = c2.hasNulls = c3.hasNulls = true; + writer->add(*batch); + writer->close(); + auto inStream = 
std::make_unique<MemoryInputStream>(memStream.getData(), memStream.getLength()); + auto pool = getDefaultPool(); + auto reader = createReader(*pool, std::move(inStream)); + RowReaderOptions rowReaderOptions; + rowReaderOptions.setUseTightNumericVector(true); + rowReaderOptions.setReadType(readType); + auto rowReader = reader->createRowReader(rowReaderOptions); + auto readBatch = rowReader->createRowBatch(TEST_CASES); + EXPECT_EQ(true, rowReader->next(*readBatch)); + + auto& readStructBatch = dynamic_cast<StructVectorBatch&>(*readBatch); + auto& readC1 = dynamic_cast<StringVectorBatch&>(*readStructBatch.fields[0]); + auto& readC2 = dynamic_cast<StringVectorBatch&>(*readStructBatch.fields[1]); + auto& readC3 = dynamic_cast<StringVectorBatch&>(*readStructBatch.fields[2]); + + EXPECT_FALSE(readC1.notNull[0]); + EXPECT_FALSE(readC2.notNull[0]); + EXPECT_FALSE(readC3.notNull[0]); + + for (int i = 1; i < TEST_CASES; i++) { + EXPECT_TRUE(readC1.notNull[i]); + EXPECT_TRUE(readC2.notNull[i]); + EXPECT_TRUE(readC3.notNull[i]); + } + + EXPECT_EQ(std::string(readC1.data[1], readC1.length[1]), "12345"); + EXPECT_EQ(std::string(readC2.data[1], readC2.length[1]), "1234"); + EXPECT_EQ(std::string(readC3.data[1], readC3.length[1]), "1234"); + + EXPECT_EQ(std::string(readC1.data[2], readC1.length[2]), "1 "); + EXPECT_EQ(std::string(readC2.data[2], readC2.length[2]), "1 "); + EXPECT_EQ(std::string(readC3.data[2], readC3.length[2]), "1"); + + EXPECT_EQ(std::string(readC1.data[3], readC1.length[3]), "1234 "); + EXPECT_EQ(std::string(readC2.data[3], readC2.length[3]), "1234"); + EXPECT_EQ(std::string(readC3.data[3], readC3.length[3]), "1234"); + } + + // Returns year/month/day triple in civil calendar + // Preconditions: z is number of days since 1970-01-01 and is in the range: + // [numeric_limits<Int>::min(), numeric_limits<Int>::max()-719468]. + template <typename Int> + constexpr std::tuple<Int, unsigned, unsigned> civil_from_days(Int z) noexcept { + static_assert(std::numeric_limits<unsigned>::digits >= 18, + "This algorithm has not been ported to a 16 bit unsigned integer"); + static_assert(std::numeric_limits<Int>::digits >= 20, + "This algorithm has not been ported to a 16 bit signed integer"); + z += 719468; + const Int era = (z >= 0 ? z : z - 146096) / 146097; + const unsigned doe = static_cast<unsigned>(z - era * 146097); // [0, 146096] + const unsigned yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365; // [0, 399] + const Int y = static_cast<Int>(yoe) + era * 400; + const unsigned doy = doe - (365 * yoe + yoe / 4 - yoe / 100); // [0, 365] + const unsigned mp = (5 * doy + 2) / 153; // [0, 11] + const unsigned d = doy - (153 * mp + 2) / 5 + 1; // [1, 31] + const unsigned m = mp < 10 ? mp + 3 : mp - 9; // [1, 12] + return std::tuple<Int, unsigned, unsigned>(y + (m <= 2), m, d); + }
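+ // civil_from_days above is Howard Hinnant's public-domain days-to-civil-date algorithm, kept verbatim so the test is self-contained. + // timestampToString below renders (seconds, nanos) in the given zone as "YYYY-MM-DD HH:MM:SS[.frac] zone" for string comparison.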
+ + static std::string timestampToString(int64_t seconds, int64_t nanos, + const std::string& zoneName) { + auto& timezone = getTimezoneByName(zoneName); + seconds = timezone.convertToUTC(seconds); + time_t t = static_cast<time_t>(seconds); + char buffer[100]; + constexpr auto SECOND_IN_DAY = 3600 * 24; + auto day = t < 0 ? (t - SECOND_IN_DAY + 1) / SECOND_IN_DAY : t / SECOND_IN_DAY; + + auto [y, m, d] = civil_from_days(day); + auto second_in_day = t % (3600 * 24); + if (second_in_day < 0) { + second_in_day += 3600 * 24; + } + auto h = second_in_day % (3600 * 24) / 3600; + auto min = second_in_day % 3600 / 60; + auto s = second_in_day % 60; + std::snprintf(buffer, sizeof(buffer), "%04d-%02d-%02d %02ld:%02ld:%02ld", y, m, d, h, min, s); + std::string result(buffer); + if (nanos) { + while (nanos % 10 == 0) nanos /= 10; + result = result + "." + std::to_string(nanos); + } + result = result + " " + zoneName; + return result; + } + + TEST(ConvertColumnReader, TestConvertStringVariantToTimestamp) { + constexpr int DEFAULT_MEM_STREAM_SIZE = 10 * 1024 * 1024; + constexpr int TEST_CASES = 1024; + const std::string writerTimezone = "America/New_York"; + const std::string readerTimezone = "Australia/Sydney"; + MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE); + std::unique_ptr<Type> fileType(Type::buildTypeFromString("struct")); + std::shared_ptr<Type> readType( + Type::buildTypeFromString("struct")); + WriterOptions options; + options.setTimezoneName(writerTimezone); + auto writer = createWriter(*fileType, &memStream, options); + auto batch = writer->createRowBatch(TEST_CASES); + auto structBatch = dynamic_cast<StructVectorBatch*>(batch.get()); + auto& c1 = dynamic_cast<StringVectorBatch&>(*structBatch->fields[0]); + auto& c2 = dynamic_cast<StringVectorBatch&>(*structBatch->fields[1]); + + std::vector<std::string> raw1, raw2; + raw1.reserve(TEST_CASES * 3); + raw2.reserve(TEST_CASES * 3); + std::vector<int64_t> ts1, ts2; + + for (int i = 0; i < TEST_CASES; i++) { + char buff[100]; + auto size = ::snprintf(buff, sizeof(buff), "%04d-%02d-27 12:34:56.789", 1960 + (i / 12), + (i % 12) + 1); + raw1.emplace_back(buff, size); + raw2.push_back(raw1.back() + " " + writerTimezone); + c1.data[i] = const_cast<char*>(raw1.back().c_str()); + c1.length[i] = raw1.back().length(); + c2.data[i] = const_cast<char*>(raw2.back().c_str()); + c2.length[i] = raw2.back().length(); + } + structBatch->numElements = c1.numElements = c2.numElements = TEST_CASES; + structBatch->hasNulls = c1.hasNulls = c2.hasNulls = false; + writer->add(*batch); + + for (int i = 0; i < TEST_CASES; i++) { + char buff[100]; + auto size = + ::snprintf(buff, sizeof(buff), "%04d-%02d-27 12:34:56", 1960 + (i / 12), (i % 12) + 1); + raw1.emplace_back(buff, size); + raw2.push_back(raw1.back() + " " + writerTimezone); + c1.data[i] = const_cast<char*>(raw1.back().c_str()); + c1.length[i] = raw1.back().length(); + c2.data[i] = const_cast<char*>(raw2.back().c_str()); + c2.length[i] = raw2.back().length(); + } + structBatch->numElements = c1.numElements = c2.numElements = TEST_CASES; + structBatch->hasNulls = c1.hasNulls = c2.hasNulls = false; + writer->add(*batch); + + { + raw1.push_back("2024?11-14 00:01:02"); + raw2.push_back("2024-01-02 03:04:05.678 tz/error"); + c1.data[0] = const_cast<char*>(raw1.back().c_str()); + c1.length[0] = raw1.back().length(); + c2.data[0] = const_cast<char*>(raw2.back().c_str()); + c2.length[0] = raw2.back().length(); + + c1.notNull[1] = false; + c2.notNull[1] = false; + + raw1.push_back("2024-12-14 00:01:02.-1"); + raw2.push_back("2024-01-02 03:04:05.678"); + c1.data[2] = const_cast<char*>(raw1.back().c_str()); + c1.length[2] = raw1.back().length(); + c2.data[2] = const_cast<char*>(raw2.back().c_str()); + c2.length[2] = raw2.back().length(); + } + structBatch->numElements = c1.numElements = c2.numElements = 3; + structBatch->hasNulls = c1.hasNulls = c2.hasNulls = true; + writer->add(*batch); + + writer->close(); + + auto inStream = std::make_unique<MemoryInputStream>(memStream.getData(), memStream.getLength()); + auto pool = getDefaultPool(); + auto reader = createReader(*pool, std::move(inStream)); + RowReaderOptions rowReaderOptions; + rowReaderOptions.setUseTightNumericVector(true); + rowReaderOptions.setReadType(readType); + rowReaderOptions.setTimezoneName(readerTimezone); + rowReaderOptions.throwOnSchemaEvolutionOverflow(true); + auto rowReader = reader->createRowReader(rowReaderOptions); + auto readBatch = rowReader->createRowBatch(TEST_CASES * 2); + EXPECT_EQ(true, rowReader->next(*readBatch)); + + auto&
readStructBatch = dynamic_cast<StructVectorBatch&>(*readBatch); + auto& readC1 = dynamic_cast<TimestampVectorBatch&>(*readStructBatch.fields[0]); + auto& readC2 = dynamic_cast<TimestampVectorBatch&>(*readStructBatch.fields[1]); + + for (int i = 0; i < TEST_CASES * 2; i++) { + EXPECT_TRUE(readC1.notNull[i]); + EXPECT_TRUE(readC2.notNull[i]); + EXPECT_EQ(raw1[i] + " " + readerTimezone, + timestampToString(readC1.data[i], readC1.nanoseconds[i], readerTimezone)); + EXPECT_EQ(raw2[i], timestampToString(readC2.data[i], readC2.nanoseconds[i], writerTimezone)); + } + + rowReaderOptions.throwOnSchemaEvolutionOverflow(false); + rowReader = reader->createRowReader(rowReaderOptions); + EXPECT_EQ(true, rowReader->next(*readBatch)); + EXPECT_EQ(true, rowReader->next(*readBatch)); + EXPECT_EQ(3, readBatch->numElements); + EXPECT_FALSE(readC1.notNull[0]); + EXPECT_FALSE(readC2.notNull[0]); + EXPECT_FALSE(readC1.notNull[1]); + EXPECT_FALSE(readC2.notNull[1]); + EXPECT_FALSE(readC1.notNull[2]); + EXPECT_FALSE(readC2.notNull[2]); + } + + TEST(ConvertColumnReader, TestConvertStringVariantToDecimal) { + constexpr int DEFAULT_MEM_STREAM_SIZE = 10 * 1024 * 1024; + constexpr int TEST_CASES = 1024; + MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE); + std::unique_ptr<Type> fileType(Type::buildTypeFromString("struct")); + std::shared_ptr<Type> readType( + Type::buildTypeFromString("struct")); + WriterOptions options; + auto writer = createWriter(*fileType, &memStream, options); + auto batch = writer->createRowBatch(TEST_CASES); + auto structBatch = dynamic_cast<StructVectorBatch*>(batch.get()); + auto& c1 = dynamic_cast<StringVectorBatch&>(*structBatch->fields[0]); + auto& c2 = dynamic_cast<StringVectorBatch&>(*structBatch->fields[1]); + + // <raw string, c1 notNull, c2 notNull, expected c1 value, expected c2 value> + std::vector<std::tuple<std::string, bool, bool, int64_t, Int128>> rawDataAndExpected; + + rawDataAndExpected = { + /* 0 */ {"123456789012345678901234567890123456789", false, false, int64_t(), Int128()}, + /* 1 */ {"123456789012345678901234567890.1234567890", false, false, int64_t(), Int128()}, + /* 2 */ {"-123456789012345678901234567890.1234567890", false, false, int64_t(), Int128()}, + /* 3 */ {"-foo.bar", false, false, int64_t(), Int128()}, + /* 4 */ {"-foo.123", false, false, int64_t(), Int128()}, + /* 5 */ {"-123.foo", false, false, int64_t(), Int128()}, + /* 6 */ {"-123foo.123", false, false, int64_t(), Int128()}, + /* 7 */ {"-123.123foo", false, false, int64_t(), Int128()}, + /* 8 */ {"-.", false, false, int64_t(), Int128()}, + /* 9 */ {"-", false, false, int64_t(), Int128()}, + /* 10 */ {".", false, false, int64_t(), Int128()}, + /* 11 */ {"", false, false, int64_t(), Int128()}, + /* 12 */ {".12345", false, false, int64_t(), Int128()}, + /* 13 */ {"12345.", false, false, int64_t(), Int128()}, + /* 14 */ {"-1", true, true, -100000LL, Int128("-10000000000")}, + /* 15 */ {"-1.0", true, true, -100000LL, Int128("-10000000000")}, + /* 16 */ {"1", true, true, 100000, Int128("10000000000")}, + /* 17 */ {"1.0", true, true, 100000, Int128("10000000000")}, + /* 18 */ {"12345", true, true, 1234500000, Int128("123450000000000")}, + /* 19 */ {"12345.12345", true, true, 1234512345LL, Int128("123451234500000")}, + /* 20 */ {"-12345.12345", true, true, -1234512345LL, Int128("-123451234500000")}, + /* 21 */ {"1234567890", false, true, int64_t(), Int128("12345678900000000000")}, + /* 22 */ {"-1234567890", false, true, int64_t(), Int128("-12345678900000000000")}, + /* 23 */ {"1234567890.123", false, true, int64_t(), Int128("12345678901230000000")}, + /* 24 */ {"-1234567890.1234567", false, true, int64_t(), Int128("-12345678901234567000")}, + /* 25 */ {"1234567890123.12345", false, true, int64_t(), Int128("12345678901231234500000")}, + /* 26 */
{"-1234567890123.12345678901", false, true, int64_t(), Int128("-12345678901231234567890")}}; + for (int i = 0; i < rawDataAndExpected.size(); i++) { + c1.data[i] = c2.data[i] = const_cast(std::get<0>(rawDataAndExpected[i]).c_str()); + c1.length[i] = c2.length[i] = std::get<0>(rawDataAndExpected[i]).length(); + } + + structBatch->numElements = c1.numElements = c2.numElements = rawDataAndExpected.size(); + structBatch->hasNulls = c1.hasNulls = c2.hasNulls = false; + writer->add(*batch); + writer->close(); + + auto inStream = std::make_unique(memStream.getData(), memStream.getLength()); + auto pool = getDefaultPool(); + auto reader = createReader(*pool, std::move(inStream)); + RowReaderOptions rowReaderOptions; + rowReaderOptions.setUseTightNumericVector(true); + rowReaderOptions.setReadType(readType); + auto rowReader = reader->createRowReader(rowReaderOptions); + auto readBatch = rowReader->createRowBatch(TEST_CASES); + EXPECT_EQ(true, rowReader->next(*readBatch)); + + auto& readSturctBatch = dynamic_cast(*readBatch); + auto& readC1 = dynamic_cast(*readSturctBatch.fields[0]); + auto& readC2 = dynamic_cast(*readSturctBatch.fields[1]); + EXPECT_EQ(readBatch->numElements, rawDataAndExpected.size()); + + for (int i = 0; i < readBatch->numElements; i++) { + bool expectedNotNull1 = std::get<1>(rawDataAndExpected[i]); + bool expectedNotNull2 = std::get<2>(rawDataAndExpected[i]); + EXPECT_EQ(expectedNotNull1, readC1.notNull[i]) << i; + EXPECT_EQ(expectedNotNull2, readC2.notNull[i]) << i; + if (expectedNotNull1) { + EXPECT_EQ(std::get<3>(rawDataAndExpected[i]), readC1.values[i]) << i; + } + if (expectedNotNull2) { + EXPECT_EQ(std::get<4>(rawDataAndExpected[i]), readC2.values[i]) << i; + } + } + } + } // namespace orc diff --git a/c++/test/TestDecompression.cc b/c++/test/TestDecompression.cc index dc6caeda0e..125c5e85a4 100644 --- a/c++/test/TestDecompression.cc +++ b/c++/test/TestDecompression.cc @@ -395,6 +395,26 @@ namespace orc { ASSERT_TRUE(!result->Next(&ptr, &length)); } + TEST_F(TestDecompression, testLzoOverflow) { + const unsigned char bad_lzo_data[] = {// Header: compressedSize = 12, original = false + 0x18, 0x00, 0x00, + + // LZO body: token and literal length extension + 0x00, // token: extended literal length + 0xFF, // extension byte 1 + + // Literal data: only 10 bytes far less than 273 + 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'}; + + std::unique_ptr result = createDecompressor( + CompressionKind_LZO, + std::make_unique(bad_lzo_data, ARRAY_SIZE(bad_lzo_data)), + 128 * 1024, *getDefaultPool(), getDefaultReaderMetrics()); + const void* ptr; + int length; + EXPECT_THROW(result->Next(&ptr, &length), ParseError); + } + TEST_F(TestDecompression, testLz4Empty) { const unsigned char buffer[] = {0}; std::unique_ptr result = createDecompressor( @@ -545,7 +565,7 @@ namespace orc { *getDefaultPool(), getDefaultReaderMetrics()); const void* ptr; int length; - ASSERT_THROW(result->BackUp(20), std::logic_error); + ASSERT_THROW(result->BackUp(20), CompressionError); ASSERT_EQ(true, result->Next(&ptr, &length)); ASSERT_EQ(30, length); for (int i = 0; i < 10; ++i) { @@ -554,7 +574,7 @@ namespace orc { } } result->BackUp(10); - ASSERT_THROW(result->BackUp(2), std::logic_error); + ASSERT_THROW(result->BackUp(2), CompressionError); ASSERT_EQ(true, result->Next(&ptr, &length)); ASSERT_EQ(10, length); for (int i = 0; i < 10; ++i) { diff --git a/c++/test/TestDictionaryEncoding.cc b/c++/test/TestDictionaryEncoding.cc index f3dcaa0067..40c1b1a605 100644 --- a/c++/test/TestDictionaryEncoding.cc +++ 
b/c++/test/TestDictionaryEncoding.cc @@ -25,6 +25,7 @@ #include "wrap/gtest-wrapper.h" #include +#include #include namespace orc { @@ -53,6 +54,7 @@ namespace orc { WriterOptions options; options.setStripeSize(1024); + options.setMemoryBlockSize(64); options.setCompressionBlockSize(1024); options.setCompression(CompressionKind_ZLIB); options.setMemoryPool(pool); @@ -109,6 +111,7 @@ namespace orc { WriterOptions options; options.setStripeSize(1024); + options.setMemoryBlockSize(64); options.setCompressionBlockSize(1024); options.setCompression(CompressionKind_ZLIB); options.setMemoryPool(pool); @@ -171,6 +174,7 @@ namespace orc { WriterOptions options; options.setStripeSize(1024); options.setCompressionBlockSize(1024); + options.setMemoryBlockSize(64); options.setCompression(CompressionKind_ZLIB); options.setMemoryPool(pool); options.setDictionaryKeySizeThreshold(threshold); @@ -233,6 +237,7 @@ namespace orc { WriterOptions options; options.setStripeSize(1024); + options.setMemoryBlockSize(64); options.setCompressionBlockSize(1024); options.setCompression(CompressionKind_ZLIB); options.setMemoryPool(pool); @@ -302,7 +307,8 @@ namespace orc { WriterOptions options; options.setStripeSize(1); - options.setCompressionBlockSize(1024); + options.setMemoryBlockSize(1024); + options.setCompressionBlockSize(2 * 1024); options.setCompression(CompressionKind_ZLIB); options.setMemoryPool(pool); options.setDictionaryKeySizeThreshold(threshold); @@ -429,4 +435,57 @@ namespace orc { testDictionaryMultipleStripes(DICT_THRESHOLD, false); testDictionaryMultipleStripes(FALLBACK_THRESHOLD, false); } + + TEST(DictionaryEncoding, decodeDictionary) { + size_t rowCount = 8192; + size_t dictionarySize = 100; + auto* memoryPool = getDefaultPool(); + + auto encodedStringBatch = std::make_shared<EncodedStringVectorBatch>(rowCount, *memoryPool); + EXPECT_FALSE(encodedStringBatch->dictionaryDecoded); + encodedStringBatch->numElements = rowCount; + encodedStringBatch->hasNulls = true; + encodedStringBatch->isEncoded = true; + encodedStringBatch->dictionary = std::make_shared<StringDictionary>(*memoryPool); + + auto& dictionary = *encodedStringBatch->dictionary; + dictionary.dictionaryBlob.resize(3 * dictionarySize); + dictionary.dictionaryOffset.resize(dictionarySize + 1); + dictionary.dictionaryOffset[0] = 0; + for (uint64_t i = 0; i < dictionarySize; ++i) { + std::ostringstream oss; + oss << std::setw(3) << std::setfill('0') << i; + + auto str = oss.str(); + memcpy(&dictionary.dictionaryBlob[3 * i], str.data(), str.size()); + dictionary.dictionaryOffset[i + 1] = 3 * (i + 1); + } + + for (uint64_t i = 0; i < rowCount; ++i) { + if (i % 10 == 0) { + encodedStringBatch->notNull[i] = 0; + encodedStringBatch->index[i] = 0; + } else { + encodedStringBatch->notNull[i] = 1; + encodedStringBatch->index[i] = i % dictionarySize; + } + } + + encodedStringBatch->decodeDictionary(); + EXPECT_TRUE(encodedStringBatch->dictionaryDecoded); + EXPECT_EQ(0, encodedStringBatch->blob.size()); + + for (uint64_t i = 0; i < rowCount; ++i) { + if (encodedStringBatch->notNull[i]) { + auto index = encodedStringBatch->index[i]; + char* buf = nullptr; + int64_t buf_size = 0; + dictionary.getValueByIndex(index, buf, buf_size); + + EXPECT_EQ(buf, encodedStringBatch->data[i]); + EXPECT_EQ(buf_size, encodedStringBatch->length[i]); + } + } + } + } // namespace orc diff --git a/c++/test/TestInt128.cc b/c++/test/TestInt128.cc index 54dcff4567..be5b65b3a7 100644 --- a/c++/test/TestInt128.cc +++ b/c++/test/TestInt128.cc @@ -555,6 +555,11 @@ namespace orc { num =
Int128("-12345678901122334455667788990011122233"); EXPECT_EQ("-12345678901122334455667788990011122233", num.toString()); + + num = Int128::maximumValue(); + EXPECT_EQ("170141183460469231731687303715884105727", num.toString()); + num = Int128::minimumValue(); + EXPECT_EQ("-170141183460469231731687303715884105728", num.toString()); } TEST(Int128, testToDecimalString) { diff --git a/c++/test/TestPredicateLeaf.cc b/c++/test/TestPredicateLeaf.cc index 2703776e39..3946123ec5 100644 --- a/c++/test/TestPredicateLeaf.cc +++ b/c++/test/TestPredicateLeaf.cc @@ -168,6 +168,12 @@ namespace orc { return colStats; } + static proto::ColumnStatistics createIncompleteNullStats() { + proto::ColumnStatistics colStats; + colStats.set_number_of_values(0); + return colStats; + } + static TruthValue evaluate(const PredicateLeaf& pred, const proto::ColumnStatistics& pbStats, const BloomFilter* bf = nullptr) { return pred.evaluate(WriterVersion_ORC_135, pbStats, bf); @@ -663,4 +669,10 @@ namespace orc { evaluate(pred8, createTimestampStats(2114380800, 1109000, 2114380800, 6789100))); } + TEST(TestPredicateLeaf, testLackOfSataistics) { + PredicateLeaf pred(PredicateLeaf::Operator::IS_NULL, PredicateDataType::STRING, 1, {}); + EXPECT_EQ(TruthValue::YES_NO, evaluate(pred, createStringStats("c", "d", true))); + EXPECT_EQ(TruthValue::YES_NO_NULL, evaluate(pred, createIncompleteNullStats())); + } + } // namespace orc diff --git a/c++/test/TestPredicatePushdown.cc b/c++/test/TestPredicatePushdown.cc index e949fc2898..5c8ed14e73 100644 --- a/c++/test/TestPredicatePushdown.cc +++ b/c++/test/TestPredicatePushdown.cc @@ -33,6 +33,7 @@ namespace orc { WriterOptions options; options.setStripeSize(1024 * 1024) .setCompressionBlockSize(1024) + .setMemoryBlockSize(64) .setCompression(CompressionKind_NONE) .setMemoryPool(pool) .setRowIndexStride(rowIndexStride); @@ -510,6 +511,7 @@ namespace orc { WriterOptions options; options.setStripeSize(1) .setCompressionBlockSize(1024) + .setMemoryBlockSize(64) .setCompression(CompressionKind_NONE) .setMemoryPool(pool) .setRowIndexStride(1000); diff --git a/c++/test/TestReader.cc b/c++/test/TestReader.cc index f709f693f1..f9df6edc92 100644 --- a/c++/test/TestReader.cc +++ b/c++/test/TestReader.cc @@ -155,7 +155,10 @@ namespace orc { ASSERT_THAT(rowReader->getSelectedColumns(), ElementsAreArray(expected)); } - std::unique_ptr createNestedListMemReader(MemoryOutputStream& memStream) { + std::unique_ptr createNestedListMemReader(MemoryOutputStream& memStream, + const std::vector& stripesToPrefetch, + const std::list& columnsToPrefetch, + bool prefetchTwice) { MemoryPool* pool = getDefaultPool(); auto type = std::unique_ptr( @@ -166,6 +169,7 @@ namespace orc { WriterOptions options; options.setStripeSize(1024 * 1024) .setCompressionBlockSize(1024) + .setMemoryBlockSize(64) .setCompression(CompressionKind_NONE) .setMemoryPool(pool) .setRowIndexStride(1000); @@ -217,20 +221,43 @@ namespace orc { auto inStream = std::make_unique(memStream.getData(), memStream.getLength()); ReaderOptions readerOptions; readerOptions.setMemoryPool(*pool); - return createReader(std::move(inStream), readerOptions); + auto reader = createReader(std::move(inStream), readerOptions); + + reader->preBuffer(stripesToPrefetch, columnsToPrefetch); + if (prefetchTwice) { + reader->preBuffer(stripesToPrefetch, columnsToPrefetch); + } + + return reader; } - TEST(TestReadIntent, testListAll) { + class TestReadIntentFromNestedList + : public ::testing::TestWithParam< + std::tuple, std::list, bool>> {}; + + 
TEST_P(TestReadIntentFromNestedList, testListAll) { + const auto& params = GetParam(); + const std::vector<uint32_t>& stripesToPrefetch = std::get<0>(params); + const std::list<uint64_t>& columnsToPrefetch = std::get<1>(params); + bool prefetchTwice = std::get<2>(params); + MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE); - std::unique_ptr<Reader> reader = createNestedListMemReader(memStream); + std::unique_ptr<Reader> reader = + createNestedListMemReader(memStream, stripesToPrefetch, columnsToPrefetch, prefetchTwice); // select all of int_array. verifySelection(reader, {{1, ReadIntent_ALL}}, {0, 1, 2}); } - TEST(TestReadIntent, testListOffsets) { + TEST_P(TestReadIntentFromNestedList, testListOffsets) { + const auto& params = GetParam(); + const std::vector<uint32_t>& stripesToPrefetch = std::get<0>(params); + const std::list<uint64_t>& columnsToPrefetch = std::get<1>(params); + bool prefetchTwice = std::get<2>(params); + MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE); - std::unique_ptr<Reader> reader = createNestedListMemReader(memStream); + std::unique_ptr<Reader> reader = + createNestedListMemReader(memStream, stripesToPrefetch, columnsToPrefetch, prefetchTwice); // select only the offsets of int_array. verifySelection(reader, {{1, ReadIntent_OFFSETS}}, {0, 1}); @@ -243,26 +270,44 @@ namespace orc { verifySelection(reader, {{3, ReadIntent_OFFSETS}, {5, ReadIntent_OFFSETS}}, {0, 3, 4, 5}); } - TEST(TestReadIntent, testListAllAndOffsets) { + TEST_P(TestReadIntentFromNestedList, testListAllAndOffsets) { + const auto& params = GetParam(); + const std::vector<uint32_t>& stripesToPrefetch = std::get<0>(params); + const std::list<uint64_t>& columnsToPrefetch = std::get<1>(params); + bool prefetchTwice = std::get<2>(params); + MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE); - std::unique_ptr<Reader> reader = createNestedListMemReader(memStream); + std::unique_ptr<Reader> reader = + createNestedListMemReader(memStream, stripesToPrefetch, columnsToPrefetch, prefetchTwice); // select all of int_array and only the outermost offsets of int_array_array_array. verifySelection(reader, {{1, ReadIntent_ALL}, {3, ReadIntent_OFFSETS}}, {0, 1, 2, 3}); } - TEST(TestReadIntent, testListConflictingIntent) { + TEST_P(TestReadIntentFromNestedList, testListConflictingIntent) { + const auto& params = GetParam(); + const std::vector<uint32_t>& stripesToPrefetch = std::get<0>(params); + const std::list<uint64_t>& columnsToPrefetch = std::get<1>(params); + bool prefetchTwice = std::get<2>(params); + MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE); - std::unique_ptr<Reader> reader = createNestedListMemReader(memStream); + std::unique_ptr<Reader> reader = + createNestedListMemReader(memStream, stripesToPrefetch, columnsToPrefetch, prefetchTwice); // test conflicting ReadIntent on nested list. verifySelection(reader, {{3, ReadIntent_OFFSETS}, {5, ReadIntent_ALL}}, {0, 3, 4, 5, 6}); verifySelection(reader, {{3, ReadIntent_ALL}, {5, ReadIntent_OFFSETS}}, {0, 3, 4, 5, 6}); } - TEST(TestReadIntent, testRowBatchContent) { + TEST_P(TestReadIntentFromNestedList, testRowBatchContent) { + const auto& params = GetParam(); + const std::vector<uint32_t>& stripesToPrefetch = std::get<0>(params); + const std::list<uint64_t>& columnsToPrefetch = std::get<1>(params); + bool prefetchTwice = std::get<2>(params); + MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE); - std::unique_ptr<Reader> reader = createNestedListMemReader(memStream); + std::unique_ptr<Reader> reader = + createNestedListMemReader(memStream, stripesToPrefetch, columnsToPrefetch, prefetchTwice); // select all of int_array and only the offsets of int_array_array.
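+ // preBuffer is only a prefetch hint, so the contents read back below must be identical with or without it.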
RowReaderOptions::IdReadIntentMap idReadIntentMap = {{1, ReadIntent_ALL}, @@ -298,7 +343,24 @@ namespace orc { EXPECT_EQ(nullptr, intArrayArrayArrayBatch.elements); } + INSTANTIATE_TEST_SUITE_P( + TestReadIntentFromNestedListInstance, TestReadIntentFromNestedList, + ::testing::Values( + std::make_tuple(std::vector<uint32_t>{}, std::list<uint64_t>{}, true), + std::make_tuple(std::vector<uint32_t>{}, std::list<uint64_t>{}, false), + std::make_tuple(std::vector<uint32_t>{}, std::list<uint64_t>{1, 3}, true), + std::make_tuple(std::vector<uint32_t>{}, std::list<uint64_t>{1, 3}, false), + std::make_tuple(std::vector<uint32_t>{0}, std::list<uint64_t>{}, true), + std::make_tuple(std::vector<uint32_t>{0}, std::list<uint64_t>{}, false), + std::make_tuple(std::vector<uint32_t>{0}, std::list<uint64_t>{1, 3}, true), + std::make_tuple(std::vector<uint32_t>{0}, std::list<uint64_t>{1, 3}, false), + std::make_tuple(std::vector<uint32_t>{1000}, std::list<uint64_t>{1000}, true), + std::make_tuple(std::vector<uint32_t>{1000}, std::list<uint64_t>{1000}, false))); + + std::unique_ptr<Reader> createNestedMapMemReader(MemoryOutputStream& memStream, + const std::vector<uint32_t>& stripesToPrefetch, + const std::list<uint64_t>& columnsToPrefetch, + bool prefetchTwice) { MemoryPool* pool = getDefaultPool(); auto type = std::unique_ptr<Type>( @@ -310,6 +372,7 @@ WriterOptions options; options.setStripeSize(1024 * 1024) .setCompressionBlockSize(1024) + .setMemoryBlockSize(64) .setCompression(CompressionKind_NONE) .setMemoryPool(pool) .setRowIndexStride(1000); @@ -387,20 +450,42 @@ namespace orc { auto inStream = std::make_unique<MemoryInputStream>(memStream.getData(), memStream.getLength()); ReaderOptions readerOptions; readerOptions.setMemoryPool(*pool); - return createReader(std::move(inStream), readerOptions); + auto reader = createReader(std::move(inStream), readerOptions); + + reader->preBuffer(stripesToPrefetch, columnsToPrefetch); + if (prefetchTwice) { + reader->preBuffer(stripesToPrefetch, columnsToPrefetch); + } + return reader; } - TEST(TestReadIntent, testMapAll) { + class TestReadIntentFromNestedMap + : public ::testing::TestWithParam< + std::tuple<std::vector<uint32_t>, std::list<uint64_t>, bool>> {}; + + TEST_P(TestReadIntentFromNestedMap, testMapAll) { + const auto& params = GetParam(); + const std::vector<uint32_t>& stripesToPrefetch = std::get<0>(params); + const std::list<uint64_t>& columnsToPrefetch = std::get<1>(params); + bool prefetchTwice = std::get<2>(params); + MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE); - std::unique_ptr<Reader> reader = createNestedMapMemReader(memStream); + std::unique_ptr<Reader> reader = + createNestedMapMemReader(memStream, stripesToPrefetch, columnsToPrefetch, prefetchTwice); // select all of single_map. verifySelection(reader, {{2, ReadIntent_ALL}}, {0, 2, 3, 4}); } - TEST(TestReadIntent, testMapOffsets) { + TEST_P(TestReadIntentFromNestedMap, testMapOffsets) { + const auto& params = GetParam(); + const std::vector<uint32_t>& stripesToPrefetch = std::get<0>(params); + const std::list<uint64_t>& columnsToPrefetch = std::get<1>(params); + bool prefetchTwice = std::get<2>(params); + MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE); - std::unique_ptr<Reader> reader = createNestedMapMemReader(memStream); + std::unique_ptr<Reader> reader = + createNestedMapMemReader(memStream, stripesToPrefetch, columnsToPrefetch, prefetchTwice); // select only the offsets of single_map.
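+ // Offsets-only intent selects the map column itself but not its key and value children.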
verifySelection(reader, {{2, ReadIntent_OFFSETS}}, {0, 2}); @@ -412,17 +497,29 @@ namespace orc { verifySelection(reader, {{5, ReadIntent_OFFSETS}, {9, ReadIntent_OFFSETS}}, {0, 5, 7, 9}); } - TEST(TestReadIntent, testMapAllAndOffsets) { + TEST_P(TestReadIntentFromNestedMap, testMapAllAndOffsets) { + const auto& params = GetParam(); + const std::vector<uint32_t>& stripesToPrefetch = std::get<0>(params); + const std::list<uint64_t>& columnsToPrefetch = std::get<1>(params); + bool prefetchTwice = std::get<2>(params); + MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE); - std::unique_ptr<Reader> reader = createNestedMapMemReader(memStream); + std::unique_ptr<Reader> reader = + createNestedMapMemReader(memStream, stripesToPrefetch, columnsToPrefetch, prefetchTwice); // select all of single_map and only the outermost offsets of nested_map. verifySelection(reader, {{2, ReadIntent_ALL}, {5, ReadIntent_OFFSETS}}, {0, 2, 3, 4, 5}); } - TEST(TestReadIntent, testMapConflictingIntent) { + TEST_P(TestReadIntentFromNestedMap, testMapConflictingIntent) { + const auto& params = GetParam(); + const std::vector<uint32_t>& stripesToPrefetch = std::get<0>(params); + const std::list<uint64_t>& columnsToPrefetch = std::get<1>(params); + bool prefetchTwice = std::get<2>(params); + MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE); - std::unique_ptr<Reader> reader = createNestedMapMemReader(memStream); + std::unique_ptr<Reader> reader = + createNestedMapMemReader(memStream, stripesToPrefetch, columnsToPrefetch, prefetchTwice); // test conflicting ReadIntent on nested_map. verifySelection(reader, {{5, ReadIntent_OFFSETS}, {9, ReadIntent_ALL}}, {0, 5, 7, 9, 10, 11}); @@ -432,9 +529,15 @@ namespace orc { {0, 5, 7, 8, 9, 10, 11}); } - TEST(TestReadIntent, testMapRowBatchContent) { + TEST_P(TestReadIntentFromNestedMap, testMapRowBatchContent) { + const auto& params = GetParam(); + const std::vector<uint32_t>& stripesToPrefetch = std::get<0>(params); + const std::list<uint64_t>& columnsToPrefetch = std::get<1>(params); + bool prefetchTwice = std::get<2>(params); + MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE); - std::unique_ptr<Reader> reader = createNestedMapMemReader(memStream); + std::unique_ptr<Reader> reader = + createNestedMapMemReader(memStream, stripesToPrefetch, columnsToPrefetch, prefetchTwice); // select all of single_map and only the offsets of nested_map.
RowReaderOptions::IdReadIntentMap idReadIntentMap = {{2, ReadIntent_ALL}, @@ -480,7 +583,24 @@ namespace orc { EXPECT_EQ(nullptr, nestedMapBatch.elements); } - std::unique_ptr createNestedUnionMemReader(MemoryOutputStream& memStream) { + INSTANTIATE_TEST_SUITE_P( + TestReadIntentFromNestedMapInstance, TestReadIntentFromNestedMap, + ::testing::Values( + std::make_tuple(std::vector{}, std::list{}, true), + std::make_tuple(std::vector{}, std::list{}, false), + std::make_tuple(std::vector{}, std::list{1, 5}, true), + std::make_tuple(std::vector{}, std::list{1, 5}, false), + std::make_tuple(std::vector{0}, std::list{}, true), + std::make_tuple(std::vector{0}, std::list{}, false), + std::make_tuple(std::vector{0}, std::list{1, 5}, true), + std::make_tuple(std::vector{0}, std::list{1, 5}, false), + std::make_tuple(std::vector{1000}, std::list{1000}, true), + std::make_tuple(std::vector{1000}, std::list{1000}, false))); + + std::unique_ptr createNestedUnionMemReader(MemoryOutputStream& memStream, + const std::vector& stripesToPrefetch, + const std::list& columnsToPrefetch, + bool prefetchTwice) { MemoryPool* pool = getDefaultPool(); auto type = std::unique_ptr( @@ -492,6 +612,7 @@ namespace orc { WriterOptions options; options.setStripeSize(1024 * 1024) .setCompressionBlockSize(1024) + .setMemoryBlockSize(64) .setCompression(CompressionKind_NONE) .setMemoryPool(pool) .setRowIndexStride(1000); @@ -563,20 +684,43 @@ namespace orc { ReaderOptions readerOptions; readerOptions.setMemoryPool(*pool); readerOptions.setReaderMetrics(nullptr); - return createReader(std::move(inStream), readerOptions); + auto reader = createReader(std::move(inStream), readerOptions); + + reader->preBuffer(stripesToPrefetch, columnsToPrefetch); + if (prefetchTwice) { + reader->preBuffer(stripesToPrefetch, columnsToPrefetch); + } + + return reader; } - TEST(TestReadIntent, testUnionAll) { + class TestReadIntentFromNestedUnion + : public ::testing::TestWithParam< + std::tuple, std::list, bool>> {}; + + TEST_P(TestReadIntentFromNestedUnion, testUnionAll) { + const auto& params = GetParam(); + const std::vector& stripesToPrefetch = std::get<0>(params); + const std::list& columnsToPrefetch = std::get<1>(params); + bool prefetchTwice = std::get<2>(params); + MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE); - std::unique_ptr reader = createNestedUnionMemReader(memStream); + std::unique_ptr reader = + createNestedUnionMemReader(memStream, stripesToPrefetch, columnsToPrefetch, prefetchTwice); // select all of single_union. verifySelection(reader, {{2, ReadIntent_ALL}}, {0, 2, 3, 4}); } - TEST(TestReadIntent, testUnionOffsets) { + TEST_P(TestReadIntentFromNestedUnion, testUnionOffsets) { + const auto& params = GetParam(); + const std::vector& stripesToPrefetch = std::get<0>(params); + const std::list& columnsToPrefetch = std::get<1>(params); + bool prefetchTwice = std::get<2>(params); + MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE); - std::unique_ptr reader = createNestedUnionMemReader(memStream); + std::unique_ptr reader = + createNestedUnionMemReader(memStream, stripesToPrefetch, columnsToPrefetch, prefetchTwice); // select only the offsets of single_union. 
verifySelection(reader, {{2, ReadIntent_OFFSETS}}, {0, 2}); @@ -589,17 +733,29 @@ namespace orc { {0, 2, 5, 6, 7, 8, 11}); } - TEST(TestReadIntent, testUnionAllAndOffsets) { + TEST_P(TestReadIntentFromNestedUnion, testUnionAllAndOffsets) { + const auto& params = GetParam(); + const std::vector<uint32_t>& stripesToPrefetch = std::get<0>(params); + const std::list<uint64_t>& columnsToPrefetch = std::get<1>(params); + bool prefetchTwice = std::get<2>(params); + MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE); - std::unique_ptr<Reader> reader = createNestedUnionMemReader(memStream); + std::unique_ptr<Reader> reader = + createNestedUnionMemReader(memStream, stripesToPrefetch, columnsToPrefetch, prefetchTwice); // select all of single_union and only the outermost offsets of nested_union. verifySelection(reader, {{2, ReadIntent_ALL}, {5, ReadIntent_OFFSETS}}, {0, 2, 3, 4, 5}); } - TEST(TestReadIntent, testUnionConflictingIntent) { + TEST_P(TestReadIntentFromNestedUnion, testUnionConflictingIntent) { + const auto& params = GetParam(); + const std::vector<uint32_t>& stripesToPrefetch = std::get<0>(params); + const std::list<uint64_t>& columnsToPrefetch = std::get<1>(params); + bool prefetchTwice = std::get<2>(params); + MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE); - std::unique_ptr<Reader> reader = createNestedUnionMemReader(memStream); + std::unique_ptr<Reader> reader = + createNestedUnionMemReader(memStream, stripesToPrefetch, columnsToPrefetch, prefetchTwice); // test conflicting ReadIntent on nested_union. verifySelection(reader, {{5, ReadIntent_OFFSETS}, {8, ReadIntent_ALL}}, {0, 5, 6, 7, 8, 9, 10, 11}); @@ -610,9 +766,15 @@ namespace orc { {0, 5, 6, 7, 8, 9, 10, 11}); } - TEST(TestReadIntent, testUnionRowBatchContent) { + TEST_P(TestReadIntentFromNestedUnion, testUnionRowBatchContent) { + const auto& params = GetParam(); + const std::vector<uint32_t>& stripesToPrefetch = std::get<0>(params); + const std::list<uint64_t>& columnsToPrefetch = std::get<1>(params); + bool prefetchTwice = std::get<2>(params); + MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE); - std::unique_ptr<Reader> reader = createNestedUnionMemReader(memStream); + std::unique_ptr<Reader> reader = + createNestedUnionMemReader(memStream, stripesToPrefetch, columnsToPrefetch, prefetchTwice); // select all of single_union and only the offsets of nested_union.
RowReaderOptions::IdReadIntentMap idReadIntentMap = {{2, ReadIntent_ALL}, @@ -662,10 +824,25 @@ namespace orc { EXPECT_EQ(1, nestedUnionBatch.offsets.data()[1]); } + INSTANTIATE_TEST_SUITE_P( + TestReadIntentFromNestedUnionInstance, TestReadIntentFromNestedUnion, + ::testing::Values( + std::make_tuple(std::vector{}, std::list{}, true), + std::make_tuple(std::vector{}, std::list{}, false), + std::make_tuple(std::vector{}, std::list{1, 2}, true), + std::make_tuple(std::vector{}, std::list{1, 2}, false), + std::make_tuple(std::vector{0}, std::list{}, true), + std::make_tuple(std::vector{0}, std::list{}, false), + std::make_tuple(std::vector{0}, std::list{1, 2}, true), + std::make_tuple(std::vector{0}, std::list{1, 2}, false), + std::make_tuple(std::vector{1000}, std::list{1000}, true), + std::make_tuple(std::vector{1000}, std::list{1000}, false))); + TEST(TestReadIntent, testSeekOverEmptyPresentStream) { MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE); MemoryPool* pool = getDefaultPool(); uint64_t rowCount = 5000; + { auto type = std::unique_ptr( Type::buildTypeFromString("struct,col3:struct," @@ -673,6 +850,7 @@ namespace orc { WriterOptions options; options.setStripeSize(1024 * 1024) .setCompressionBlockSize(1024) + .setMemoryBlockSize(64) .setCompression(CompressionKind_NONE) .setMemoryPool(pool) .setRowIndexStride(1000); diff --git a/c++/test/TestRleEncoder.cc b/c++/test/TestRleEncoder.cc index 1c24a69515..d458236cbc 100644 --- a/c++/test/TestRleEncoder.cc +++ b/c++/test/TestRleEncoder.cc @@ -84,8 +84,8 @@ namespace orc { std::make_unique(memStream.getData(), memStream.getLength()), isSinged, version, *getDefaultPool(), getDefaultReaderMetrics()); - int64_t* decodedData = new int64_t[numValues]; - decoder->next(decodedData, numValues, notNull); + std::vector decodedData(numValues); + decoder->next(decodedData.data(), numValues, notNull); for (uint64_t i = 0; i < numValues; ++i) { if (!notNull || notNull[i]) { @@ -93,7 +93,12 @@ namespace orc { } } - delete[] decodedData; + decoder->next(decodedData.data(), numValues, notNull); + for (uint64_t i = 0; i < numValues; ++i) { + if (!notNull || notNull[i]) { + EXPECT_EQ(data[i], decodedData[i]); + } + } } std::unique_ptr RleTest::getEncoder(RleVersion version, MemoryOutputStream& memStream, @@ -128,6 +133,9 @@ namespace orc { char* notNull = numNulls == 0 ? 
nullptr : new char[numValues]; int64_t* data = new int64_t[numValues]; generateData(numValues, start, delta, random, data, numNulls, notNull); + encoder->add(data, numValues, notNull); + encoder->finishEncode(); + encoder->add(data, numValues, notNull); encoder->flush(); @@ -243,6 +251,9 @@ namespace orc { MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE); std::unique_ptr<RleEncoder> encoder = getEncoder(RleVersion_2, memStream, isSigned); + encoder->add(data, numValues, nullptr); + encoder->finishEncode(); + encoder->add(data, numValues, nullptr); encoder->flush(); diff --git a/c++/test/TestSchemaEvolution.cc b/c++/test/TestSchemaEvolution.cc index c52ba009fa..d146853573 100644 --- a/c++/test/TestSchemaEvolution.cc +++ b/c++/test/TestSchemaEvolution.cc @@ -45,17 +45,17 @@ namespace orc { directEncoding.set_kind(proto::ColumnEncoding_Kind_DIRECT); EXPECT_CALL(streams, getEncoding(testing::_)).WillRepeatedly(testing::Return(directEncoding)); - EXPECT_CALL(streams, getStreamProxy(testing::_, testing::_, testing::_)) - .WillRepeatedly(testing::Return(nullptr)); - std::string dummyStream("dummy"); - ON_CALL(streams, getStreamProxy(1, proto::Stream_Kind_SECONDARY, testing::_)) - .WillByDefault(testing::Return( - new SeekableArrayInputStream(dummyStream.c_str(), dummyStream.length()))); + EXPECT_CALL(streams, getStreamProxy(testing::_, testing::_, testing::_)) + .WillRepeatedly(testing::ReturnNew<SeekableArrayInputStream>(dummyStream.c_str(), + dummyStream.length())); + EXPECT_CALL(streams, isDecimalAsLong()).WillRepeatedly(testing::Return(false)); EXPECT_CALL(streams, getSchemaEvolution()).WillRepeatedly(testing::Return(&se)); + EXPECT_CALL(streams, getSelectedColumns()) + .WillRepeatedly(testing::Return(std::vector<bool>{true, true})); - EXPECT_TRUE(buildReader(*fileType, streams) != nullptr); + EXPECT_TRUE(buildReader(*fileType, streams, true) != nullptr); } return true; } @@ -66,8 +66,8 @@ namespace orc { {2, "struct"}, {3, "struct"}, {4, "struct"}, {5, "struct"}, {6, "struct"}, {7, "struct"}, - {8, "struct"}, {9, "struct"}, - {10, "struct"}, {11, "struct"}, + {8, "struct"}, {9, "struct"}, + {10, "struct"}, {11, "struct"}, {12, "struct"}, {13, "struct"}, {14, "struct"}, {15, "struct"}, {16, "struct"}}; @@ -148,6 +148,38 @@ namespace orc { } } + // conversion from string variant to numeric + for (size_t i = 7; i <= 11; i++) { + for (size_t j = 0; j <= 6; j++) { + canConvert[i][j] = true; + needConvert[i][j] = true; + } + } + + // conversion from string variant to string variant + for (size_t i = 7; i <= 11; i++) { + for (size_t j = 7; j <= 11; j++) { + canConvert[i][j] = true; + needConvert[i][j] = (i != j); + } + } + + // conversion from string variant to decimal + for (size_t i = 7; i <= 11; i++) { + for (size_t j = 12; j <= 13; j++) { + canConvert[i][j] = true; + needConvert[i][j] = (i != j); + } + } + + // conversion from string variant to timestamp + for (size_t i = 7; i <= 11; i++) { + for (size_t j = 14; j <= 15; j++) { + canConvert[i][j] = true; + needConvert[i][j] = (i != j); + } + } + for (size_t i = 0; i < typesSize; i++) { for (size_t j = 0; j < typesSize; j++) { testConvertReader(types[i], types[j], canConvert[i][j], needConvert[i][j]); diff --git a/c++/test/TestSearchArgument.cc b/c++/test/TestSearchArgument.cc index bf9b82ea5c..e51ee1e8b5 100644 --- a/c++/test/TestSearchArgument.cc +++ b/c++/test/TestSearchArgument.cc @@ -481,4 +481,13 @@ namespace orc { std::invalid_argument); } + TEST(TestSearchArgument, testBadTreeNode) { + auto invalidNode = std::make_shared<ExpressionTree>(ExpressionTree::Operator::NOT, NodeList{});
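+ // A NOT node with an empty child list is malformed; printing and evaluating it should both throw.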
EXPECT_THROW(invalidNode->toString(), std::invalid_argument); + + std::vector<TruthValue> leaves; + leaves.push_back(TruthValue::YES); + EXPECT_THROW(invalidNode->evaluate(leaves), std::invalid_argument); + } + } // namespace orc diff --git a/c++/test/TestStripeIndexStatistics.cc b/c++/test/TestStripeIndexStatistics.cc index 34a4649c35..a529792c17 100644 --- a/c++/test/TestStripeIndexStatistics.cc +++ b/c++/test/TestStripeIndexStatistics.cc @@ -46,18 +46,19 @@ namespace orc { intColStats = reinterpret_cast<const orc::IntegerColumnStatistics*>( stripeStats->getRowIndexStatistics(1, 0)); EXPECT_EQ( - "Data type: Integer\nValues: 2000\nHas null: no\nMinimum: 1\nMaximum: 2000\nSum: 2001000\n", + "Data type: Integer\nValues: 2000\nHas null: yes\nMinimum: 1\nMaximum: 2000\nSum: " "2001000\n", intColStats->toString()); intColStats = reinterpret_cast<const orc::IntegerColumnStatistics*>( stripeStats->getRowIndexStatistics(1, 1)); EXPECT_EQ( - "Data type: Integer\nValues: 2000\nHas null: no\nMinimum: 2001\nMaximum: 4000\nSum: " + "Data type: Integer\nValues: 2000\nHas null: yes\nMinimum: 2001\nMaximum: 4000\nSum: " "6001000\n", intColStats->toString()); intColStats = reinterpret_cast<const orc::IntegerColumnStatistics*>( stripeStats->getRowIndexStatistics(1, 2)); EXPECT_EQ( - "Data type: Integer\nValues: 2000\nHas null: no\nMinimum: 4001\nMaximum: 6000\nSum: " + "Data type: Integer\nValues: 2000\nHas null: yes\nMinimum: 4001\nMaximum: 6000\nSum: " "10001000\n", intColStats->toString()); @@ -65,23 +66,48 @@ namespace orc { stringColStats = reinterpret_cast<const orc::StringColumnStatistics*>( stripeStats->getRowIndexStatistics(2, 0)); EXPECT_EQ( - "Data type: String\nValues: 2000\nHas null: no\nMinimum: 1000\nMaximum: 9a\nTotal length: " + "Data type: String\nValues: 2000\nHas null: yes\nMinimum: 1000\nMaximum: 9a\nTotal length: " "7892\n", stringColStats->toString()); stringColStats = reinterpret_cast<const orc::StringColumnStatistics*>( stripeStats->getRowIndexStatistics(2, 1)); EXPECT_EQ( - "Data type: String\nValues: 2000\nHas null: no\nMinimum: 2001\nMaximum: 4000\nTotal " + "Data type: String\nValues: 2000\nHas null: yes\nMinimum: 2001\nMaximum: 4000\nTotal " "length: " "8000\n", stringColStats->toString()); stringColStats = reinterpret_cast<const orc::StringColumnStatistics*>( stripeStats->getRowIndexStatistics(2, 2)); EXPECT_EQ( - "Data type: String\nValues: 2000\nHas null: no\nMinimum: 4001\nMaximum: 6000\nTotal " + "Data type: String\nValues: 2000\nHas null: yes\nMinimum: 4001\nMaximum: 6000\nTotal " "length: " "8000\n", stringColStats->toString()); + + std::unique_ptr<StripeStatistics> stripeLevelStats = reader->getStripeStatistics(0, false); + const orc::IntegerColumnStatistics* stripeLevelIntColStats; + stripeLevelIntColStats = reinterpret_cast<const orc::IntegerColumnStatistics*>( + stripeLevelStats->getColumnStatistics(1)); + EXPECT_EQ( + "Data type: Integer\nValues: 6000\nHas null: yes\nMinimum: 1\nMaximum: 6000\nSum: " + "18003000\n", + stripeLevelIntColStats->toString()); + + const orc::StringColumnStatistics* stripeLevelStringColStats; + stripeLevelStringColStats = reinterpret_cast<const orc::StringColumnStatistics*>( + stripeLevelStats->getColumnStatistics(2)); + EXPECT_EQ( + "Data type: String\nValues: 6000\nHas null: yes\nMinimum: 1000\nMaximum: 9a\nTotal length: " + "23892\n", + stripeLevelStringColStats->toString()); + + intColStats = + reinterpret_cast<const orc::IntegerColumnStatistics*>(stripeStats->getColumnStatistics(1)); + stringColStats = + reinterpret_cast<const orc::StringColumnStatistics*>(stripeStats->getColumnStatistics(2)); + + EXPECT_EQ(intColStats->toString(), stripeLevelIntColStats->toString()); + EXPECT_EQ(stringColStats->toString(), stripeLevelStringColStats->toString()); } } // namespace orc diff --git a/c++/test/TestTimestampStatistics.cc b/c++/test/TestTimestampStatistics.cc index d20a049557..e005fa6cf6 100644 ---
a/c++/test/TestTimestampStatistics.cc +++ b/c++/test/TestTimestampStatistics.cc @@ -68,6 +68,19 @@ namespace orc { "00:00:00.688\nLowerBound: 1995-01-01 00:00:00.688\nMaximum: 2037-01-01 " "00:00:00.0\nUpperBound: 2037-01-01 00:00:00.1\n", stripeColStats->toString()); + + std::unique_ptr<StripeStatistics> stripeStatsWithOutRowIndex = + reader->getStripeStatistics(0, false); + const orc::TimestampColumnStatistics* stripeColStatsOnly = + reinterpret_cast<const orc::TimestampColumnStatistics*>( + stripeStatsWithOutRowIndex->getColumnStatistics(0)); + + EXPECT_TRUE(stripeColStatsOnly->hasMinimum()); + EXPECT_TRUE(stripeColStatsOnly->hasMaximum()); + EXPECT_EQ(stripeColStats->toString(), stripeColStatsOnly->toString()); + EXPECT_EQ(stripeStats->getNumberOfColumns(), stripeStatsWithOutRowIndex->getNumberOfColumns()); + EXPECT_THROW(stripeStatsWithOutRowIndex->getRowIndexStatistics(1, 1), NotImplementedYet); + EXPECT_THROW(stripeStatsWithOutRowIndex->getNumberOfRowIndexStats(1), NotImplementedYet); } TEST(TestTimestampStatistics, testTimezoneUTC) { diff --git a/c++/test/TestTimezone.cc b/c++/test/TestTimezone.cc index 2330fcfb04..94895cd700 100644 --- a/c++/test/TestTimezone.cc +++ b/c++/test/TestTimezone.cc @@ -21,6 +21,7 @@ #include "wrap/gmock.h" #include "wrap/gtest-wrapper.h" +#include <algorithm> #include #include #include @@ -421,20 +422,61 @@ namespace orc { } TEST(TestTimezone, testMissingTZDB) { - const char* tzDirBackup = std::getenv("TZDIR"); - if (tzDirBackup != nullptr) { + const char* tzDir = std::getenv("TZDIR"); + std::string tzDirBackup; + if (tzDir != nullptr) { + // std::string makes a deep copy of the buffer, so unsetting the + // environment variable does not invalidate the saved value + tzDirBackup = tzDir; ASSERT_TRUE(delEnv("TZDIR")); } ASSERT_TRUE(setEnv("TZDIR", "/path/to/wrong/tzdb")); - EXPECT_THAT([]() { getTimezoneByName("America/Los_Angeles"); }, + EXPECT_THAT([]() { getTimezoneByName("America/Los_Angeles").getVersion(); }, testing::ThrowsMessage<TimezoneError>(testing::HasSubstr( "Time zone file /path/to/wrong/tzdb/America/Los_Angeles does not exist."
" Please install IANA time zone database and set TZDIR env."))); - if (tzDirBackup != nullptr) { - ASSERT_TRUE(setEnv("TZDIR", tzDirBackup)); + if (!tzDirBackup.empty()) { + ASSERT_TRUE(setEnv("TZDIR", tzDirBackup.c_str())); } else { ASSERT_TRUE(delEnv("TZDIR")); } } + TEST(TestTimezone, testTzdbFromCondaEnv) { + const char* tzDir = std::getenv("TZDIR"); + // test only makes sense if TZDIR exists + if (tzDir != nullptr) { + std::string tzDirBackup = tzDir; + ASSERT_TRUE(delEnv("TZDIR")); + + // remove "/share/zoneinfo" from TZDIR (as set through TZDATA_DIR in CI) to + // get the equivalent of CONDA_PREFIX, relative to the location of the tzdb + std::string condaPrefix(tzDirBackup); + condaPrefix += "/../.."; + ASSERT_TRUE(setEnv("CONDA_PREFIX", condaPrefix.c_str())); + + // small test sample to ensure tzbd loads with CONDA_PREFIX, even without TZDIR + const Timezone* zrh = &getTimezoneByName("Europe/Zurich"); + EXPECT_EQ("CET", getVariantFromZone(*zrh, "2024-03-31 00:59:59")); + EXPECT_EQ("CEST", getVariantFromZone(*zrh, "2024-03-31 01:00:00")); + EXPECT_EQ("CEST", getVariantFromZone(*zrh, "2024-10-27 00:59:59")); + EXPECT_EQ("CET", getVariantFromZone(*zrh, "2024-10-27 01:00:00")); + + // CONDA_PREFIX contains backslashes on windows; test that this doesn't blow up + std::replace(condaPrefix.begin(), condaPrefix.end(), '/', '\\'); + ASSERT_TRUE(setEnv("CONDA_PREFIX", condaPrefix.c_str())); + + // as above, but different timezone to avoid hitting cache + const Timezone* syd = &getTimezoneByName("Australia/Sydney"); + EXPECT_EQ("AEDT", getVariantFromZone(*syd, "2024-04-06 15:59:59")); + EXPECT_EQ("AEST", getVariantFromZone(*syd, "2024-04-06 16:00:00")); + EXPECT_EQ("AEST", getVariantFromZone(*syd, "2024-10-05 15:59:59")); + EXPECT_EQ("AEDT", getVariantFromZone(*syd, "2024-10-05 16:00:00")); + + // restore state of environment variables + ASSERT_TRUE(delEnv("CONDA_PREFIX")); + ASSERT_TRUE(setEnv("TZDIR", tzDirBackup.c_str())); + } + } + } // namespace orc diff --git a/c++/test/TestType.cc b/c++/test/TestType.cc index c9ac2f2850..cec0d8d2c4 100644 --- a/c++/test/TestType.cc +++ b/c++/test/TestType.cc @@ -325,7 +325,7 @@ namespace orc { expectLogicErrorDuringParse("int<>", "Invalid < after int type."); expectLogicErrorDuringParse("array(int)", "Missing < after array."); expectLogicErrorDuringParse("struct>", - "Invalid struct type. No field name set."); + "Invalid struct type. 
Field name can not contain '<'."); expectLogicErrorDuringParse("struct", "Missing comma after field."); } diff --git a/c++/test/TestWriter.cc b/c++/test/TestWriter.cc index d160f82ff1..975462e30c 100644 --- a/c++/test/TestWriter.cc +++ b/c++/test/TestWriter.cc @@ -41,11 +41,11 @@ namespace orc { const int DEFAULT_MEM_STREAM_SIZE = 100 * 1024 * 1024; // 100M - std::unique_ptr createWriter(uint64_t stripeSize, uint64_t compresionblockSize, - CompressionKind compression, const Type& type, - MemoryPool* memoryPool, OutputStream* stream, - FileVersion version, uint64_t stride = 0, - const std::string& timezone = "GMT", + std::unique_ptr createWriter(uint64_t stripeSize, uint64_t memoryBlockSize, + uint64_t compresionblockSize, CompressionKind compression, + const Type& type, MemoryPool* memoryPool, + OutputStream* stream, FileVersion version, + uint64_t stride = 0, const std::string& timezone = "GMT", bool useTightNumericVector = false) { WriterOptions options; options.setStripeSize(stripeSize); @@ -56,6 +56,9 @@ namespace orc { options.setFileVersion(version); options.setTimezoneName(timezone); options.setUseTightNumericVector(useTightNumericVector); + options.setMemoryBlockSize(memoryBlockSize); + // enable align block bound to row group when stride is not 0 + options.setAlignBlockBoundToRowGroup(true); return createWriter(type, stream, options); } @@ -83,7 +86,56 @@ namespace orc { return reader->createRowReader(rowReaderOpts); } - class WriterTest : public TestWithParam { + void verifyCompressionBlockAlignment(std::unique_ptr& reader, uint64_t columnCount) { + auto stripeCount = reader->getNumberOfStripes(); + for (uint64_t stripeIndex = 0; stripeIndex < stripeCount; ++stripeIndex) { + for (uint64_t i = 0; i < columnCount; ++i) { + auto rowGroupIndexMap = reader->getRowGroupIndex(stripeIndex); + EXPECT_TRUE(rowGroupIndexMap.size() > 0); + auto rowGroupIndex = rowGroupIndexMap[columnCount]; + auto subType = reader->getType().getSubtype(i); + EXPECT_TRUE(rowGroupIndex.positions.size() > 0); + for (auto rowGroupPositions : rowGroupIndex.positions) { + for (uint64_t posIndex = 0; posIndex < rowGroupPositions.size(); ++posIndex) { + // After we call finishStream(), unusedBufferSize is set to 0, + // so only the first position is valid in each recordPosition call. + switch (subType->getKind()) { + case DECIMAL: + case STRING: + case BINARY: + case CHAR: + case VARCHAR: { + if (posIndex != 0 && posIndex != 2) { + EXPECT_EQ(rowGroupPositions[posIndex], 0); + } + break; + } + case TIMESTAMP_INSTANT: + case TIMESTAMP: { + if (posIndex != 0 && posIndex != 3) { + EXPECT_EQ(rowGroupPositions[posIndex], 0); + } + break; + } + default: { + if (posIndex != 0) { + EXPECT_EQ(rowGroupPositions[posIndex], 0); + } + break; + } + } + } + } + } + } + } + + struct TestParams { + FileVersion fileVersion; + bool enableAlignBlockBoundToRowGroup; + }; + + class WriterTest : public TestWithParam { // You can implement all the usual fixture class members here. // To access the test parameter, call GetParam() from class // TestWithParam. 
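
To make the intent of verifyCompressionBlockAlignment above easier to follow, here is a small illustrative sketch of walking the same row-group index through the Reader API the helper uses. It is not part of the patch: the function name and printed output are ours, and the map-of-RowGroupIndex shape is assumed from the helper's calls to getRowGroupIndex and its use of the positions member.

#include <cstdint>
#include <iostream>

#include <orc/Reader.hh>  // assumed public header for the Reader API used above

// Sketch only: walks the row-group index the same way the test helper does.
// With setAlignBlockBoundToRowGroup(true), each row group starts on a fresh
// compression block, so every recorded position after the block offset
// (plus the extra value-stream entry for string/timestamp columns that the
// switch above carves out) is expected to be zero.
void dumpRowGroupPositions(orc::Reader& reader, uint64_t stripeIndex) {
  auto rowGroupIndexMap = reader.getRowGroupIndex(stripeIndex);
  for (const auto& columnEntry : rowGroupIndexMap) {
    for (const auto& positions : columnEntry.second.positions) {
      // positions[0] is the start offset of the compression block.
      std::cout << "column " << columnEntry.first << " row group starts at "
                << positions[0] << '\n';
    }
  }
}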
@@ -91,13 +143,15 @@ namespace orc { protected: FileVersion fileVersion; + bool enableAlignBlockBoundToRowGroup; public: - WriterTest() : fileVersion(FileVersion::v_0_11()) {} + WriterTest() : fileVersion(FileVersion::v_0_11()), enableAlignBlockBoundToRowGroup(false) {} }; void WriterTest::SetUp() { - fileVersion = GetParam(); + fileVersion = GetParam().fileVersion; + enableAlignBlockBoundToRowGroup = GetParam().enableAlignBlockBoundToRowGroup; } TEST_P(WriterTest, writeEmptyFile) { @@ -107,10 +161,11 @@ namespace orc { uint64_t stripeSize = 16 * 1024; // 16K uint64_t compressionBlockSize = 1024; // 1k + uint64_t memoryBlockSize = 64; std::unique_ptr writer = - createWriter(stripeSize, compressionBlockSize, CompressionKind_ZLIB, *type, pool, - &memStream, fileVersion); + createWriter(stripeSize, memoryBlockSize, compressionBlockSize, CompressionKind_ZLIB, *type, + pool, &memStream, fileVersion); writer->close(); auto inStream = std::make_unique(memStream.getData(), memStream.getLength()); @@ -135,10 +190,11 @@ namespace orc { uint64_t stripeSize = 16 * 1024; // 16K uint64_t compressionBlockSize = 1024; // 1k + uint64_t memoryBlockSize = 64; std::unique_ptr writer = - createWriter(stripeSize, compressionBlockSize, CompressionKind_ZLIB, *type, pool, - &memStream, fileVersion); + createWriter(stripeSize, memoryBlockSize, compressionBlockSize, CompressionKind_ZLIB, *type, + pool, &memStream, fileVersion); std::unique_ptr batch = writer->createRowBatch(1024); StructVectorBatch* structBatch = dynamic_cast(batch.get()); LongVectorBatch* longBatch = dynamic_cast(structBatch->fields[0]); @@ -195,10 +251,11 @@ namespace orc { uint64_t stripeSize = 1024; // 1K uint64_t compressionBlockSize = 1024; // 1k + uint64_t memoryBlockSize = 64; std::unique_ptr writer = - createWriter(stripeSize, compressionBlockSize, CompressionKind_ZLIB, *type, pool, - &memStream, fileVersion); + createWriter(stripeSize, memoryBlockSize, compressionBlockSize, CompressionKind_ZLIB, *type, + pool, &memStream, fileVersion); std::unique_ptr batch = writer->createRowBatch(65535); StructVectorBatch* structBatch = dynamic_cast(batch.get()); LongVectorBatch* longBatch = dynamic_cast(structBatch->fields[0]); @@ -241,13 +298,14 @@ namespace orc { uint64_t stripeSize = 1024; // 1K uint64_t compressionBlockSize = 1024; // 1k + uint64_t memoryBlockSize = 64; char dataBuffer[327675]; uint64_t offset = 0; std::unique_ptr writer = - createWriter(stripeSize, compressionBlockSize, CompressionKind_ZLIB, *type, pool, - &memStream, fileVersion); + createWriter(stripeSize, memoryBlockSize, compressionBlockSize, CompressionKind_ZLIB, *type, + pool, &memStream, fileVersion, enableAlignBlockBoundToRowGroup ? 
1024 : 0); std::unique_ptr batch = writer->createRowBatch(65535); StructVectorBatch* structBatch = dynamic_cast(batch.get()); StringVectorBatch* strBatch = dynamic_cast(structBatch->fields[0]); @@ -289,6 +347,9 @@ namespace orc { EXPECT_EQ(i, static_cast(atoi(str.c_str()))); EXPECT_EQ(i, static_cast(atoi(bin.c_str()))); } + if (enableAlignBlockBoundToRowGroup) { + verifyCompressionBlockAlignment(reader, type->getSubtypeCount()); + } EXPECT_FALSE(rowReader->next(*batch)); } @@ -301,6 +362,7 @@ namespace orc { uint64_t stripeSize = 16 * 1024; uint64_t compressionBlockSize = 1024; uint64_t rowCount = 655350; + uint64_t memoryBlockSize = 64; std::vector data(rowCount); for (uint64_t i = 0; i < rowCount; ++i) { @@ -308,8 +370,8 @@ namespace orc { } std::unique_ptr writer = - createWriter(stripeSize, compressionBlockSize, CompressionKind_ZLIB, *type, pool, - &memStream, fileVersion); + createWriter(stripeSize, memoryBlockSize, compressionBlockSize, CompressionKind_ZLIB, *type, + pool, &memStream, fileVersion, enableAlignBlockBoundToRowGroup ? 1024 : 0); std::unique_ptr batch = writer->createRowBatch(rowCount); StructVectorBatch* structBatch = dynamic_cast(batch.get()); DoubleVectorBatch* doubleBatch = dynamic_cast(structBatch->fields[0]); @@ -345,6 +407,10 @@ namespace orc { 0.000001f); } EXPECT_FALSE(rowReader->next(*batch)); + + if (enableAlignBlockBoundToRowGroup) { + verifyCompressionBlockAlignment(reader, type->getSubtypeCount()); + } } TEST_P(WriterTest, writeShortIntLong) { @@ -356,10 +422,11 @@ namespace orc { uint64_t stripeSize = 16 * 1024; uint64_t compressionBlockSize = 1024; uint64_t rowCount = 65535; + uint64_t memoryBlockSize = 64; std::unique_ptr writer = - createWriter(stripeSize, compressionBlockSize, CompressionKind_ZLIB, *type, pool, - &memStream, fileVersion); + createWriter(stripeSize, memoryBlockSize, compressionBlockSize, CompressionKind_ZLIB, *type, + pool, &memStream, fileVersion, enableAlignBlockBoundToRowGroup ? 1024 : 0); std::unique_ptr batch = writer->createRowBatch(rowCount); StructVectorBatch* structBatch = dynamic_cast(batch.get()); LongVectorBatch* smallIntBatch = dynamic_cast(structBatch->fields[0]); @@ -396,6 +463,9 @@ namespace orc { EXPECT_EQ(static_cast(i), intBatch->data[i]); EXPECT_EQ(static_cast(i), bigIntBatch->data[i]); } + if (enableAlignBlockBoundToRowGroup) { + verifyCompressionBlockAlignment(reader, type->getSubtypeCount()); + } } TEST_P(WriterTest, writeTinyint) { @@ -406,16 +476,20 @@ namespace orc { uint64_t stripeSize = 16 * 1024; uint64_t compressionBlockSize = 1024; uint64_t rowCount = 65535; + uint64_t memoryBlockSize = 64; - std::unique_ptr writer = - createWriter(stripeSize, compressionBlockSize, CompressionKind_ZLIB, *type, pool, - &memStream, fileVersion); + std::unique_ptr writer = createWriter( + stripeSize, memoryBlockSize, compressionBlockSize, CompressionKind_ZSTD, *type, pool, + &memStream, fileVersion, enableAlignBlockBoundToRowGroup ? 
1024 : 0, "GMT", true); std::unique_ptr batch = writer->createRowBatch(rowCount); StructVectorBatch* structBatch = dynamic_cast(batch.get()); - LongVectorBatch* byteBatch = dynamic_cast(structBatch->fields[0]); + ByteVectorBatch* byteBatch = dynamic_cast(structBatch->fields[0]); + int64_t sum = 0; for (uint64_t i = 0; i < rowCount; ++i) { - byteBatch->data[i] = static_cast(i); + int8_t x = static_cast(i); + byteBatch->data[i] = x; + sum += x; } structBatch->numElements = rowCount; byteBatch->numElements = rowCount; @@ -429,13 +503,29 @@ namespace orc { EXPECT_EQ(rowCount, reader->getNumberOfRows()); batch = rowReader->createRowBatch(rowCount); + rowReader->seekToRow(20); EXPECT_EQ(true, rowReader->next(*batch)); + if (enableAlignBlockBoundToRowGroup) { + verifyCompressionBlockAlignment(reader, type->getSubtypeCount()); + } structBatch = dynamic_cast(batch.get()); - byteBatch = dynamic_cast(structBatch->fields[0]); - for (uint64_t i = 0; i < rowCount; ++i) { - EXPECT_EQ(static_cast(i), static_cast(byteBatch->data[i])); - } + auto outByteBatch = dynamic_cast(structBatch->fields[0]); + for (uint64_t i = 0; i < rowCount - 20; ++i) { + EXPECT_EQ(static_cast(i + 20), static_cast(outByteBatch->data[i])); + } + + auto col_stats = reader->getColumnStatistics(1); + ASSERT_NE(col_stats, nullptr); + EXPECT_EQ(col_stats->getNumberOfValues(), rowCount); + EXPECT_FALSE(col_stats->hasNull()); + auto int_stats = dynamic_cast(col_stats.get()); + ASSERT_NE(int_stats, nullptr); + EXPECT_TRUE(int_stats->hasMinimum() && int_stats->hasMaximum()); + EXPECT_EQ(int_stats->getMinimum(), -128); + EXPECT_EQ(int_stats->getMaximum(), 127); + EXPECT_TRUE(int_stats->hasSum()); + EXPECT_EQ(int_stats->getSum(), sum); } TEST_P(WriterTest, writeBooleanColumn) { @@ -446,10 +536,11 @@ namespace orc { uint64_t stripeSize = 16 * 1024; uint64_t compressionBlockSize = 1024; uint64_t rowCount = 65535; + uint64_t memoryBlockSize = 64; std::unique_ptr writer = - createWriter(stripeSize, compressionBlockSize, CompressionKind_ZLIB, *type, pool, - &memStream, fileVersion); + createWriter(stripeSize, memoryBlockSize, compressionBlockSize, CompressionKind_ZLIB, *type, + pool, &memStream, fileVersion, enableAlignBlockBoundToRowGroup ? 1024 : 0); std::unique_ptr batch = writer->createRowBatch(rowCount); StructVectorBatch* structBatch = dynamic_cast(batch.get()); LongVectorBatch* byteBatch = dynamic_cast(structBatch->fields[0]); @@ -476,6 +567,9 @@ namespace orc { for (uint64_t i = 0; i < rowCount; ++i) { EXPECT_EQ((i % 3) == 0 ? 1 : 0, byteBatch->data[i]); } + if (enableAlignBlockBoundToRowGroup) { + verifyCompressionBlockAlignment(reader, type->getSubtypeCount()); + } } TEST_P(WriterTest, writeDate) { @@ -486,10 +580,11 @@ namespace orc { uint64_t stripeSize = 16 * 1024; uint64_t compressionBlockSize = 1024; uint64_t rowCount = 1024; + uint64_t memoryBlockSize = 64; std::unique_ptr writer = - createWriter(stripeSize, compressionBlockSize, CompressionKind_ZLIB, *type, pool, - &memStream, fileVersion); + createWriter(stripeSize, memoryBlockSize, compressionBlockSize, CompressionKind_ZLIB, *type, + pool, &memStream, fileVersion, enableAlignBlockBoundToRowGroup ? 
1024 : 0); std::unique_ptr batch = writer->createRowBatch(rowCount); StructVectorBatch* structBatch = dynamic_cast(batch.get()); @@ -517,6 +612,9 @@ namespace orc { for (uint64_t i = 0; i < rowCount; ++i) { EXPECT_EQ(static_cast(i), longBatch->data[i]); } + if (enableAlignBlockBoundToRowGroup) { + verifyCompressionBlockAlignment(reader, type->getSubtypeCount()); + } } TEST_P(WriterTest, writeTimestamp) { @@ -527,10 +625,11 @@ namespace orc { uint64_t stripeSize = 16 * 1024; uint64_t compressionBlockSize = 1024; uint64_t rowCount = 102400; + uint64_t memoryBlockSize = 64; std::unique_ptr writer = - createWriter(stripeSize, compressionBlockSize, CompressionKind_ZLIB, *type, pool, - &memStream, fileVersion); + createWriter(stripeSize, memoryBlockSize, compressionBlockSize, CompressionKind_ZLIB, *type, + pool, &memStream, fileVersion, enableAlignBlockBoundToRowGroup ? 1024 : 0); std::unique_ptr batch = writer->createRowBatch(rowCount); StructVectorBatch* structBatch = dynamic_cast(batch.get()); TimestampVectorBatch* tsBatch = dynamic_cast(structBatch->fields[0]); @@ -562,14 +661,18 @@ namespace orc { EXPECT_EQ(times[i], tsBatch->data[i]); EXPECT_EQ(i * 1000, tsBatch->nanoseconds[i]); } + if (enableAlignBlockBoundToRowGroup) { + verifyCompressionBlockAlignment(reader, type->getSubtypeCount()); + } } TEST_P(WriterTest, writeNegativeTimestamp) { MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE); MemoryPool* pool = getDefaultPool(); std::unique_ptr type(Type::buildTypeFromString("struct")); - auto writer = createWriter(16 * 1024 * 1024, 64 * 1024, CompressionKind_ZLIB, *type, pool, - &memStream, fileVersion); + auto writer = + createWriter(16 * 1024 * 1024, 64 * 1024, 256 * 1024, CompressionKind_ZLIB, *type, pool, + &memStream, fileVersion, enableAlignBlockBoundToRowGroup ? 1024 : 0); uint64_t batchCount = 5; auto batch = writer->createRowBatch(batchCount * 2); auto structBatch = dynamic_cast(batch.get()); @@ -619,6 +722,10 @@ namespace orc { } EXPECT_EQ(1000000, tsBatch->nanoseconds[i]); } + + if (enableAlignBlockBoundToRowGroup) { + verifyCompressionBlockAlignment(reader, type->getSubtypeCount()); + } } // TODO: Disable the test below for Windows for following reasons: @@ -638,10 +745,11 @@ namespace orc { uint64_t stripeSize = 16 * 1024; uint64_t compressionBlockSize = 1024; uint64_t rowCount = 1; + uint64_t memoryBlockSize = 64; std::unique_ptr writer = - createWriter(stripeSize, compressionBlockSize, CompressionKind_ZLIB, *type, pool, - &memStream, fileVersion, 0, writerTimezone); + createWriter(stripeSize, memoryBlockSize, compressionBlockSize, CompressionKind_ZLIB, *type, + pool, &memStream, fileVersion, 0, writerTimezone); auto batch = writer->createRowBatch(rowCount); auto& structBatch = dynamic_cast(*batch); auto& tsBatch = dynamic_cast(*structBatch.fields[0]); @@ -734,10 +842,11 @@ namespace orc { uint64_t stripeSize = 16 * 1024; uint64_t compressionBlockSize = 1024; uint64_t rowCount = 102400; + uint64_t memoryBlockSize = 64; std::unique_ptr writer = - createWriter(stripeSize, compressionBlockSize, CompressionKind_ZLIB, *type, pool, - &memStream, fileVersion); + createWriter(stripeSize, memoryBlockSize, compressionBlockSize, CompressionKind_ZLIB, *type, + pool, &memStream, fileVersion, enableAlignBlockBoundToRowGroup ? 
1024 : 0); std::unique_ptr batch = writer->createRowBatch(rowCount); StructVectorBatch* structBatch = dynamic_cast(batch.get()); TimestampVectorBatch* tsBatch = dynamic_cast(structBatch->fields[0]); @@ -769,6 +878,9 @@ namespace orc { EXPECT_EQ(times[i], tsBatch->data[i]); EXPECT_EQ(i * 1000, tsBatch->nanoseconds[i]); } + if (enableAlignBlockBoundToRowGroup) { + verifyCompressionBlockAlignment(reader, type->getSubtypeCount()); + } } TEST_P(WriterTest, writeCharAndVarcharColumn) { @@ -779,13 +891,14 @@ namespace orc { uint64_t stripeSize = 1024; uint64_t compressionBlockSize = 1024; uint64_t rowCount = 65535; + uint64_t memoryBlockSize = 64; char dataBuffer[327675]; uint64_t offset = 0; std::unique_ptr writer = - createWriter(stripeSize, compressionBlockSize, CompressionKind_ZLIB, *type, pool, - &memStream, fileVersion); + createWriter(stripeSize, memoryBlockSize, compressionBlockSize, CompressionKind_ZLIB, *type, + pool, &memStream, fileVersion, enableAlignBlockBoundToRowGroup ? 1024 : 0); std::unique_ptr batch = writer->createRowBatch(rowCount); StructVectorBatch* structBatch = dynamic_cast(batch.get()); @@ -847,6 +960,9 @@ namespace orc { } EXPECT_FALSE(rowReader->next(*batch)); + if (enableAlignBlockBoundToRowGroup) { + verifyCompressionBlockAlignment(reader, type->getSubtypeCount()); + } } TEST_P(WriterTest, writeDecimal64Column) { @@ -858,10 +974,11 @@ namespace orc { uint64_t stripeSize = 16 * 1024; // 16K uint64_t compressionBlockSize = 1024; // 1k uint64_t rowCount = 1024; + uint64_t memoryBlockSize = 64; std::unique_ptr writer = - createWriter(stripeSize, compressionBlockSize, CompressionKind_ZLIB, *type, pool, - &memStream, fileVersion); + createWriter(stripeSize, memoryBlockSize, compressionBlockSize, CompressionKind_ZLIB, *type, + pool, &memStream, fileVersion, enableAlignBlockBoundToRowGroup ? 1024 : 0); std::unique_ptr batch = writer->createRowBatch(rowCount); StructVectorBatch* structBatch = dynamic_cast(batch.get()); Decimal64VectorBatch* decBatch = dynamic_cast(structBatch->fields[0]); @@ -923,6 +1040,9 @@ namespace orc { EXPECT_EQ(dec, decBatch->values[i]); EXPECT_EQ(-dec, decBatch->values[i + maxPrecision]); } + if (enableAlignBlockBoundToRowGroup) { + verifyCompressionBlockAlignment(reader, type->getSubtypeCount()); + } } TEST_P(WriterTest, writeDecimal128Column) { @@ -934,10 +1054,11 @@ namespace orc { uint64_t stripeSize = 16 * 1024; uint64_t compressionBlockSize = 1024; uint64_t rowCount = 1024; + uint64_t memoryBlockSize = 64; std::unique_ptr writer = - createWriter(stripeSize, compressionBlockSize, CompressionKind_ZLIB, *type, pool, - &memStream, fileVersion); + createWriter(stripeSize, memoryBlockSize, compressionBlockSize, CompressionKind_ZLIB, *type, + pool, &memStream, fileVersion, enableAlignBlockBoundToRowGroup ? 
1024 : 0); std::unique_ptr batch = writer->createRowBatch(rowCount); StructVectorBatch* structBatch = dynamic_cast(batch.get()); Decimal128VectorBatch* decBatch = dynamic_cast(structBatch->fields[0]); @@ -1009,6 +1130,9 @@ namespace orc { EXPECT_EQ(expected, decBatch->values[i].toString()); EXPECT_EQ("-" + expected, decBatch->values[i + maxPrecision].toString()); } + if (enableAlignBlockBoundToRowGroup) { + verifyCompressionBlockAlignment(reader, type->getSubtypeCount()); + } } TEST_P(WriterTest, writeListColumn) { @@ -1022,10 +1146,11 @@ namespace orc { uint64_t rowCount = 1024; uint64_t maxListLength = 10; uint64_t offset = 0; + uint64_t memoryBlockSize = 8 * 1024; std::unique_ptr writer = - createWriter(stripeSize, compressionBlockSize, CompressionKind_ZLIB, *type, pool, - &memStream, fileVersion); + createWriter(stripeSize, memoryBlockSize, compressionBlockSize, CompressionKind_ZLIB, *type, + pool, &memStream, fileVersion, enableAlignBlockBoundToRowGroup ? 1024 : 0); std::unique_ptr batch = writer->createRowBatch(rowCount * maxListLength); StructVectorBatch* structBatch = dynamic_cast(batch.get()); @@ -1071,6 +1196,9 @@ namespace orc { EXPECT_EQ(static_cast(i), data[offsets[i] + j]); } } + if (enableAlignBlockBoundToRowGroup) { + verifyCompressionBlockAlignment(reader, type->getSubtypeCount()); + } } TEST_P(WriterTest, writeMapColumn) { @@ -1081,10 +1209,11 @@ namespace orc { uint64_t stripeSize = 16 * 1024; uint64_t compressionBlockSize = 1024; uint64_t rowCount = 1024, maxListLength = 10, offset = 0; + uint64_t memoryBlockSize = 64; std::unique_ptr writer = - createWriter(stripeSize, compressionBlockSize, CompressionKind_ZLIB, *type, pool, - &memStream, fileVersion); + createWriter(stripeSize, memoryBlockSize, compressionBlockSize, CompressionKind_ZLIB, *type, + pool, &memStream, fileVersion, enableAlignBlockBoundToRowGroup ? 1024 : 0); std::unique_ptr batch = writer->createRowBatch(rowCount * maxListLength); StructVectorBatch* structBatch = dynamic_cast(batch.get()); MapVectorBatch* mapBatch = dynamic_cast(structBatch->fields[0]); @@ -1151,6 +1280,9 @@ namespace orc { EXPECT_EQ(static_cast(i), elemData[offsets[i] + j]); } } + if (enableAlignBlockBoundToRowGroup) { + verifyCompressionBlockAlignment(reader, type->getSubtypeCount()); + } } TEST_P(WriterTest, writeUnionColumn) { @@ -1162,10 +1294,11 @@ namespace orc { uint64_t stripeSize = 16 * 1024; uint64_t compressionBlockSize = 1024; uint64_t rowCount = 3333; + uint64_t memoryBlockSize = 64; std::unique_ptr writer = - createWriter(stripeSize, compressionBlockSize, CompressionKind_ZLIB, *type, pool, - &memStream, fileVersion); + createWriter(stripeSize, memoryBlockSize, compressionBlockSize, CompressionKind_ZLIB, *type, + pool, &memStream, fileVersion, enableAlignBlockBoundToRowGroup ? 
1024 : 0); std::unique_ptr batch = writer->createRowBatch(rowCount); StructVectorBatch* structBatch = dynamic_cast(batch.get()); UnionVectorBatch* unionBatch = dynamic_cast(structBatch->fields[0]); @@ -1247,6 +1380,9 @@ namespace orc { break; } } + if (enableAlignBlockBoundToRowGroup) { + verifyCompressionBlockAlignment(reader, type->getSubtypeCount()); + } } TEST_P(WriterTest, writeUTF8CharAndVarcharColumn) { @@ -1257,9 +1393,10 @@ namespace orc { uint64_t stripeSize = 1024; uint64_t compressionBlockSize = 1024; uint64_t rowCount = 3; + uint64_t memoryBlockSize = 64; std::unique_ptr writer = - createWriter(stripeSize, compressionBlockSize, CompressionKind_ZLIB, *type, pool, - &memStream, fileVersion); + createWriter(stripeSize, memoryBlockSize, compressionBlockSize, CompressionKind_ZLIB, *type, + pool, &memStream, fileVersion, enableAlignBlockBoundToRowGroup ? 1024 : 0); std::unique_ptr batch = writer->createRowBatch(rowCount); StructVectorBatch* structBatch = dynamic_cast(batch.get()); StringVectorBatch* charBatch = dynamic_cast(structBatch->fields[0]); @@ -1317,6 +1454,9 @@ namespace orc { EXPECT_TRUE(memcmp(varcharBatch->data[2], expectedTwoChars, 4) == 0); EXPECT_FALSE(rowReader->next(*batch)); + if (enableAlignBlockBoundToRowGroup) { + verifyCompressionBlockAlignment(reader, type->getSubtypeCount()); + } } TEST_P(WriterTest, testWriteListColumnWithNull) { @@ -1326,10 +1466,11 @@ namespace orc { uint64_t stripeSize = 1024; uint64_t compressionBlockSize = 1024; + uint64_t memoryBlockSize = 64; std::unique_ptr writer = - createWriter(stripeSize, compressionBlockSize, CompressionKind_ZLIB, *type, pool, - &memStream, fileVersion); + createWriter(stripeSize, memoryBlockSize, compressionBlockSize, CompressionKind_ZLIB, *type, + pool, &memStream, fileVersion); std::unique_ptr batch = writer->createRowBatch(4); StructVectorBatch* structBatch = dynamic_cast(batch.get()); @@ -1407,10 +1548,11 @@ namespace orc { uint64_t stripeSize = 1024; uint64_t compressionBlockSize = 1024; + uint64_t memoryBlockSize = 64; std::unique_ptr writer = - createWriter(stripeSize, compressionBlockSize, CompressionKind_ZLIB, *type, pool, - &memStream, fileVersion); + createWriter(stripeSize, memoryBlockSize, compressionBlockSize, CompressionKind_ZLIB, *type, + pool, &memStream, fileVersion); // test data looks like below - // {0} @@ -1485,12 +1627,13 @@ namespace orc { uint64_t stripeSize = 1024; uint64_t compressionBlockSize = 1024; + uint64_t memoryBlockSize = 64; // 10000 rows with every 1000 row as an RG // Each RG has 100 null rows except that the 5th RG is all null std::unique_ptr writer = - createWriter(stripeSize, compressionBlockSize, CompressionKind_ZLIB, *type, pool, - &memStream, fileVersion, 1000); + createWriter(stripeSize, memoryBlockSize, compressionBlockSize, CompressionKind_ZLIB, *type, + pool, &memStream, fileVersion, 1000); std::unique_ptr batch = writer->createRowBatch(10000); StructVectorBatch* structBatch = dynamic_cast(batch.get()); @@ -1622,12 +1765,13 @@ namespace orc { TEST_P(WriterTest, testBloomFilter) { WriterOptions options; options.setStripeSize(1024) - .setCompressionBlockSize(64) + .setCompressionBlockSize(1024) .setCompression(CompressionKind_ZSTD) .setMemoryPool(getDefaultPool()) .setRowIndexStride(10000) .setFileVersion(fileVersion) - .setColumnsUseBloomFilter({1, 2, 3}); + .setColumnsUseBloomFilter({1, 2, 3}) + .setMemoryBlockSize(64); // write 65535 rows of data MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE); @@ -1716,7 +1860,7 @@ namespace orc { auto type = 
std::unique_ptr(Type::buildTypeFromString("struct")); WriterOptions options; options.setStripeSize(1024 * 1024) - .setCompressionBlockSize(1024) + .setMemoryBlockSize(1024) .setCompression(CompressionKind_NONE) .setMemoryPool(pool) .setRowIndexStride(1000); @@ -1809,8 +1953,11 @@ namespace orc { uint64_t rowCount = 5000000; auto type = std::unique_ptr(Type::buildTypeFromString("struct")); WriterOptions options; - options.setStripeSize(1024).setCompressionBlockSize(1024).setCompression(kind).setMemoryPool( - pool); + options.setStripeSize(1024) + .setCompressionBlockSize(1024) + .setMemoryBlockSize(64) + .setCompression(kind) + .setMemoryPool(pool); auto writer = createWriter(*type, &memStream, options); auto batch = writer->createRowBatch(rowCount); @@ -1853,10 +2000,11 @@ namespace orc { WriterOptions options; options.setStripeSize(1024 * 1024) .setCompressionBlockSize(64 * 1024) + .setMemoryBlockSize(1024) .setCompression(CompressionKind_NONE) .setMemoryPool(pool) .setRowIndexStride(1000) - .setOutputBufferCapacity(capacity); + .setCompressionBlockSize(capacity); auto writer = createWriter(*type, &memStream, options); auto batch = writer->createRowBatch(rowCount); @@ -1913,6 +2061,7 @@ namespace orc { uint64_t stripeSize = 16 * 1024; uint64_t compressionBlockSize = 1024; uint64_t rowCount = 65530; + uint64_t memoryBlockSize = 64; std::vector data(rowCount); for (uint64_t i = 0; i < rowCount; ++i) { @@ -1920,8 +2069,8 @@ namespace orc { } std::unique_ptr writer = - createWriter(stripeSize, compressionBlockSize, CompressionKind_ZLIB, *type, pool, - &memStream, fileVersion, 0, "GMT", true); + createWriter(stripeSize, memoryBlockSize, compressionBlockSize, CompressionKind_ZLIB, *type, + pool, &memStream, fileVersion, 0, "GMT", true); // start from here/ std::unique_ptr batch = writer->createRowBatch(rowCount / 2); StructVectorBatch* structBatch = dynamic_cast(batch.get()); @@ -2010,10 +2159,11 @@ namespace orc { uint64_t stripeSize = 1024; // 1K uint64_t compressionBlockSize = 1024; // 1k + uint64_t memoryBlockSize = 64; std::unique_ptr writer = - createWriter(stripeSize, compressionBlockSize, CompressionKind_ZLIB, *type, pool, - &memStream, fileVersion); + createWriter(stripeSize, memoryBlockSize, compressionBlockSize, CompressionKind_ZLIB, *type, + pool, &memStream, fileVersion); std::unique_ptr batch = writer->createRowBatch(65535); StructVectorBatch* structBatch = dynamic_cast(batch.get()); LongVectorBatch* longBatch = dynamic_cast(structBatch->fields[0]); @@ -2065,10 +2215,11 @@ namespace orc { uint64_t stripeSize = 1024; // 1K uint64_t compressionBlockSize = 1024; // 1k + uint64_t memoryBlockSize = 64; std::unique_ptr writer = - createWriter(stripeSize, compressionBlockSize, CompressionKind_ZLIB, *type, pool, - &memStream, fileVersion); + createWriter(stripeSize, memoryBlockSize, compressionBlockSize, CompressionKind_ZLIB, *type, + pool, &memStream, fileVersion); std::unique_ptr batch = writer->createRowBatch(65535); StructVectorBatch* structBatch = dynamic_cast(batch.get()); LongVectorBatch* longBatch = dynamic_cast(structBatch->fields[0]); @@ -2131,6 +2282,7 @@ namespace orc { WriterOptions options; options.setStripeSize(16 * 1024) .setCompressionBlockSize(1024) + .setMemoryBlockSize(64) .setCompression(CompressionKind_NONE) .setMemoryPool(pool) .setRowIndexStride(1000); @@ -2201,7 +2353,59 @@ namespace orc { std::invalid_argument); } - INSTANTIATE_TEST_SUITE_P(OrcTest, WriterTest, - Values(FileVersion::v_0_11(), FileVersion::v_0_12(), - FileVersion::UNSTABLE_PRE_2_0())); + 
TEST_P(WriterTest, testLazyLoadTZDB) {
+    MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE);
+    MemoryPool* pool = getDefaultPool();
+    std::unique_ptr<Type> type(Type::buildTypeFromString("struct"));
+
+    uint64_t stripeSize = 1024;            // 1K
+    uint64_t compressionBlockSize = 1024;  // 1k
+    uint64_t memoryBlockSize = 64;
+
+    std::unique_ptr<Writer> writer =
+        createWriter(stripeSize, memoryBlockSize, compressionBlockSize, CompressionKind_ZLIB, *type,
+                     pool, &memStream, fileVersion, 0, "/ERROR/TIMEZONE");
+    std::unique_ptr<ColumnVectorBatch> batch = writer->createRowBatch(10);
+    StructVectorBatch* structBatch = dynamic_cast<StructVectorBatch*>(batch.get());
+    LongVectorBatch* longBatch = dynamic_cast<LongVectorBatch*>(structBatch->fields[0]);
+
+    for (uint64_t j = 0; j < 10; ++j) {
+      for (uint64_t i = 0; i < 10; ++i) {
+        longBatch->data[i] = static_cast<int64_t>(i);
+      }
+      structBatch->numElements = 10;
+      longBatch->numElements = 10;
+
+      writer->add(*batch);
+    }
+
+    writer->close();
+
+    auto inStream = std::make_unique<MemoryInputStream>(memStream.getData(), memStream.getLength());
+    std::unique_ptr<Reader> reader = createReader(pool, std::move(inStream));
+    std::unique_ptr<RowReader> rowReader = createRowReader(reader.get(), "/ERROR/TIMEZONE");
+    EXPECT_EQ(100, reader->getNumberOfRows());
+
+    batch = rowReader->createRowBatch(10);
+    for (uint64_t j = 0; j < 10; ++j) {
+      EXPECT_TRUE(rowReader->next(*batch));
+      EXPECT_EQ(10, batch->numElements);
+
+      for (uint64_t i = 0; i < 10; ++i) {
+        structBatch = dynamic_cast<StructVectorBatch*>(batch.get());
+        longBatch = dynamic_cast<LongVectorBatch*>(structBatch->fields[0]);
+        EXPECT_EQ(i, longBatch->data[i]);
+      }
+    }
+    EXPECT_FALSE(rowReader->next(*batch));
+  }
+
+  std::vector<TestParams> testParams = {{FileVersion::v_0_11(), true},
+                                        {FileVersion::v_0_11(), false},
+                                        {FileVersion::v_0_12(), false},
+                                        {FileVersion::v_0_12(), true},
+                                        {FileVersion::UNSTABLE_PRE_2_0(), false},
+                                        {FileVersion::UNSTABLE_PRE_2_0(), true}};
+
+  INSTANTIATE_TEST_SUITE_P(OrcTest, WriterTest, ::testing::ValuesIn(testParams));
 }  // namespace orc
diff --git a/cmake_modules/CheckFormat.cmake b/cmake_modules/CheckFormat.cmake
new file mode 100644
index 0000000000..17017da133
--- /dev/null
+++ b/cmake_modules/CheckFormat.cmake
@@ -0,0 +1,111 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Needed for linting targets, etc.
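
Stepping back to the TestWriter changes that end just above: the suite now expands to one run per (FileVersion, enableAlignBlockBoundToRowGroup) pair via ::testing::ValuesIn. For readers unfamiliar with struct-valued test parameters, here is a minimal, self-contained sketch of that gtest pattern; the names are hypothetical stand-ins, not the patch's.

#include <gtest/gtest.h>
#include <vector>

struct DemoParams {
  int fileVersion;   // stand-in for orc::FileVersion
  bool alignBlocks;  // stand-in for enableAlignBlockBoundToRowGroup
};

class DemoWriterTest : public ::testing::TestWithParam<DemoParams> {};

TEST_P(DemoWriterTest, seesBothFields) {
  // gtest hands each element of the vector below to one run of this test.
  const DemoParams& p = GetParam();
  EXPECT_GE(p.fileVersion, 0);
  (void)p.alignBlocks;
}

static const std::vector<DemoParams> kDemoParams = {
    {11, true}, {11, false}, {12, true}, {12, false}};

INSTANTIATE_TEST_SUITE_P(Demo, DemoWriterTest, ::testing::ValuesIn(kDemoParams));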
+# Use the first Python installation on PATH, not the newest one +set(Python3_FIND_STRATEGY "LOCATION") +# On Windows, use registry last, not first +set(Python3_FIND_REGISTRY "LAST") +# On macOS, use framework last, not first +set(Python3_FIND_FRAMEWORK "LAST") + +find_package(Python3) +set(PYTHON_EXECUTABLE ${Python3_EXECUTABLE}) + +set(BUILD_SUPPORT_DIR "${PROJECT_SOURCE_DIR}/c++/build-support") + +find_program(CLANG_FORMAT_BIN + NAMES clang-format-13 + HINTS ${CLANG_SEARCH_PATH}) + +find_program(CLANG_TIDY_BIN + NAMES clang-tidy-13 + HINTS ${CLANG_SEARCH_PATH}) + +find_program(CLANG_APPLY_REPLACEMENTS_BIN + NAMES clang-apply-replacements-13 + HINTS ${CLANG_SEARCH_PATH}) + + +if("${CLANG_FORMAT_BIN}" STREQUAL "CLANG_FORMAT_BIN-NOTFOUND") + message(WARNING "Couldn't find clang-format.") +else() + message(STATUS "Found clang-format at ${CLANG_FORMAT_BIN}") +endif() + +if("${CLANG_TIDY_BIN}" STREQUAL "CLANG_TIDY_BIN-NOTFOUND") + message(WARNING "Couldn't find clang-tidy.") +else() + # Output compile_commands.json + set(CMAKE_EXPORT_COMPILE_COMMANDS 1) + message(STATUS "Found clang-tidy at ${CLANG_TIDY_BIN}") +endif() + +if("${CLANG_APPLY_REPLACEMENTS_BIN}" STREQUAL "CLANG_APPLY_REPLACEMENTS_BIN-NOTFOUND") + message(WARNING "Couldn't find clang-apply-replacements.") +else() + # Output compile_commands.json + set(CMAKE_EXPORT_COMPILE_COMMANDS 1) + message(STATUS "Found clang-apply-replacements at ${CLANG_APPLY_REPLACEMENTS_BIN}") +endif() + +if(NOT LINT_EXCLUSIONS_FILE) + # source files matching a glob from a line in this file + # will be excluded from linting (cpplint, clang-tidy, clang-format) + set(LINT_EXCLUSIONS_FILE ${BUILD_SUPPORT_DIR}/lint_exclusions.txt) +endif() + +# runs clang-tidy and exits with a non-zero exit code if any errors are found. 
+# note that clang-tidy automatically looks for a .clang-tidy file in parent directories +add_custom_target(check-clang-tidy + ${PYTHON_EXECUTABLE} + ${BUILD_SUPPORT_DIR}/run_clang_tidy.py # run LLVM's clang-tidy script + -clang-tidy-binary ${CLANG_TIDY_BIN} # using our clang-tidy binary + -p ${PROJECT_BINARY_DIR} # using cmake's generated compile commands +) + +add_custom_target(fix-clang-tidy + ${PYTHON_EXECUTABLE} + ${BUILD_SUPPORT_DIR}/run_clang_tidy.py # run LLVM's clang-tidy script + -clang-tidy-binary ${CLANG_TIDY_BIN} # using our clang-tidy binary + -p ${PROJECT_BINARY_DIR} # using cmake's generated compile commands + -clang-apply-replacements-binary ${CLANG_APPLY_REPLACEMENTS_BIN} # using our clang-apply-replacements binary + -fix # apply suggested changes generated by clang-tidy +) + +string(CONCAT ORC_FORMAT_DIRS + "${PROJECT_SOURCE_DIR}/c++," + "${PROJECT_SOURCE_DIR}/tools," +) + +add_custom_target(format + ${PYTHON_EXECUTABLE} + ${BUILD_SUPPORT_DIR}/run_clang_format.py + ${CLANG_FORMAT_BIN} + --source_dirs + ${ORC_FORMAT_DIRS} + --fix +) + +# Runs clang format and exits with a non-zero exit code if any files need to be reformatted +add_custom_target(check-format + ${PYTHON_EXECUTABLE} + ${BUILD_SUPPORT_DIR}/run_clang_format.py + ${CLANG_FORMAT_BIN} + --source_dirs + ${ORC_FORMAT_DIRS} +) \ No newline at end of file diff --git a/cmake_modules/FindLZ4.cmake b/cmake_modules/FindLZ4.cmake index b1557f496b..3b9cc7fbd1 100644 --- a/cmake_modules/FindLZ4.cmake +++ b/cmake_modules/FindLZ4.cmake @@ -22,6 +22,16 @@ # LZ4_STATIC_LIB: path to lz4.a # LZ4_FOUND: whether LZ4 has been found +if (NOT LZ4_HOME) + if (DEFINED ENV{LZ4_HOME}) + set (LZ4_HOME "$ENV{LZ4_HOME}") + elseif (LZ4_ROOT) + set (LZ4_HOME "${LZ4_ROOT}") + elseif (DEFINED ENV{LZ4_ROOT}) + set (LZ4_HOME "$ENV{LZ4_ROOT}") + endif () +endif () + if( NOT "${LZ4_HOME}" STREQUAL "") file (TO_CMAKE_PATH "${LZ4_HOME}" _lz4_path) endif() @@ -33,7 +43,7 @@ find_path (LZ4_INCLUDE_DIR lz4.h HINTS NO_DEFAULT_PATH PATH_SUFFIXES "include") -find_library (LZ4_LIBRARY NAMES lz4 HINTS +find_library (LZ4_LIBRARY NAMES lz4 liblz4 HINTS ${_lz4_path} PATH_SUFFIXES "lib" "lib64") @@ -74,3 +84,10 @@ mark_as_advanced ( LZ4_STATIC_LIB LZ4_LIBRARY ) + +if(LZ4_FOUND AND NOT TARGET LZ4::lz4) + add_library(LZ4::lz4 UNKNOWN IMPORTED) + set_target_properties(LZ4::lz4 + PROPERTIES IMPORTED_LOCATION "${LZ4_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${LZ4_INCLUDE_DIR}") +endif() diff --git a/cmake_modules/FindProtobuf.cmake b/cmake_modules/FindProtobuf.cmake index cca7c8b87e..ca91fb5ade 100644 --- a/cmake_modules/FindProtobuf.cmake +++ b/cmake_modules/FindProtobuf.cmake @@ -17,7 +17,7 @@ # PROTOBUF_HOME environmental variable is used to check for Protobuf headers and static library -# PROTOBUF_FOUND is set if Protobuf is found +# Protobuf_FOUND is set if Protobuf is found # PROTOBUF_INCLUDE_DIR: directory containing headers # PROTOBUF_LIBRARY: location of libprotobuf # PROTOBUF_STATIC_LIB: location of protobuf.a @@ -25,6 +25,19 @@ # PROTOC_STATIC_LIB: location of protoc.a # PROTOBUF_EXECUTABLE: location of protoc +if (NOT PROTOBUF_HOME) + if (DEFINED ENV{PROTOBUF_HOME}) + set (PROTOBUF_HOME "$ENV{PROTOBUF_HOME}") + elseif (Protobuf_ROOT) + set (PROTOBUF_HOME "${Protobuf_ROOT}") + elseif (DEFINED ENV{Protobuf_ROOT}) + set (PROTOBUF_HOME "$ENV{Protobuf_ROOT}") + elseif (PROTOBUF_ROOT) + set (PROTOBUF_HOME "${PROTOBUF_ROOT}") + elseif (DEFINED ENV{PROTOBUF_ROOT}) + set (PROTOBUF_HOME "$ENV{PROTOBUF_ROOT}") + endif () +endif () if( NOT "${PROTOBUF_HOME}" STREQUAL 
"") file (TO_CMAKE_PATH "${PROTOBUF_HOME}" _protobuf_path) @@ -32,8 +45,17 @@ endif() message (STATUS "PROTOBUF_HOME: ${PROTOBUF_HOME}") +if (NOT DEFINED CMAKE_STATIC_LIBRARY_SUFFIX) + if (WIN32) + set (CMAKE_STATIC_LIBRARY_SUFFIX ".lib") + else () + set (CMAKE_STATIC_LIBRARY_SUFFIX ".a") + endif () +endif () + find_package (Protobuf CONFIG) if (Protobuf_FOUND) + if (TARGET protobuf::libprotobuf) set (PROTOBUF_LIBRARY protobuf::libprotobuf) set (PROTOBUF_STATIC_LIB PROTOBUF_STATIC_LIB-NOTFOUND) set (PROTOC_LIBRARY protobuf::libprotoc) @@ -42,15 +64,34 @@ if (Protobuf_FOUND) get_target_property (target_type protobuf::libprotobuf TYPE) if (target_type STREQUAL "STATIC_LIBRARY") - set(PROTOBUF_STATIC_LIB protobuf::libprotobuf) + set (PROTOBUF_STATIC_LIB protobuf::libprotobuf) endif () get_target_property (target_type protobuf::libprotoc TYPE) if (target_type STREQUAL "STATIC_LIBRARY") - set (PROTOC_STATIC_LIB protobuf::libprotoc) + set (PROTOC_STATIC_LIB protobuf::libprotoc) endif () - get_target_property (PROTOBUF_INCLUDE_DIR protobuf::libprotoc INTERFACE_INCLUDE_DIRECTORIES) + get_target_property (PROTOBUF_INCLUDE_DIR protobuf::libprotobuf INTERFACE_INCLUDE_DIRECTORIES) + if (NOT PROTOBUF_INCLUDE_DIR) + set (PROTOBUF_INCLUDE_DIR ${Protobuf_INCLUDE_DIRS}) + if (NOT PROTOBUF_INCLUDE_DIR) + message (FATAL_ERROR "Cannot determine Protobuf include directory from protobuf::libprotobuf and Protobuf_INCLUDE_DIRS.") + endif () + endif () + else () + set (PROTOBUF_LIBRARY ${Protobuf_LIBRARIES}) + set (PROTOBUF_INCLUDE_DIR ${Protobuf_INCLUDE_DIRS}) + if (NOT PROTOBUF_INCLUDE_DIR) + message (FATAL_ERROR "Cannot determine Protobuf include directory.") + endif () + + if (Protobuf_LIBRARIES MATCHES "\\${CMAKE_STATIC_LIBRARY_SUFFIX}$") + set (PROTOBUF_STATIC_LIB ${Protobuf_LIBRARIES}) + else () + set (PROTOBUF_STATIC_LIB PROTOBUF_STATIC_LIB-NOTFOUND) + endif () + endif () else() find_path (PROTOBUF_INCLUDE_DIR google/protobuf/io/zero_copy_stream.h HINTS @@ -63,7 +104,7 @@ else() NO_DEFAULT_PATH PATH_SUFFIXES "include") - find_library (PROTOBUF_LIBRARY NAMES protobuf HINTS + find_library (PROTOBUF_LIBRARY NAMES protobuf libprotobuf HINTS ${_protobuf_path} PATH_SUFFIXES "lib") @@ -71,7 +112,7 @@ else() ${_protobuf_path} PATH_SUFFIXES "lib") - find_library (PROTOC_LIBRARY NAMES protoc HINTS + find_library (PROTOC_LIBRARY NAMES protoc libprotoc HINTS ${_protobuf_path} PATH_SUFFIXES "lib") @@ -86,14 +127,14 @@ else() endif () if (PROTOBUF_INCLUDE_DIR AND PROTOBUF_LIBRARY AND PROTOC_LIBRARY AND PROTOBUF_EXECUTABLE) - set (PROTOBUF_FOUND TRUE) + set (Protobuf_FOUND TRUE) set (PROTOBUF_LIB_NAME protobuf) set (PROTOC_LIB_NAME protoc) else () - set (PROTOBUF_FOUND FALSE) + set (Protobuf_FOUND FALSE) endif () -if (PROTOBUF_FOUND) +if (Protobuf_FOUND) message (STATUS "Found the Protobuf headers: ${PROTOBUF_INCLUDE_DIR}") message (STATUS "Found the Protobuf library: ${PROTOBUF_LIBRARY}") message (STATUS "Found the Protoc library: ${PROTOC_LIBRARY}") @@ -125,3 +166,10 @@ mark_as_advanced ( PROTOC_STATIC_LIB PROTOC_LIBRARY ) + +if(Protobuf_FOUND AND NOT TARGET protobuf::libprotobuf) + add_library(protobuf::libprotobuf UNKNOWN IMPORTED) + set_target_properties(protobuf::libprotobuf + PROPERTIES IMPORTED_LOCATION "${PROTOBUF_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${PROTOBUF_INCLUDE_DIR}") +endif() diff --git a/cmake_modules/FindSnappy.cmake b/cmake_modules/FindSnappy.cmake index f0a0773801..1ad9914542 100644 --- a/cmake_modules/FindSnappy.cmake +++ b/cmake_modules/FindSnappy.cmake @@ -20,7 +20,21 @@ # 
SNAPPY_INCLUDE_DIR: directory containing headers # SNAPPY_LIBRARY: path to libsnappy # SNAPPY_STATIC_LIB: path to libsnappy.a -# SNAPPY_FOUND: whether snappy has been found +# Snappy_FOUND: whether snappy has been found + +if (NOT SNAPPY_HOME) + if (DEFINED ENV{SNAPPY_HOME}) + set (SNAPPY_HOME "$ENV{SNAPPY_HOME}") + elseif (Snappy_ROOT) + set (SNAPPY_HOME "${Snappy_ROOT}") + elseif (DEFINED ENV{Snappy_ROOT}) + set (SNAPPY_HOME "$ENV{Snappy_ROOT}") + elseif (SNAPPY_ROOT) + set (SNAPPY_HOME "${SNAPPY_ROOT}") + elseif (DEFINED ENV{SNAPPY_ROOT}) + set (SNAPPY_HOME "$ENV{SNAPPY_ROOT}") + endif () +endif () if( NOT "${SNAPPY_HOME}" STREQUAL "") file (TO_CMAKE_PATH "${SNAPPY_HOME}" _snappy_path) @@ -42,14 +56,14 @@ find_library (SNAPPY_STATIC_LIB NAMES ${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_LIB PATH_SUFFIXES "lib" "lib64") if (SNAPPY_INCLUDE_DIR AND SNAPPY_LIBRARY) - set (SNAPPY_FOUND TRUE) + set (Snappy_FOUND TRUE) set (SNAPPY_HEADER_NAME snappy.h) set (SNAPPY_HEADER ${SNAPPY_INCLUDE_DIR}/${SNAPPY_HEADER_NAME}) else () - set (SNAPPY_FOUND FALSE) + set (Snappy_FOUND FALSE) endif () -if (SNAPPY_FOUND) +if (Snappy_FOUND) message (STATUS "Found the Snappy header: ${SNAPPY_HEADER}") message (STATUS "Found the Snappy library: ${SNAPPY_LIBRARY}") if (SNAPPY_STATIC_LIB) @@ -74,3 +88,10 @@ mark_as_advanced ( SNAPPY_STATIC_LIB SNAPPY_LIBRARY ) + +if(Snappy_FOUND AND NOT TARGET Snappy::snappy) + add_library(Snappy::snappy UNKNOWN IMPORTED) + set_target_properties(Snappy::snappy + PROPERTIES IMPORTED_LOCATION "${SNAPPY_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${SNAPPY_INCLUDE_DIR}") +endif() diff --git a/cmake_modules/FindZLIB.cmake b/cmake_modules/FindZLIB.cmake index 2f83a974cd..374814a7f9 100644 --- a/cmake_modules/FindZLIB.cmake +++ b/cmake_modules/FindZLIB.cmake @@ -22,6 +22,16 @@ # ZLIB_STATIC_LIB: path to zlib.a # ZLIB_FOUND: whether ZLIB has been found +if (NOT ZLIB_HOME) + if (DEFINED ENV{ZLIB_HOME}) + set (ZLIB_HOME "$ENV{ZLIB_HOME}") + elseif (ZLIB_ROOT) + set (ZLIB_HOME "${ZLIB_ROOT}") + elseif (DEFINED ENV{ZLIB_ROOT}) + set (ZLIB_HOME "$ENV{ZLIB_ROOT}") + endif () +endif () + if( NOT "${ZLIB_HOME}" STREQUAL "") file (TO_CMAKE_PATH "${ZLIB_HOME}" _zlib_path) endif() @@ -78,3 +88,10 @@ mark_as_advanced ( ZLIB_STATIC_LIB ZLIB_LIBRARY ) + +if(ZLIB_FOUND AND NOT TARGET ZLIB::ZLIB) + add_library(ZLIB::ZLIB UNKNOWN IMPORTED) + set_target_properties(ZLIB::ZLIB + PROPERTIES IMPORTED_LOCATION "${ZLIB_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${ZLIB_INCLUDE_DIR}") +endif() diff --git a/cmake_modules/FindZSTD.cmake b/cmake_modules/FindZSTD.cmake index 7ec197221d..581719453c 100644 --- a/cmake_modules/FindZSTD.cmake +++ b/cmake_modules/FindZSTD.cmake @@ -22,6 +22,16 @@ # ZSTD_STATIC_LIB: path to libzstd.a # ZSTD_FOUND: whether zstd has been found +if (NOT ZSTD_HOME) + if (DEFINED ENV{ZSTD_HOME}) + set (ZSTD_HOME "$ENV{ZSTD_HOME}") + elseif (ZSTD_ROOT) + set (ZSTD_HOME "${ZSTD_ROOT}") + elseif (DEFINED ENV{ZSTD_ROOT}) + set (ZSTD_HOME "$ENV{ZSTD_ROOT}") + endif () +endif () + if( NOT "${ZSTD_HOME}" STREQUAL "") file (TO_CMAKE_PATH "${ZSTD_HOME}" _zstd_path) endif() @@ -74,3 +84,18 @@ mark_as_advanced ( ZSTD_STATIC_LIB ZSTD_LIBRARY ) + +if(ZSTD_FOUND) + if(NOT TARGET zstd::libzstd_static AND ZSTD_STATIC_LIB) + add_library(zstd::libzstd_static STATIC IMPORTED) + set_target_properties(zstd::libzstd_static + PROPERTIES IMPORTED_LOCATION "${ZSTD_STATIC_LIB}" + INTERFACE_INCLUDE_DIRECTORIES "${ZSTD_INCLUDE_DIR}") + endif() + if(NOT TARGET zstd::libzstd_shared AND NOT ZSTD_STATIC_LIB) + 
add_library(zstd::libzstd_shared SHARED IMPORTED) + set_target_properties(zstd::libzstd_shared + PROPERTIES IMPORTED_LOCATION "${ZSTD_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${ZSTD_INCLUDE_DIR}") + endif() +endif() diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake index ec33193d79..ce1f6c4a9e 100644 --- a/cmake_modules/ThirdpartyToolchain.cmake +++ b/cmake_modules/ThirdpartyToolchain.cmake @@ -15,10 +15,14 @@ # specific language governing permissions and limitations # under the License. -set(ORC_FORMAT_VERSION "1.0.0") -set(LZ4_VERSION "1.9.3") -set(SNAPPY_VERSION "1.1.7") -set(ZLIB_VERSION "1.2.11") +set(ORC_VENDOR_DEPENDENCIES) +set(ORC_SYSTEM_DEPENDENCIES) +set(ORC_INSTALL_INTERFACE_TARGETS) + +set(ORC_FORMAT_VERSION "1.1.0") +set(LZ4_VERSION "1.10.0") +set(SNAPPY_VERSION "1.2.1") +set(ZLIB_VERSION "1.3.1") set(GTEST_VERSION "1.12.1") set(PROTOBUF_VERSION "3.5.1") set(ZSTD_VERSION "1.5.5") @@ -33,7 +37,7 @@ option(ORC_PREFER_STATIC_GMOCK "Prefer static gmock library, if available" # zstd requires us to add the threads FIND_PACKAGE(Threads REQUIRED) -set(THIRDPARTY_DIR "${CMAKE_BINARY_DIR}/c++/libs/thirdparty") +set(THIRDPARTY_DIR "${PROJECT_BINARY_DIR}/c++/libs/thirdparty") set(THIRDPARTY_LOG_OPTIONS LOG_CONFIGURE 1 LOG_BUILD 1 LOG_INSTALL 1 @@ -47,34 +51,101 @@ string(TOUPPER ${CMAKE_BUILD_TYPE} UPPERCASE_BUILD_TYPE) if (DEFINED ENV{SNAPPY_HOME}) set (SNAPPY_HOME "$ENV{SNAPPY_HOME}") +elseif (Snappy_ROOT) + set (SNAPPY_HOME "${Snappy_ROOT}") +elseif (DEFINED ENV{Snappy_ROOT}) + set (SNAPPY_HOME "$ENV{Snappy_ROOT}") +elseif (SNAPPY_ROOT) + set (SNAPPY_HOME "${SNAPPY_ROOT}") +elseif (DEFINED ENV{SNAPPY_ROOT}) + set (SNAPPY_HOME "$ENV{SNAPPY_ROOT}") endif () if (DEFINED ENV{ZLIB_HOME}) set (ZLIB_HOME "$ENV{ZLIB_HOME}") +elseif (ZLIB_ROOT) + set (ZLIB_HOME "${ZLIB_ROOT}") +elseif (DEFINED ENV{ZLIB_ROOT}) + set (ZLIB_HOME "$ENV{ZLIB_ROOT}") endif () if (DEFINED ENV{LZ4_HOME}) set (LZ4_HOME "$ENV{LZ4_HOME}") +elseif (LZ4_ROOT) + set (LZ4_HOME "${LZ4_ROOT}") +elseif (DEFINED ENV{LZ4_ROOT}) + set (LZ4_HOME "$ENV{LZ4_ROOT}") endif () if (DEFINED ENV{PROTOBUF_HOME}) set (PROTOBUF_HOME "$ENV{PROTOBUF_HOME}") +elseif (Protobuf_ROOT) + set (PROTOBUF_HOME "${Protobuf_ROOT}") +elseif (DEFINED ENV{Protobuf_ROOT}) + set (PROTOBUF_HOME "$ENV{Protobuf_ROOT}") +elseif (PROTOBUF_ROOT) + set (PROTOBUF_HOME "${PROTOBUF_ROOT}") +elseif (DEFINED ENV{PROTOBUF_ROOT}) + set (PROTOBUF_HOME "$ENV{PROTOBUF_ROOT}") endif () if (DEFINED ENV{ZSTD_HOME}) set (ZSTD_HOME "$ENV{ZSTD_HOME}") +elseif (ZSTD_ROOT) + set (ZSTD_HOME "${ZSTD_ROOT}") +elseif (DEFINED ENV{ZSTD_ROOT}) + set (ZSTD_HOME "$ENV{ZSTD_ROOT}") endif () if (DEFINED ENV{GTEST_HOME}) set (GTEST_HOME "$ENV{GTEST_HOME}") endif () +# ---------------------------------------------------------------------- +# Macros for adding third-party libraries +macro (orc_add_resolved_library target_name link_lib include_dir) + add_library (${target_name} INTERFACE IMPORTED GLOBAL) + target_link_libraries (${target_name} INTERFACE ${link_lib}) + target_include_directories (${target_name} SYSTEM INTERFACE ${include_dir}) +endmacro () + +macro (orc_add_built_library external_project_name target_name link_lib include_dir) + file (MAKE_DIRECTORY "${include_dir}") + + add_library (${target_name} STATIC IMPORTED) + set_target_properties (${target_name} PROPERTIES IMPORTED_LOCATION "${link_lib}") + target_include_directories (${target_name} BEFORE INTERFACE "${include_dir}") + + add_dependencies (${target_name} ${external_project_name}) 
+ if (INSTALL_VENDORED_LIBS) + install (FILES "${link_lib}" DESTINATION "${CMAKE_INSTALL_LIBDIR}") + endif () +endmacro () + +function(orc_provide_cmake_module MODULE_NAME) + set(module "${PROJECT_SOURCE_DIR}/cmake_modules/${MODULE_NAME}.cmake") + if(EXISTS "${module}") + message(STATUS "Providing CMake module for ${MODULE_NAME} as part of CMake package") + install(FILES "${module}" DESTINATION "${ORC_INSTALL_CMAKE_DIR}") + endif() +endfunction() + +function(orc_provide_find_module PACKAGE_NAME) + orc_provide_cmake_module("Find${PACKAGE_NAME}") +endfunction() + # ---------------------------------------------------------------------- # ORC Format +if(DEFINED ENV{ORC_FORMAT_URL}) + set(ORC_FORMAT_SOURCE_URL "$ENV{ORC_FORMAT_URL}") + message(STATUS "Using ORC_FORMAT_URL: ${ORC_FORMAT_SOURCE_URL}") +else() + set(ORC_FORMAT_SOURCE_URL "https://www.apache.org/dyn/closer.lua/orc/orc-format-$%7BORC_FORMAT_VERSION%7D/orc-format-$%7BORC_FORMAT_VERSION%7D.tar.gz?action=download" ) + message(STATUS "Using DEFAULT URL: ${ORC_FORMAT_SOURCE_URL}") +endif() ExternalProject_Add (orc-format_ep - URL "https://dlcdn.apache.org/orc/orc-format-$%7BORC_FORMAT_VERSION%7D/orc-format-$%7BORC_FORMAT_VERSION%7D.tar.gz" - URL "https://archive.apache.org/dist/orc/orc-format-$%7BORC_FORMAT_VERSION%7D/orc-format-$%7BORC_FORMAT_VERSION%7D.tar.gz" - URL_HASH SHA256=739fae5ff94b1f812b413077280361045bf92e510ef04b34a610e23a945d8cd5 + URL ${ORC_FORMAT_SOURCE_URL} + URL_HASH SHA256=d4a7ac76c5442abf7119e2cb84e71b677e075aff53518aa866055e2ead0450d7 CONFIGURE_COMMAND "" BUILD_COMMAND "" INSTALL_COMMAND "" @@ -83,16 +154,36 @@ ExternalProject_Add (orc-format_ep # ---------------------------------------------------------------------- # Snappy - -if (NOT "${SNAPPY_HOME}" STREQUAL "" OR ORC_PACKAGE_KIND STREQUAL "conan") +if (ORC_PACKAGE_KIND STREQUAL "conan") + find_package (Snappy REQUIRED CONFIG) + add_library (orc_snappy INTERFACE) + target_link_libraries(orc_snappy INTERFACE Snappy::snappy) + list (APPEND ORC_SYSTEM_DEPENDENCIES Snappy) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") + find_package(Snappy CONFIG REQUIRED) + add_library (orc_snappy INTERFACE IMPORTED) + target_link_libraries(orc_snappy INTERFACE Snappy::snappy) + list (APPEND ORC_SYSTEM_DEPENDENCIES Snappy) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +elseif (NOT "${SNAPPY_HOME}" STREQUAL "") find_package (Snappy REQUIRED) - set(SNAPPY_VENDORED FALSE) + if (ORC_PREFER_STATIC_SNAPPY AND SNAPPY_STATIC_LIB) + orc_add_resolved_library (orc_snappy ${SNAPPY_STATIC_LIB} ${SNAPPY_INCLUDE_DIR}) + else () + orc_add_resolved_library (orc_snappy ${SNAPPY_LIBRARY} ${SNAPPY_INCLUDE_DIR}) + endif () + list (APPEND ORC_SYSTEM_DEPENDENCIES Snappy) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") + orc_provide_find_module (Snappy) else () set(SNAPPY_HOME "${THIRDPARTY_DIR}/snappy_ep-install") set(SNAPPY_INCLUDE_DIR "${SNAPPY_HOME}/include") - set(SNAPPY_STATIC_LIB "${SNAPPY_HOME}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}snappy${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(SNAPPY_STATIC_LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}snappy${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(SNAPPY_STATIC_LIB "${SNAPPY_HOME}/lib/${SNAPPY_STATIC_LIB_NAME}") set(SNAPPY_CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${SNAPPY_HOME} - -DBUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_LIBDIR=lib) + -DBUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_LIBDIR=lib + -DSNAPPY_BUILD_BENCHMARKS=OFF) if (BUILD_POSITION_INDEPENDENT_LIB) set(SNAPPY_CMAKE_ARGS ${SNAPPY_CMAKE_ARGS} 
-DCMAKE_POSITION_INDEPENDENT_CODE=ON) @@ -104,39 +195,39 @@ else () ${THIRDPARTY_LOG_OPTIONS} BUILD_BYPRODUCTS "${SNAPPY_STATIC_LIB}") - set(SNAPPY_LIBRARY ${SNAPPY_STATIC_LIB}) - set(SNAPPY_VENDORED TRUE) -endif () + orc_add_built_library (snappy_ep orc_snappy ${SNAPPY_STATIC_LIB} ${SNAPPY_INCLUDE_DIR}) -add_library (orc_snappy INTERFACE) -add_library (orc::snappy ALIAS orc_snappy) -if (ORC_PACKAGE_KIND STREQUAL "conan") - target_link_libraries(orc_snappy INTERFACE ${Snappy_LIBRARIES}) -elseif (ORC_PREFER_STATIC_SNAPPY AND ${SNAPPY_STATIC_LIB}) - target_link_libraries(orc_snappy INTERFACE ${SNAPPY_STATIC_LIB}) -else () - target_link_libraries(orc_snappy INTERFACE ${SNAPPY_LIBRARY}) -endif () -if (ORC_PACKAGE_KIND STREQUAL "conan") - target_include_directories (orc_snappy SYSTEM INTERFACE ${Snappy_INCLUDE_DIR}) -else() - target_include_directories (orc_snappy SYSTEM INTERFACE ${SNAPPY_INCLUDE_DIR}) + list (APPEND ORC_VENDOR_DEPENDENCIES "orc::vendored_snappy|${SNAPPY_STATIC_LIB_NAME}") + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") endif () -if (SNAPPY_VENDORED) - add_dependencies (orc_snappy snappy_ep) - if (INSTALL_VENDORED_LIBS) - install(FILES "${SNAPPY_STATIC_LIB}" - DESTINATION "lib") - endif () -endif () +add_library (orc::snappy ALIAS orc_snappy) # ---------------------------------------------------------------------- # ZLIB -if (NOT "${ZLIB_HOME}" STREQUAL "" OR ORC_PACKAGE_KIND STREQUAL "conan") +if (ORC_PACKAGE_KIND STREQUAL "conan") + find_package (ZLIB REQUIRED CONFIG) + add_library (orc_zlib INTERFACE) + target_link_libraries(orc_zlib INTERFACE ZLIB::ZLIB) + list (APPEND ORC_SYSTEM_DEPENDENCIES ZLIB) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") + find_package(ZLIB REQUIRED) + add_library (orc_zlib INTERFACE IMPORTED) + target_link_libraries(orc_zlib INTERFACE ZLIB::ZLIB) + list (APPEND ORC_SYSTEM_DEPENDENCIES ZLIB) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +elseif (NOT "${ZLIB_HOME}" STREQUAL "") find_package (ZLIB REQUIRED) - set(ZLIB_VENDORED FALSE) + if (ORC_PREFER_STATIC_ZLIB AND ZLIB_STATIC_LIB) + orc_add_resolved_library (orc_zlib ${ZLIB_STATIC_LIB} ${ZLIB_INCLUDE_DIR}) + else () + orc_add_resolved_library (orc_zlib ${ZLIB_LIBRARY} ${ZLIB_INCLUDE_DIR}) + endif () + list (APPEND ORC_SYSTEM_DEPENDENCIES ZLIB) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") + orc_provide_find_module (ZLIB) else () set(ZLIB_PREFIX "${THIRDPARTY_DIR}/zlib_ep-install") set(ZLIB_INCLUDE_DIR "${ZLIB_PREFIX}/include") @@ -148,7 +239,8 @@ else () else () set(ZLIB_STATIC_LIB_NAME z) endif () - set(ZLIB_STATIC_LIB "${ZLIB_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${ZLIB_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(ZLIB_STATIC_LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${ZLIB_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(ZLIB_STATIC_LIB "${ZLIB_PREFIX}/lib/${ZLIB_STATIC_LIB_NAME}") set(ZLIB_CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${ZLIB_PREFIX} -DBUILD_SHARED_LIBS=OFF) @@ -162,35 +254,43 @@ else () ${THIRDPARTY_LOG_OPTIONS} BUILD_BYPRODUCTS "${ZLIB_STATIC_LIB}") - set(ZLIB_LIBRARY ${ZLIB_STATIC_LIB}) - set(ZLIB_VENDORED TRUE) -endif () + orc_add_built_library (zlib_ep orc_zlib ${ZLIB_STATIC_LIB} ${ZLIB_INCLUDE_DIR}) -add_library (orc_zlib INTERFACE) -add_library (orc::zlib ALIAS orc_zlib) -if (ORC_PACKAGE_KIND STREQUAL "conan") - target_link_libraries (orc_zlib INTERFACE ${ZLIB_LIBRARIES}) -elseif (ORC_PREFER_STATIC_ZLIB AND ${ZLIB_STATIC_LIB}) - target_link_libraries (orc_zlib INTERFACE ${ZLIB_STATIC_LIB}) -else () - 
target_link_libraries (orc_zlib INTERFACE ${ZLIB_LIBRARY}) + list (APPEND ORC_VENDOR_DEPENDENCIES "orc::vendored_zlib|${ZLIB_STATIC_LIB_NAME}") + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") endif () -target_include_directories (orc_zlib SYSTEM INTERFACE ${ZLIB_INCLUDE_DIR}) -if (ZLIB_VENDORED) - add_dependencies (orc_zlib zlib_ep) - if (INSTALL_VENDORED_LIBS) - install(FILES "${ZLIB_STATIC_LIB}" - DESTINATION "lib") - endif () -endif () +add_library (orc::zlib ALIAS orc_zlib) # ---------------------------------------------------------------------- # Zstd -if (NOT "${ZSTD_HOME}" STREQUAL "" OR ORC_PACKAGE_KIND STREQUAL "conan") +if (ORC_PACKAGE_KIND STREQUAL "conan") + find_package (ZSTD REQUIRED CONFIG) + add_library (orc_zstd INTERFACE) + target_link_libraries (orc_zstd INTERFACE + $ + $ + ) + list (APPEND ORC_SYSTEM_DEPENDENCIES ZSTD) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$,zstd::libzstd_shared,zstd::libzstd_static>>") +elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") + find_package(zstd CONFIG REQUIRED) + add_library(orc_zstd INTERFACE) + target_link_libraries(orc_zstd INTERFACE $,zstd::libzstd_shared,zstd::libzstd_static>) + list(APPEND ORC_SYSTEM_DEPENDENCIES zstd) + list(APPEND ORC_INSTALL_INTERFACE_TARGETS "$,zstd::libzstd_shared,zstd::libzstd_static>>") +elseif (NOT "${ZSTD_HOME}" STREQUAL "") find_package (ZSTD REQUIRED) - set(ZSTD_VENDORED FALSE) + if (ORC_PREFER_STATIC_ZSTD AND ZSTD_STATIC_LIB) + orc_add_resolved_library (orc_zstd ${ZSTD_STATIC_LIB} ${ZSTD_INCLUDE_DIR}) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") + else () + orc_add_resolved_library (orc_zstd ${ZSTD_LIBRARY} ${ZSTD_INCLUDE_DIR}) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$,zstd::libzstd_shared,zstd::libzstd_static>>") + endif () + list (APPEND ORC_SYSTEM_DEPENDENCIES ZSTD) + orc_provide_find_module (ZSTD) else () set(ZSTD_HOME "${THIRDPARTY_DIR}/zstd_ep-install") set(ZSTD_INCLUDE_DIR "${ZSTD_HOME}/include") @@ -202,7 +302,8 @@ else () else () set(ZSTD_STATIC_LIB_NAME zstd) endif () - set(ZSTD_STATIC_LIB "${ZSTD_HOME}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${ZSTD_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(ZSTD_STATIC_LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${ZSTD_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(ZSTD_STATIC_LIB "${ZSTD_HOME}/lib/${ZSTD_STATIC_LIB_NAME}") set(ZSTD_CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${ZSTD_HOME} -DBUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_LIBDIR=lib) @@ -223,43 +324,46 @@ else () ${THIRDPARTY_LOG_OPTIONS} BUILD_BYPRODUCTS ${ZSTD_STATIC_LIB}) - set(ZSTD_LIBRARY ${ZSTD_STATIC_LIB}) - set(ZSTD_VENDORED TRUE) -endif () + orc_add_built_library (zstd_ep orc_zstd ${ZSTD_STATIC_LIB} ${ZSTD_INCLUDE_DIR}) -add_library (orc_zstd INTERFACE) -add_library (orc::zstd ALIAS orc_zstd) -if (ORC_PACKAGE_KIND STREQUAL "conan") - target_link_libraries (orc_zstd INTERFACE ${zstd_LIBRARIES}) -elseif (ORC_PREFER_STATIC_ZSTD AND ${ZSTD_STATIC_LIB}) - target_link_libraries (orc_zstd INTERFACE ${ZSTD_STATIC_LIB}) -else () - target_link_libraries (orc_zstd INTERFACE ${ZSTD_LIBRARY}) -endif () -if (ORC_PACKAGE_KIND STREQUAL "conan") - target_include_directories (orc_zstd SYSTEM INTERFACE ${zstd_INCLUDE_DIR}) -else() - target_include_directories (orc_zstd SYSTEM INTERFACE ${ZSTD_INCLUDE_DIR}) + list (APPEND ORC_VENDOR_DEPENDENCIES "orc::vendored_zstd|${ZSTD_STATIC_LIB_NAME}") + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") endif () -if (ZSTD_VENDORED) - add_dependencies (orc_zstd zstd_ep) - if (INSTALL_VENDORED_LIBS) - install(FILES "${ZSTD_STATIC_LIB}" - DESTINATION "lib") - endif () 
-endif () +add_library (orc::zstd ALIAS orc_zstd) # ---------------------------------------------------------------------- # LZ4 - -if (NOT "${LZ4_HOME}" STREQUAL "" OR ORC_PACKAGE_KIND STREQUAL "conan") +if (ORC_PACKAGE_KIND STREQUAL "conan") + find_package (LZ4 REQUIRED CONFIG) + add_library (orc_lz4 INTERFACE) + target_link_libraries (orc_lz4 INTERFACE + $ + $ + ) + list (APPEND ORC_SYSTEM_DEPENDENCIES LZ4) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$,LZ4::lz4_shared,LZ4::lz4_static>>") +elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") + find_package(lz4 CONFIG REQUIRED) + add_library (orc_lz4 INTERFACE IMPORTED) + target_link_libraries(orc_lz4 INTERFACE LZ4::lz4) + list (APPEND ORC_SYSTEM_DEPENDENCIES lz4) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +elseif (NOT "${LZ4_HOME}" STREQUAL "") find_package (LZ4 REQUIRED) - set(LZ4_VENDORED FALSE) + if (ORC_PREFER_STATIC_LZ4 AND LZ4_STATIC_LIB) + orc_add_resolved_library (orc_lz4 ${LZ4_STATIC_LIB} ${LZ4_INCLUDE_DIR}) + else () + orc_add_resolved_library (orc_lz4 ${LZ4_LIBRARY} ${LZ4_INCLUDE_DIR}) + endif () + list (APPEND ORC_SYSTEM_DEPENDENCIES LZ4) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") + orc_provide_find_module (LZ4) else () set(LZ4_PREFIX "${THIRDPARTY_DIR}/lz4_ep-install") set(LZ4_INCLUDE_DIR "${LZ4_PREFIX}/include") - set(LZ4_STATIC_LIB "${LZ4_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}lz4${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(LZ4_STATIC_LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}lz4${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(LZ4_STATIC_LIB "${LZ4_PREFIX}/lib/${LZ4_STATIC_LIB_NAME}") set(LZ4_CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LZ4_PREFIX} -DCMAKE_INSTALL_LIBDIR=lib -DBUILD_SHARED_LIBS=OFF) @@ -281,32 +385,13 @@ else () ${THIRDPARTY_LOG_OPTIONS} BUILD_BYPRODUCTS ${LZ4_STATIC_LIB}) - set(LZ4_LIBRARY ${LZ4_STATIC_LIB}) - set(LZ4_VENDORED TRUE) -endif () + orc_add_built_library (lz4_ep orc_lz4 ${LZ4_STATIC_LIB} ${LZ4_INCLUDE_DIR}) -add_library (orc_lz4 INTERFACE) -add_library (orc::lz4 ALIAS orc_lz4) -if (ORC_PACKAGE_KIND STREQUAL "conan") - target_link_libraries (orc_lz4 INTERFACE ${lz4_LIBRARIES}) -elseif (ORC_PREFER_STATIC_LZ4 AND ${LZ4_STATIC_LIB}) - target_link_libraries (orc_lz4 INTERFACE ${LZ4_STATIC_LIB}) -else () - target_link_libraries (orc_lz4 INTERFACE ${LZ4_LIBRARY}) -endif () -if (ORC_PACKAGE_KIND STREQUAL "conan") - target_include_directories (orc_lz4 SYSTEM INTERFACE ${lz4_INCLUDE_DIR}) -else() - target_include_directories (orc_lz4 SYSTEM INTERFACE ${LZ4_INCLUDE_DIR}) + list (APPEND ORC_VENDOR_DEPENDENCIES "orc::vendored_lz4|${LZ4_STATIC_LIB_NAME}") + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") endif () -if (LZ4_VENDORED) - add_dependencies (orc_lz4 lz4_ep) - if (INSTALL_VENDORED_LIBS) - install(FILES "${LZ4_STATIC_LIB}" - DESTINATION "lib") - endif () -endif () +add_library (orc::lz4 ALIAS orc_lz4) # ---------------------------------------------------------------------- # IANA - Time Zone Database @@ -390,7 +475,7 @@ if (BUILD_CPP_TESTS) add_library (orc::gmock ALIAS orc_gmock) add_library (orc_gtest INTERFACE) add_library (orc::gtest ALIAS orc_gtest) - if (ORC_PREFER_STATIC_GMOCK AND ${GMOCK_STATIC_LIB}) + if (ORC_PREFER_STATIC_GMOCK AND GMOCK_STATIC_LIB) target_link_libraries (orc_gmock INTERFACE ${GMOCK_STATIC_LIB}) target_link_libraries (orc_gtest INTERFACE ${GTEST_STATIC_LIB}) else () @@ -414,9 +499,37 @@ endif () # ---------------------------------------------------------------------- # Protobuf -if (NOT "${PROTOBUF_HOME}" STREQUAL "" OR ORC_PACKAGE_KIND STREQUAL "conan") +if (ORC_PACKAGE_KIND STREQUAL 
"conan") + find_package (Protobuf REQUIRED CONFIG) + add_library (orc_protobuf INTERFACE) + target_link_libraries(orc_protobuf INTERFACE protobuf::protobuf) + list (APPEND ORC_SYSTEM_DEPENDENCIES Protobuf) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") + find_package(Protobuf CONFIG REQUIRED) + add_library (orc_protobuf INTERFACE IMPORTED) + target_link_libraries(orc_protobuf INTERFACE protobuf::libprotobuf) + list (APPEND ORC_SYSTEM_DEPENDENCIES Protobuf) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") + set (PROTOBUF_EXECUTABLE protobuf::protoc) +elseif (NOT "${PROTOBUF_HOME}" STREQUAL "") find_package (Protobuf REQUIRED) - set(PROTOBUF_VENDORED FALSE) + + if (ORC_PREFER_STATIC_PROTOBUF AND PROTOBUF_STATIC_LIB) + orc_add_resolved_library (orc_protobuf ${PROTOBUF_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) + else () + orc_add_resolved_library (orc_protobuf ${PROTOBUF_LIBRARY} ${PROTOBUF_INCLUDE_DIR}) + endif () + + if (ORC_PREFER_STATIC_PROTOBUF AND PROTOC_STATIC_LIB) + orc_add_resolved_library (orc_protoc ${PROTOC_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) + else () + orc_add_resolved_library (orc_protoc ${PROTOC_LIBRARY} ${PROTOBUF_INCLUDE_DIR}) + endif () + + list (APPEND ORC_SYSTEM_DEPENDENCIES Protobuf) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") + orc_provide_find_module (Protobuf) else () set(PROTOBUF_PREFIX "${THIRDPARTY_DIR}/protobuf_ep-install") set(PROTOBUF_INCLUDE_DIR "${PROTOBUF_PREFIX}/include") @@ -436,7 +549,8 @@ else () else () set(PROTOBUF_STATIC_LIB_PREFIX ${CMAKE_STATIC_LIBRARY_PREFIX}) endif () - set(PROTOBUF_STATIC_LIB "${PROTOBUF_PREFIX}/lib/${PROTOBUF_STATIC_LIB_PREFIX}protobuf${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(PROTOBUF_STATIC_LIB_NAME "${PROTOBUF_STATIC_LIB_PREFIX}protobuf${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(PROTOBUF_STATIC_LIB "${PROTOBUF_PREFIX}/lib/${PROTOBUF_STATIC_LIB_NAME}") set(PROTOC_STATIC_LIB "${PROTOBUF_PREFIX}/lib/${PROTOBUF_STATIC_LIB_PREFIX}protoc${CMAKE_STATIC_LIBRARY_SUFFIX}") set(PROTOBUF_EXECUTABLE "${PROTOBUF_PREFIX}/bin/protoc${CMAKE_EXECUTABLE_SUFFIX}") @@ -453,45 +567,16 @@ else () ${THIRDPARTY_LOG_OPTIONS} BUILD_BYPRODUCTS "${PROTOBUF_STATIC_LIB}" "${PROTOC_STATIC_LIB}") - set(PROTOBUF_LIBRARY ${PROTOBUF_STATIC_LIB}) - set(PROTOC_LIBRARY ${PROTOC_STATIC_LIB}) - set(PROTOBUF_VENDORED TRUE) -endif () - -add_library (orc_protobuf INTERFACE) -add_library (orc::protobuf ALIAS orc_protobuf) -add_library (orc_protoc INTERFACE) -add_library (orc::protoc ALIAS orc_protoc) + orc_add_built_library (protobuf_ep orc_protobuf ${PROTOBUF_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) + orc_add_built_library (protobuf_ep orc_protoc ${PROTOC_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) -if (ORC_PACKAGE_KIND STREQUAL "conan") - target_link_libraries (orc_protobuf INTERFACE ${protobuf_LIBRARIES}) -elseif (ORC_PREFER_STATIC_PROTOBUF AND ${PROTOBUF_STATIC_LIB}) - target_link_libraries (orc_protobuf INTERFACE ${PROTOBUF_STATIC_LIB}) -else () - target_link_libraries (orc_protobuf INTERFACE ${PROTOBUF_LIBRARY}) -endif() -if (ORC_PACKAGE_KIND STREQUAL "conan") - target_include_directories (orc_protobuf SYSTEM INTERFACE ${protobuf_INCLUDE_DIR}) -else () - target_include_directories (orc_protobuf SYSTEM INTERFACE ${PROTOBUF_INCLUDE_DIR}) + list (APPEND ORC_VENDOR_DEPENDENCIES "orc::vendored_protobuf|${PROTOBUF_STATIC_LIB_NAME}") + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") endif () -if (NOT ORC_PACKAGE_KIND STREQUAL "conan") - if (ORC_PREFER_STATIC_PROTOBUF AND ${PROTOC_STATIC_LIB}) - target_link_libraries (orc_protoc INTERFACE 
${PROTOC_STATIC_LIB}) - else () - target_link_libraries (orc_protoc INTERFACE ${PROTOC_LIBRARY}) - endif() - target_include_directories (orc_protoc SYSTEM INTERFACE ${PROTOBUF_INCLUDE_DIR}) -endif() - -if (PROTOBUF_VENDORED) - add_dependencies (orc_protoc protobuf_ep) - add_dependencies (orc_protobuf protobuf_ep) - if (INSTALL_VENDORED_LIBS) - install(FILES "${PROTOBUF_STATIC_LIB}" "${PROTOC_STATIC_LIB}" - DESTINATION "lib") - endif () +add_library (orc::protobuf ALIAS orc_protobuf) +if (NOT (ORC_PACKAGE_KIND STREQUAL "conan" OR ORC_PACKAGE_KIND STREQUAL "vcpkg")) + add_library (orc::protoc ALIAS orc_protoc) endif () # ---------------------------------------------------------------------- @@ -509,7 +594,7 @@ if(BUILD_LIBHDFSPP) set (LIBHDFSPP_INCLUDE_DIR "${LIBHDFSPP_PREFIX}/include") set (LIBHDFSPP_STATIC_LIB_NAME hdfspp_static) set (LIBHDFSPP_STATIC_LIB "${LIBHDFSPP_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${LIBHDFSPP_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}") - set (LIBHDFSPP_SRC_URL "${CMAKE_SOURCE_DIR}/c++/libs/libhdfspp/libhdfspp.tar.gz") + set (LIBHDFSPP_SRC_URL "${PROJECT_SOURCE_DIR}/c++/libs/libhdfspp/libhdfspp.tar.gz") set (LIBHDFSPP_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${LIBHDFSPP_PREFIX} -DPROTOBUF_INCLUDE_DIR=${PROTOBUF_INCLUDE_DIR} @@ -536,15 +621,7 @@ if(BUILD_LIBHDFSPP) BUILD_BYPRODUCTS "${LIBHDFSPP_STATIC_LIB}" CMAKE_ARGS ${LIBHDFSPP_CMAKE_ARGS}) - include_directories (SYSTEM ${LIBHDFSPP_INCLUDE_DIR}) - - add_library (libhdfspp STATIC IMPORTED) - set_target_properties (libhdfspp PROPERTIES IMPORTED_LOCATION ${LIBHDFSPP_STATIC_LIB}) - add_dependencies (libhdfspp libhdfspp_ep) - if (INSTALL_VENDORED_LIBS) - install(FILES "${LIBHDFSPP_STATIC_LIB}" - DESTINATION "lib") - endif () + orc_add_built_library(libhdfspp_ep libhdfspp ${LIBHDFSPP_STATIC_LIB} ${LIBHDFSPP_INCLUDE_DIR}) set (LIBHDFSPP_LIBRARIES libhdfspp diff --git a/docker/README.md b/docker/README.md index e9a3b65b12..b89fed84ab 100644 --- a/docker/README.md +++ b/docker/README.md @@ -2,8 +2,9 @@ * Debian 11 and 12 * Fedora 37 -* Ubuntu 20, 22, 24 +* Ubuntu 22 and 24 * Oracle Linux 9 +* Amazon Linux 2023 ## Pre-built Images diff --git a/docker/ubuntu20/Dockerfile b/docker/amazonlinux23/Dockerfile similarity index 60% rename from docker/ubuntu20/Dockerfile rename to docker/amazonlinux23/Dockerfile index 59a487bb8d..806a58f898 100644 --- a/docker/ubuntu20/Dockerfile +++ b/docker/amazonlinux23/Dockerfile @@ -14,43 +14,35 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# ORC compile for Ubuntu 20 +# ORC compile for Amazon Linux 2023 # -FROM ubuntu:20.04 -LABEL maintainer="Apache ORC project " -ARG jdk=17 -ARG cc=gcc +FROM amazonlinux:2023 +LABEL org.opencontainers.image.authors="Apache ORC project " +LABEL org.opencontainers.image.licenses="Apache-2.0" +LABEL org.opencontainers.image.ref.name="Apache ORC on Amazon Linux 2023" +LABEL org.opencontainers.image.version="" -RUN ln -fs /usr/share/zoneinfo/America/Los_Angeles /etc/localtime -RUN apt-get update -RUN apt-get install -y \ - cmake \ +RUN yum check-update || true +RUN yum install -y \ + cmake3 \ + curl-devel \ + cyrus-sasl-devel \ + expat-devel \ + gcc \ + gcc-c++ \ + gettext-devel \ git \ - libsasl2-dev \ - libssl-dev \ + libtool \ make \ - curl \ - maven \ - openjdk-${jdk}-jdk \ - tzdata; \ - if [ "${cc}" = "gcc" ] ; then \ - apt-get install -y \ - gcc \ - g++ \ - ; else \ - apt-get install -y \ - clang \ - && \ - update-alternatives --set cc /usr/bin/clang && \ - update-alternatives --set c++ /usr/bin/clang++ \ - ; fi -RUN update-alternatives --set java $(update-alternatives --list java | grep ${jdk}) && \ - update-alternatives --set javac $(update-alternatives --list javac | grep ${jdk}) - -ENV CC=cc -ENV CXX=c++ + openssl-devel \ + tar \ + wget \ + which \ + zlib-devel \ + java-17-amazon-corretto-devel +ENV TZ=America/Los_Angeles WORKDIR /root VOLUME /root/.m2/repository diff --git a/docker/debian11/Dockerfile b/docker/debian11/Dockerfile index fb804a316b..7af433de18 100644 --- a/docker/debian11/Dockerfile +++ b/docker/debian11/Dockerfile @@ -18,7 +18,10 @@ # FROM debian:bullseye -LABEL maintainer="Apache ORC project " +LABEL org.opencontainers.image.authors="Apache ORC project " +LABEL org.opencontainers.image.licenses="Apache-2.0" +LABEL org.opencontainers.image.ref.name="Apache ORC on Debian 11" +LABEL org.opencontainers.image.version="" ARG jdk=17 RUN apt-get update diff --git a/docker/debian12/Dockerfile b/docker/debian12/Dockerfile index f0c2a600eb..ae341183f9 100644 --- a/docker/debian12/Dockerfile +++ b/docker/debian12/Dockerfile @@ -18,7 +18,10 @@ # FROM debian:bookworm -LABEL maintainer="Apache ORC project " +LABEL org.opencontainers.image.authors="Apache ORC project " +LABEL org.opencontainers.image.licenses="Apache-2.0" +LABEL org.opencontainers.image.ref.name="Apache ORC on Debian 12" +LABEL org.opencontainers.image.version="" ARG jdk=17 RUN apt-get update diff --git a/docker/fedora37/Dockerfile b/docker/fedora37/Dockerfile index bf4a50fc1c..a2d0748ded 100644 --- a/docker/fedora37/Dockerfile +++ b/docker/fedora37/Dockerfile @@ -14,11 +14,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# ORC compile for CentOS 7 +# ORC compile for Fedora Linux 37 # FROM fedora:37 -LABEL maintainer="Apache ORC project " +LABEL org.opencontainers.image.authors="Apache ORC project " +LABEL org.opencontainers.image.licenses="Apache-2.0" +LABEL org.opencontainers.image.ref.name="Apache ORC on Fedora Linux 37" +LABEL org.opencontainers.image.version="" RUN yum check-update || true RUN yum install -y \ diff --git a/docker/oraclelinux9/Dockerfile b/docker/oraclelinux9/Dockerfile index 094ec828f2..a0f9623490 100644 --- a/docker/oraclelinux9/Dockerfile +++ b/docker/oraclelinux9/Dockerfile @@ -18,7 +18,8 @@ # FROM oraclelinux:9 -LABEL maintainer="Apache ORC project " +LABEL org.opencontainers.image.authors="Apache ORC project " +LABEL org.opencontainers.image.licenses="Apache-2.0" RUN yum check-update || true RUN yum install -y \ diff --git a/docker/os-list.txt b/docker/os-list.txt index 3966df3245..e4a288bd1c 100644 --- a/docker/os-list.txt +++ b/docker/os-list.txt @@ -1,7 +1,7 @@ debian11 debian12 -ubuntu20 ubuntu22 ubuntu24 fedora37 oraclelinux9 +amazonlinux23 diff --git a/docker/ubuntu22/Dockerfile b/docker/ubuntu22/Dockerfile index 81f6269518..03863f20a4 100644 --- a/docker/ubuntu22/Dockerfile +++ b/docker/ubuntu22/Dockerfile @@ -18,7 +18,10 @@ # FROM ubuntu:22.04 -LABEL maintainer="Apache ORC project " +LABEL org.opencontainers.image.authors="Apache ORC project " +LABEL org.opencontainers.image.licenses="Apache-2.0" +LABEL org.opencontainers.image.ref.name="Apache ORC on Ubuntu 22" +LABEL org.opencontainers.image.version="" ARG jdk=17 ARG cc=gcc diff --git a/docker/ubuntu24/Dockerfile b/docker/ubuntu24/Dockerfile index 34b3924330..00cd2d67e7 100644 --- a/docker/ubuntu24/Dockerfile +++ b/docker/ubuntu24/Dockerfile @@ -18,7 +18,10 @@ # FROM ubuntu:24.04 -LABEL maintainer="Apache ORC project " +LABEL org.opencontainers.image.authors="Apache ORC project " +LABEL org.opencontainers.image.licenses="Apache-2.0" +LABEL org.opencontainers.image.ref.name="Apache ORC on Ubuntu 24" +LABEL org.opencontainers.image.version="" ARG jdk=21 ARG cc=gcc diff --git a/java/.mvn/jvm.config b/java/.mvn/jvm.config new file mode 100644 index 0000000000..81b88d8173 --- /dev/null +++ b/java/.mvn/jvm.config @@ -0,0 +1 @@ +--enable-native-access=ALL-UNNAMED diff --git a/java/bench/core/pom.xml b/java/bench/core/pom.xml index cf6fe1ad51..3965021c07 100644 --- a/java/bench/core/pom.xml +++ b/java/bench/core/pom.xml @@ -17,7 +17,7 @@ org.apache.orc orc-benchmarks - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT .. 
diff --git a/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroReader.java b/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroReader.java index 97b58a8fea..8474351f2b 100644 --- a/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroReader.java +++ b/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroReader.java @@ -41,6 +41,7 @@ import org.apache.orc.bench.core.convert.BatchReader; import java.io.IOException; +import java.math.BigInteger; import java.nio.ByteBuffer; import java.util.List; @@ -201,7 +202,14 @@ public void convert(ColumnVector cv, int row, Object value) { cv.isNull[row] = true; } else { DecimalColumnVector tc = (DecimalColumnVector) cv; - tc.vector[row].set(HiveDecimal.create(Math.round((double) value * multiplier))); + if (value instanceof ByteBuffer) { + tc.vector[row].set(getHiveDecimalFromByteBuffer((ByteBuffer) value, scale)); + } else if (value instanceof GenericData.Fixed) { + tc.vector[row].set(getHiveDecimalFromByteBuffer( + ByteBuffer.wrap(((GenericData.Fixed) value).bytes()), scale)); + } else { + tc.vector[row].set(HiveDecimal.create(Math.round((double) value * multiplier))); + } } } } @@ -289,6 +297,13 @@ static AvroConverter createConverter(TypeDescription types) { } } + + static HiveDecimal getHiveDecimalFromByteBuffer(ByteBuffer byteBuffer, + int scale) { + byte[] result = getBytesFromByteBuffer(byteBuffer); + HiveDecimal dec = HiveDecimal.create(new BigInteger(result), scale); + return dec; + } + static byte[] getBytesFromByteBuffer(ByteBuffer byteBuffer) { byteBuffer.rewind(); byte[] result = new byte[byteBuffer.limit()]; diff --git a/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroSchemaUtils.java b/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroSchemaUtils.java index 96df6b5ba1..65753553a4 100644 --- a/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroSchemaUtils.java +++ b/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroSchemaUtils.java @@ -78,8 +78,11 @@ public static Schema createAvroSchema(TypeDescription typeInfo) { case DECIMAL: String precision = String.valueOf(typeInfo.getPrecision()); String scale = String.valueOf(typeInfo.getScale()); + int bytes = PRECISION_TO_BYTE_COUNT[typeInfo.getPrecision() - 1]; schema = getSchemaFor("{" + - "\"type\":\"bytes\"," + + "\"type\":\"fixed\"," + + "\"name\":\"" + typeInfo.getFullFieldName() + "\"," + + "\"size\":" + bytes + "," + "\"logicalType\":\"decimal\"," + "\"precision\":" + precision + "," + "\"scale\":" + scale + "}"); @@ -189,4 +192,16 @@ private static Schema getSchemaFor(String str) { Schema.Parser parser = new Schema.Parser(); return parser.parse(str); } + + // org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe + // Map precision to the number of bytes needed for binary conversion. + public static final int[] PRECISION_TO_BYTE_COUNT = new int[38]; + + static { + for (int prec = 1; prec <= 38; prec++) { + // Estimated number of bytes needed.
+ PRECISION_TO_BYTE_COUNT[prec - 1] = (int) + Math.ceil((Math.log(Math.pow(10, prec) - 1) / Math.log(2) + 1) / 8); + } + } } diff --git a/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroWriter.java b/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroWriter.java index d60ef6745d..34fa166673 100644 --- a/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroWriter.java +++ b/java/bench/core/src/java/org/apache/orc/bench/core/convert/avro/AvroWriter.java @@ -40,7 +40,6 @@ import org.apache.orc.bench.core.convert.BatchWriter; import java.io.IOException; -import java.nio.Buffer; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.util.List; @@ -166,8 +165,12 @@ public Object convert(ColumnVector cv, int row) { } private static class DecimalConverter implements AvroConverter { + final Schema avroSchema; + final int precision; final int scale; - DecimalConverter(int scale) { + DecimalConverter(Schema avroSchema, int precision, int scale) { + this.avroSchema = avroSchema; + this.precision = precision; this.scale = scale; } public Object convert(ColumnVector cv, int row) { @@ -176,8 +179,7 @@ public Object convert(ColumnVector cv, int row) { } if (cv.noNulls || !cv.isNull[row]) { DecimalColumnVector vector = (DecimalColumnVector) cv; - return getBufferFromDecimal( - vector.vector[row].getHiveDecimal(), scale); + return decimalToBinary(vector.vector[row].getHiveDecimal(), avroSchema, precision, scale); } else { return null; } @@ -270,7 +272,7 @@ public static AvroConverter createConverter(TypeDescription types, case TIMESTAMP: return new TimestampConverter(); case DECIMAL: - return new DecimalConverter(types.getScale()); + return new DecimalConverter(avroSchema, types.getPrecision(), types.getScale()); case LIST: return new ListConverter(types, avroSchema); case STRUCT: @@ -356,11 +358,28 @@ public void close() throws IOException { writer.close(); } - static Buffer getBufferFromDecimal(HiveDecimal dec, int scale) { - if (dec == null) { - return null; + // org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriter.DecimalDataWriter.decimalToBinary() + private static GenericData.Fixed decimalToBinary(HiveDecimal hiveDecimal, + Schema avroSchema, int prec, int scale) { + byte[] decimalBytes = hiveDecimal.setScale(scale).unscaledValue().toByteArray(); + + // Estimated number of bytes needed. + int precToBytes = AvroSchemaUtils.PRECISION_TO_BYTE_COUNT[prec - 1]; + if (precToBytes == decimalBytes.length) { + // No padding needed. + return new GenericData.Fixed(avroSchema, decimalBytes); + } + + byte[] tgt = new byte[precToBytes]; + if (hiveDecimal.signum() == -1) { + // For negative number, initializing bits to 1 + for (int i = 0; i < precToBytes; i++) { + tgt[i] |= 0xFF; + } } - return ByteBuffer.wrap(dec.bigIntegerBytesScaled(scale)); + System.arraycopy(decimalBytes, 0, tgt, precToBytes - decimalBytes.length, + decimalBytes.length); // Padding leading zeroes/ones. 
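The fixed-width encoding above is easier to see with concrete numbers. Below is a minimal, self-contained sketch of the same byte-level scheme using only java.math instead of HiveDecimal and Avro; the class and method names are illustrative, not part of the patch:

    import java.math.BigDecimal;
    import java.math.BigInteger;
    import java.util.Arrays;

    public class DecimalFixedSketch {
      // Same estimate as PRECISION_TO_BYTE_COUNT: the smallest byte count whose
      // two's-complement range covers an unscaled value of up to 10^prec - 1.
      static int bytesForPrecision(int prec) {
        return (int) Math.ceil((Math.log(Math.pow(10, prec) - 1) / Math.log(2) + 1) / 8);
      }

      public static void main(String[] args) {
        int size = bytesForPrecision(10);                 // decimal(10,2) -> 5 bytes

        BigDecimal value = new BigDecimal("-1.00");
        byte[] raw = value.unscaledValue().toByteArray(); // -100 -> [0x9C], one byte

        // Sign-extend into the fixed-size buffer, as decimalToBinary() does:
        // negative values are padded with 0xFF bytes, non-negative with 0x00.
        byte[] fixed = new byte[size];
        if (value.signum() == -1) {
          Arrays.fill(fixed, (byte) 0xFF);
        }
        System.arraycopy(raw, 0, fixed, size - raw.length, raw.length);
        // fixed = [FF, FF, FF, FF, 9C]

        // Reading back, as getHiveDecimalFromByteBuffer() does via BigInteger:
        BigDecimal roundTrip = new BigDecimal(new BigInteger(fixed), 2);
        System.out.println(roundTrip);                    // prints -1.00
      }
    }

For decimal(10,2), ceil((log2(10^10 - 1) + 1) / 8) = ceil(34.22 / 8) = 5, so every value occupies exactly five bytes and the pad bytes must carry the sign.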
+ return new GenericData.Fixed(avroSchema, tgt); } } diff --git a/java/bench/core/src/java/org/apache/orc/bench/core/convert/json/JsonReader.java b/java/bench/core/src/java/org/apache/orc/bench/core/convert/json/JsonReader.java index 893b738b1c..ece88f08b8 100644 --- a/java/bench/core/src/java/org/apache/orc/bench/core/convert/json/JsonReader.java +++ b/java/bench/core/src/java/org/apache/orc/bench/core/convert/json/JsonReader.java @@ -172,8 +172,12 @@ public void convert(JsonElement value, ColumnVector vect, int row) { vect.isNull[row] = true; } else { TimestampColumnVector vector = (TimestampColumnVector) vect; - vector.set(row, Timestamp.valueOf(value.getAsString() - .replaceAll("[TZ]", " "))); + try { + vector.set(row, new Timestamp(value.getAsLong())); + } catch (NumberFormatException e) { + vector.set(row, Timestamp.valueOf(value.getAsString() + .replaceAll("[TZ]", " "))); + } } } } diff --git a/java/bench/core/src/java/org/apache/orc/bench/core/convert/json/JsonWriter.java b/java/bench/core/src/java/org/apache/orc/bench/core/convert/json/JsonWriter.java index 00b3de22e6..527d8bf1cc 100644 --- a/java/bench/core/src/java/org/apache/orc/bench/core/convert/json/JsonWriter.java +++ b/java/bench/core/src/java/org/apache/orc/bench/core/convert/json/JsonWriter.java @@ -160,8 +160,7 @@ static void printValue(com.google.gson.stream.JsonWriter writer, ColumnVector ve (int) ((LongColumnVector) vector).vector[row]).toString()); break; case TIMESTAMP: - writer.value(((TimestampColumnVector) vector) - .asScratchTimestamp(row).toString()); + writer.value(((TimestampColumnVector) vector).getTimestampAsLong(row)); break; case LIST: printList(writer, (ListColumnVector) vector, schema, row); diff --git a/java/bench/core/src/resources/taxi.schema b/java/bench/core/src/resources/taxi.schema index 720848faaa..adb1f54f8d 100644 --- a/java/bench/core/src/resources/taxi.schema +++ b/java/bench/core/src/resources/taxi.schema @@ -9,13 +9,13 @@ struct< PULocationID: bigint, DOLocationID: bigint, payment_type: bigint, - fare_amount: decimal(8,2), - extra: decimal(8,2), - mta_tax: decimal(8,2), - tip_amount: decimal(8,2), - tolls_amount: decimal(8,2), - improvement_surcharge: decimal(8,2), - total_amount: decimal(8,2), + fare_amount: decimal(10,2), + extra: decimal(10,2), + mta_tax: decimal(10,2), + tip_amount: decimal(10,2), + tolls_amount: decimal(10,2), + improvement_surcharge: decimal(10,2), + total_amount: decimal(10,2), congestion_surcharge: int, airport_fee: int > diff --git a/java/bench/core/src/test/org/apache/orc/bench/core/impl/ChunkReadUtilTest.java b/java/bench/core/src/test/org/apache/orc/bench/core/impl/ChunkReadUtilTest.java index 1169998d86..7091927521 100644 --- a/java/bench/core/src/test/org/apache/orc/bench/core/impl/ChunkReadUtilTest.java +++ b/java/bench/core/src/test/org/apache/orc/bench/core/impl/ChunkReadUtilTest.java @@ -21,6 +21,9 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.VersionInfo; +import org.apache.orc.impl.HadoopShims; +import org.apache.orc.impl.HadoopShimsFactory; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -38,6 +41,9 @@ class ChunkReadUtilTest { private static long fileLength; private static final int ROW_COUNT = 524288; private static final int COL_COUNT = 16; + private static final HadoopShims SHIMS = HadoopShimsFactory.get(); + private static final boolean supportVectoredIO = + SHIMS.supportVectoredIO(VersionInfo.getVersion()); 
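The JsonReader/JsonWriter change above accepts two timestamp representations: epoch milliseconds when the JSON value is numeric, and the T-separated literal otherwise. A hedged sketch of the same fallback, assuming millisecond precision for numeric values (the trim() here is an added guard against the trailing space left when a final 'Z' is stripped):

    import java.sql.Timestamp;

    public class JsonTimestampSketch {
      static Timestamp parse(String raw) {
        try {
          return new Timestamp(Long.parseLong(raw));      // e.g. "1388534400000"
        } catch (NumberFormatException e) {
          return Timestamp.valueOf(raw.replaceAll("[TZ]", " ").trim());
        }
      }

      public static void main(String[] args) {
        System.out.println(parse("0"));                    // epoch, rendered in local time
        System.out.println(parse("2014-01-01T00:00:00Z")); // parsed via the string fallback
      }
    }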
@BeforeAll public static void setup() throws IOException { @@ -57,7 +63,7 @@ public void testReadAll() throws IOException { Configuration conf = new Configuration(); readStart(); assertEquals(ROW_COUNT, ChunkReadUtil.readORCFile(filePath, conf, false)); - assertTrue((readEnd().getBytesRead() / (double) fileLength) > 1); + assertTrue(supportVectoredIO || (readEnd().getBytesRead() / (double) fileLength) > 1); } @Test @@ -75,7 +81,7 @@ public void testReadAlternateWMinSeekSize() throws IOException { readStart(); assertEquals(ROW_COUNT, ChunkReadUtil.readORCFile(filePath, conf, true)); double readFraction = readEnd().getBytesRead() / (double) fileLength; - assertTrue(readFraction > 1 && readFraction < 1.01); + assertTrue(supportVectoredIO || (readFraction > 1 && readFraction < 1.01)); } @Test @@ -85,6 +91,6 @@ public void testReadAlternateWMinSeekSizeDrop() throws IOException { readStart(); assertEquals(ROW_COUNT, ChunkReadUtil.readORCFile(filePath, conf, true)); double readFraction = readEnd().getBytesRead() / (double) fileLength; - assertTrue(readFraction > 1 && readFraction < 1.01); + assertTrue(supportVectoredIO || (readFraction > 1 && readFraction < 1.01)); } -} \ No newline at end of file +} diff --git a/java/bench/hive/pom.xml b/java/bench/hive/pom.xml index 8dba74a0de..52a447948e 100644 --- a/java/bench/hive/pom.xml +++ b/java/bench/hive/pom.xml @@ -17,7 +17,7 @@ org.apache.orc orc-benchmarks - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT .. diff --git a/java/bench/hive/src/java/org/apache/orc/bench/hive/ColumnProjectionBenchmark.java b/java/bench/hive/src/java/org/apache/orc/bench/hive/ColumnProjectionBenchmark.java index 9c1b7fd21a..48806faffe 100644 --- a/java/bench/hive/src/java/org/apache/orc/bench/hive/ColumnProjectionBenchmark.java +++ b/java/bench/hive/src/java/org/apache/orc/bench/hive/ColumnProjectionBenchmark.java @@ -19,6 +19,7 @@ package org.apache.orc.bench.hive; import com.google.auto.service.AutoService; +import org.apache.commons.cli.CommandLine; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -38,6 +39,7 @@ import org.apache.orc.bench.core.IOCounters; import org.apache.orc.bench.core.OrcBenchmark; import org.apache.orc.bench.core.Utilities; +import org.apache.orc.bench.core.convert.GenerateVariants; import org.apache.parquet.hadoop.ParquetInputFormat; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; @@ -47,6 +49,7 @@ import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.options.OptionsBuilder; import java.net.URI; import java.util.List; @@ -78,7 +81,13 @@ public String getDescription() { @Override public void run(String[] args) throws Exception { - new Runner(Utilities.parseOptions(args, getClass())).run(); + CommandLine cmds = GenerateVariants.parseCommandLine(args); + new Runner(new OptionsBuilder() + .parent(Utilities.parseOptions(args, this.getClass())) + .param("compression", cmds.getOptionValue("compress", "snappy,gz,zstd").split(",")) + .param("dataset", cmds.getOptionValue("data", "github,sales,taxi").split(",")) + .build() + ).run(); } @Benchmark diff --git a/java/bench/hive/src/java/org/apache/orc/bench/hive/FullReadBenchmark.java b/java/bench/hive/src/java/org/apache/orc/bench/hive/FullReadBenchmark.java index dc1bcca922..8f3b1cbbaa 100644 --- a/java/bench/hive/src/java/org/apache/orc/bench/hive/FullReadBenchmark.java +++ 
b/java/bench/hive/src/java/org/apache/orc/bench/hive/FullReadBenchmark.java @@ -25,6 +25,7 @@ import org.apache.avro.generic.GenericRecord; import org.apache.avro.io.DatumReader; import org.apache.avro.mapred.FsInput; +import org.apache.commons.cli.CommandLine; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -45,6 +46,7 @@ import org.apache.orc.bench.core.IOCounters; import org.apache.orc.bench.core.OrcBenchmark; import org.apache.orc.bench.core.Utilities; +import org.apache.orc.bench.core.convert.GenerateVariants; import org.apache.parquet.hadoop.ParquetInputFormat; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; @@ -54,6 +56,7 @@ import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.options.OptionsBuilder; import java.io.InputStream; import java.io.InputStreamReader; @@ -87,7 +90,13 @@ public String getDescription() { @Override public void run(String[] args) throws Exception { - new Runner(Utilities.parseOptions(args, getClass())).run(); + CommandLine cmds = GenerateVariants.parseCommandLine(args); + new Runner(new OptionsBuilder() + .parent(Utilities.parseOptions(args, this.getClass())) + .param("compression", cmds.getOptionValue("compress", "gz,snappy,zstd").split(",")) + .param("dataset", cmds.getOptionValue("data", "taxi,sales,github").split(",")) + .build() + ).run(); } @Benchmark diff --git a/java/bench/pom.xml b/java/bench/pom.xml index a50eb3a425..085611e14b 100644 --- a/java/bench/pom.xml +++ b/java/bench/pom.xml @@ -17,7 +17,7 @@ org.apache.orc orc - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ../pom.xml @@ -33,13 +33,15 @@ - 1.11.3 - 4.0.0 + 1.12.0 + 4.0.1 1.37 - 5.10.2 + 5.12.1 ${project.version} - 1.13.1 - 3.5.1 + 1.15.1 + 2.13 + 2.13.14 + 4.0.0-preview2 @@ -71,7 +73,7 @@ com.google.code.gson gson - 2.2.4 + 2.13.0 com.google.guava @@ -80,17 +82,17 @@ commons-cli commons-cli - 1.6.0 + 1.9.0 io.airlift aircompressor - 0.26 + 2.0.2 io.netty netty-all - 4.1.96.Final + 4.1.110.Final runtime @@ -106,7 +108,7 @@ org.apache.commons commons-csv - 1.10.0 + 1.14.0 org.apache.hadoop @@ -275,7 +277,7 @@ org.xerial.snappy snappy-java - 1.1.10.5 + 1.1.10.7 org.apache.parquet @@ -284,12 +286,12 @@ org.apache.spark - spark-catalyst_2.12 + spark-catalyst_${scala.binary.version} ${spark.version} org.apache.spark - spark-core_2.12 + spark-core_${scala.binary.version} ${spark.version} @@ -316,7 +318,7 @@ org.apache.spark - spark-sql_2.12 + spark-sql_${scala.binary.version} ${spark.version} @@ -335,7 +337,7 @@ org.apache.spark - spark-avro_2.12 + spark-avro_${scala.binary.version} ${spark.version} @@ -357,7 +359,7 @@ org.scala-lang scala-library - 2.12.18 + ${scala.version} org.slf4j diff --git a/java/bench/spark/pom.xml b/java/bench/spark/pom.xml index 7eeef0d00a..31070bd2aa 100644 --- a/java/bench/spark/pom.xml +++ b/java/bench/spark/pom.xml @@ -17,7 +17,7 @@ org.apache.orc orc-benchmarks - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT .. 
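The run() rewrites above forward command-line choices into JMH parameters rather than relying only on annotation defaults. A sketch of that pattern against the public JMH API; the include pattern and parameter values here are illustrative:

    import org.openjdk.jmh.runner.Runner;
    import org.openjdk.jmh.runner.RunnerException;
    import org.openjdk.jmh.runner.options.Options;
    import org.openjdk.jmh.runner.options.OptionsBuilder;

    public class ParamForwardingSketch {
      public static void main(String[] args) throws RunnerException {
        Options opts = new OptionsBuilder()
            .include("FullReadBenchmark")                  // benchmark selection regex
            .param("compression", "gz", "snappy", "zstd")  // overrides @Param defaults
            .param("dataset", "taxi", "sales", "github")
            .build();
        new Runner(opts).run();
      }
    }

JMH runs every combination of the supplied parameter values, which is how a single CLI option like compress ends up sweeping all three codecs in one invocation.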
@@ -71,15 +71,15 @@ org.apache.spark - spark-catalyst_2.12 + spark-catalyst_${scala.binary.version} org.apache.spark - spark-core_2.12 + spark-core_${scala.binary.version} org.apache.spark - spark-sql_2.12 + spark-sql_${scala.binary.version} org.apache.parquet @@ -88,7 +88,7 @@ org.apache.spark - spark-avro_2.12 + spark-avro_${scala.binary.version} org.jodd @@ -125,7 +125,7 @@ org.objenesis objenesis - 3.2 + 3.3 compile diff --git a/java/bench/spark/src/java/org/apache/orc/bench/spark/SparkBenchmark.java b/java/bench/spark/src/java/org/apache/orc/bench/spark/SparkBenchmark.java index 1285875dcf..86e65ae81e 100644 --- a/java/bench/spark/src/java/org/apache/orc/bench/spark/SparkBenchmark.java +++ b/java/bench/spark/src/java/org/apache/orc/bench/spark/SparkBenchmark.java @@ -61,9 +61,9 @@ import scala.Tuple2; import scala.collection.Iterator; import scala.collection.JavaConverters; -import scala.collection.Seq; import scala.collection.immutable.Map; import scala.collection.immutable.Map$; +import scala.collection.immutable.Seq; import java.io.IOException; import java.sql.Timestamp; @@ -74,7 +74,8 @@ @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.MICROSECONDS) @AutoService(OrcBenchmark.class) -@Fork(jvmArgsAppend = "--add-opens=java.base/sun.nio.ch=ALL-UNNAMED") +@Fork(jvmArgsAppend = {"--add-opens=java.base/sun.nio.ch=ALL-UNNAMED", + "--add-opens=java.base/sun.util.calendar=ALL-UNNAMED"}) public class SparkBenchmark implements OrcBenchmark { private static final Path root = Utilities.getBenchmarkRoot(); @@ -195,6 +196,9 @@ public void fullRead(InputSource source, case "orc": options.add(new Tuple2<>("returning_batch", "true")); // SPARK-40918 break; + case "parquet": + options.add(new Tuple2<>("returning_batch", "true")); // SPARK-40918 + break; default: break; } @@ -228,6 +232,9 @@ public void partialRead(InputSource source, case "orc": options.add(new Tuple2<>("returning_batch", "true")); // SPARK-40918 break; + case "parquet": + options.add(new Tuple2<>("returning_batch", "true")); // SPARK-40918 + break; default: break; } @@ -303,6 +310,9 @@ public void pushDown(InputSource source, case "orc": options.add(new Tuple2<>("returning_batch", "true")); // SPARK-40918 break; + case "parquet": + options.add(new Tuple2<>("returning_batch", "true")); // SPARK-40918 + break; default: break; } diff --git a/java/core/pom.xml b/java/core/pom.xml index 4cafffc714..5813364532 100644 --- a/java/core/pom.xml +++ b/java/core/pom.xml @@ -17,7 +17,7 @@ org.apache.orc orc - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ../pom.xml @@ -43,10 +43,6 @@ com.google.protobuf protobuf-java - - org.apache.commons - commons-lang3 - io.airlift aircompressor @@ -86,6 +82,11 @@ + + org.apache.commons + commons-lang3 + test + com.google.guava guava diff --git a/java/core/src/java/org/apache/orc/OrcConf.java b/java/core/src/java/org/apache/orc/OrcConf.java index 9bc2b4492e..6516517ba2 100644 --- a/java/core/src/java/org/apache/orc/OrcConf.java +++ b/java/core/src/java/org/apache/orc/OrcConf.java @@ -18,7 +18,6 @@ package org.apache.orc; -import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import java.util.ArrayList; @@ -353,12 +352,12 @@ public String getString(Configuration conf) { public List getStringAsList(Configuration conf) { String value = getString(null, conf); List confList = new ArrayList<>(); - if (StringUtils.isEmpty(value)) { + if (value == null || value.isEmpty()) { return confList; } for (String str: value.split(",")) { - String trimStr = StringUtils.trim(str); - if 
(StringUtils.isNotEmpty(trimStr)) { + String trimStr = str.trim(); + if (!trimStr.isEmpty()) { confList.add(trimStr); } } diff --git a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java index c5e13cc3c0..d6147050ec 100644 --- a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java +++ b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java @@ -102,6 +102,8 @@ public void updateBoolean(boolean value, int repetitions) { public void merge(ColumnStatisticsImpl other) { if (other instanceof BooleanStatisticsImpl bkt) { trueCount += bkt.trueCount; + } else if (!(other instanceof BooleanColumnStatistics)) { + throw new IllegalArgumentException("Incompatible merging of boolean column statistics"); } else { if (isStatsExists() && trueCount != 0) { throw new IllegalArgumentException("Incompatible merging of boolean column statistics"); @@ -222,6 +224,8 @@ public void merge(ColumnStatisticsImpl other) { } } sum += otherColl.sum; + } else if (!(other instanceof CollectionColumnStatistics)) { + throw new IllegalArgumentException("Incompatible merging of collection column statistics"); } else { if (isStatsExists()) { throw new IllegalArgumentException("Incompatible merging of collection column statistics"); @@ -397,6 +401,8 @@ public void merge(ColumnStatisticsImpl other) { overflow = true; } } + } else if (!(other instanceof IntegerColumnStatistics)) { + throw new IllegalArgumentException("Incompatible merging of integer column statistics"); } else { if (isStatsExists() && hasMinimum) { throw new IllegalArgumentException("Incompatible merging of integer column statistics"); @@ -560,6 +566,8 @@ public void merge(ColumnStatisticsImpl other) { } } sum += dbl.sum; + } else if (!(other instanceof DoubleColumnStatistics)) { + throw new IllegalArgumentException("Incompatible merging of double column statistics"); } else { if (isStatsExists() && hasMinimum) { throw new IllegalArgumentException("Incompatible merging of double column statistics"); @@ -763,6 +771,8 @@ public void merge(ColumnStatisticsImpl other) { } } sum += str.sum; + } else if (!(other instanceof StringColumnStatistics)) { + throw new IllegalArgumentException("Incompatible merging of string column statistics"); } else { if (isStatsExists()) { throw new IllegalArgumentException("Incompatible merging of string column statistics"); @@ -993,9 +1003,10 @@ public void updateBinary(byte[] bytes, int offset, int length, @Override public void merge(ColumnStatisticsImpl other) { - if (other instanceof BinaryColumnStatistics) { - BinaryStatisticsImpl bin = (BinaryStatisticsImpl) other; + if (other instanceof BinaryStatisticsImpl bin) { sum += bin.sum; + } else if (!(other instanceof BinaryColumnStatistics)) { + throw new IllegalArgumentException("Incompatible merging of binary column statistics"); } else { if (isStatsExists() && sum != 0) { throw new IllegalArgumentException("Incompatible merging of binary column statistics"); @@ -1128,6 +1139,8 @@ public void merge(ColumnStatisticsImpl other) { sum.mutateAdd(dec.sum); } } + } else if (!(other instanceof DecimalColumnStatistics)) { + throw new IllegalArgumentException("Incompatible merging of decimal column statistics"); } else { if (isStatsExists() && minimum != null) { throw new IllegalArgumentException("Incompatible merging of decimal column statistics"); @@ -1321,6 +1334,8 @@ public void merge(ColumnStatisticsImpl other) { hasSum = false; } } + } else if (!(other instanceof DecimalColumnStatistics)) 
{ + throw new IllegalArgumentException("Incompatible merging of decimal column statistics"); } else { if (other.getNumberOfValues() != 0) { throw new IllegalArgumentException("Incompatible merging of decimal column statistics"); @@ -1486,6 +1501,8 @@ public void merge(ColumnStatisticsImpl other) { if (other instanceof DateStatisticsImpl dateStats) { minimum = Math.min(minimum, dateStats.minimum); maximum = Math.max(maximum, dateStats.maximum); + } else if (!(other instanceof DateColumnStatistics)) { + throw new IllegalArgumentException("Incompatible merging of date column statistics"); } else { if (isStatsExists() && count != 0) { throw new IllegalArgumentException("Incompatible merging of date column statistics"); @@ -1698,6 +1715,8 @@ public void merge(ColumnStatisticsImpl other) { maximum = timestampStats.maximum; } } + } else if (!(other instanceof TimestampColumnStatistics)) { + throw new IllegalArgumentException("Incompatible merging of timestamp column statistics"); } else { if (isStatsExists() && count != 0) { throw new IllegalArgumentException("Incompatible merging of timestamp column statistics"); diff --git a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java index 4635973ab5..6886b551e8 100644 --- a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java +++ b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java @@ -17,7 +17,6 @@ */ package org.apache.orc.impl; -import org.apache.commons.lang3.ArrayUtils; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; @@ -1446,6 +1445,7 @@ public void nextVector(ColumnVector previousVector, } public static class StringGroupFromBinaryTreeReader extends ConvertTreeReader { + public static final byte[] EMPTY_BYTE_ARRAY = new byte[0]; private final TypeDescription readerType; private BytesColumnVector inBytesColVector; private BytesColumnVector outBytesColVector; @@ -1461,7 +1461,7 @@ public void setConvertVectorElement(int elementNum) throws IOException { byte[] bytes = inBytesColVector.vector[elementNum]; int start = inBytesColVector.start[elementNum]; int length = inBytesColVector.length[elementNum]; - final byte[] string = (length == 0) ? ArrayUtils.EMPTY_BYTE_ARRAY : new byte[3 * length - 1]; + final byte[] string = (length == 0) ? 
EMPTY_BYTE_ARRAY : new byte[3 * length - 1]; for(int p = 0; p < string.length; p += 2) { if (p != 0) { string[p++] = ' '; diff --git a/java/core/src/java/org/apache/orc/impl/ParserUtils.java b/java/core/src/java/org/apache/orc/impl/ParserUtils.java index df2f8b5e19..c864465bde 100644 --- a/java/core/src/java/org/apache/orc/impl/ParserUtils.java +++ b/java/core/src/java/org/apache/orc/impl/ParserUtils.java @@ -31,6 +31,8 @@ import java.util.regex.Pattern; public class ParserUtils { + private static final TypeDescription.Category[] TYPE_DESCRIPTION_CATEGORY_VALUES + = TypeDescription.Category.values(); static TypeDescription.Category parseCategory(ParserUtils.StringPosition source) { StringBuilder word = new StringBuilder(); @@ -56,7 +58,7 @@ static TypeDescription.Category parseCategory(ParserUtils.StringPosition source) catString = catString.trim(); } if (!catString.isEmpty()) { - for (TypeDescription.Category cat : TypeDescription.Category.values()) { + for (TypeDescription.Category cat : TYPE_DESCRIPTION_CATEGORY_VALUES) { if (cat.getName().equals(catString)) { return cat; } diff --git a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java index 3afbff5fc3..9e018157f6 100644 --- a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java +++ b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java @@ -65,6 +65,10 @@ public class ReaderImpl implements Reader { private static final Logger LOG = LoggerFactory.getLogger(ReaderImpl.class); + private static final OrcFile.Version[] ORC_FILE_VERSION_VALUES = OrcFile.Version.values(); + private static final OrcFile.WriterVersion[] ORC_FILE_WRITER_VERSION_VALUES + = OrcFile.WriterVersion.values(); + private static final int DIRECTORY_SIZE_GUESS = 16 * 1024; public static final int DEFAULT_COMPRESSION_BLOCK_SIZE = 256 * 1024; @@ -268,7 +272,7 @@ public static OrcFile.Version getFileVersion(List versionList) { if (versionList == null || versionList.isEmpty()) { return OrcFile.Version.V_0_11; } - for (OrcFile.Version version: OrcFile.Version.values()) { + for (OrcFile.Version version: ORC_FILE_VERSION_VALUES) { if (version.getMajor() == versionList.get(0) && version.getMinor() == versionList.get(1)) { return version; @@ -620,7 +624,7 @@ protected Supplier getFileSystemSupplier() { * @return the version of the software that produced the file */ public static OrcFile.WriterVersion getWriterVersion(int writerVersion) { - for(OrcFile.WriterVersion version: OrcFile.WriterVersion.values()) { + for(OrcFile.WriterVersion version: ORC_FILE_WRITER_VERSION_VALUES) { if (version.getId() == writerVersion) { return version; } diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java index 323f242471..c9256964e5 100644 --- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java +++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java @@ -17,7 +17,6 @@ */ package org.apache.orc.impl; -import org.apache.commons.lang3.ArrayUtils; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.type.HiveDecimal; @@ -340,7 +339,14 @@ protected RecordReaderImpl(ReaderImpl fileReader, this.startReadPhase = TypeReader.ReadPhase.ALL; } - this.rowIndexColsToRead = ArrayUtils.contains(rowIndexCols, true) ? rowIndexCols : null; + var hasTrue = false; + for (boolean value: rowIndexCols) { + if (value) { + hasTrue = true; + break; + } + } + this.rowIndexColsToRead = hasTrue ? 
rowIndexCols : null; TreeReaderFactory.ReaderContext readerContext = new TreeReaderFactory.ReaderContext() .setSchemaEvolution(evolution) diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java b/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java index 0eabb421e0..b4155ada81 100644 --- a/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java +++ b/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java @@ -17,12 +17,12 @@ */ package org.apache.orc.impl; -import org.apache.commons.lang3.builder.HashCodeBuilder; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileRange; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.io.DiskRangeList; +import org.apache.hadoop.util.VersionInfo; import org.apache.orc.CompressionCodec; import org.apache.orc.DataReader; import org.apache.orc.OrcProto; @@ -48,7 +48,8 @@ */ public class RecordReaderUtils { private static final HadoopShims SHIMS = HadoopShimsFactory.get(); - private static final boolean supportVectoredIO = SHIMS.supportVectoredIO(); + private static final boolean supportVectoredIO = + SHIMS.supportVectoredIO(VersionInfo.getVersion()); private static final Logger LOG = LoggerFactory.getLogger(RecordReaderUtils.class); private static class DefaultDataReader implements DataReader { @@ -635,8 +636,8 @@ public boolean equals(Object rhs) { @Override public int hashCode() { - return new HashCodeBuilder().append(capacity).append(insertionGeneration) - .toHashCode(); + // This is identical to the previous hashCode from HashCodeBuilder + return (17 * 37 + capacity) * 37 + (int) (insertionGeneration ^ insertionGeneration >> 32); } } diff --git a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java index 2a2adf50d7..418b9c9561 100644 --- a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java +++ b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java @@ -1551,7 +1551,6 @@ private void nextVector(DecimalColumnVector result, HiveDecimalWritable[] vector = result.vector; HiveDecimalWritable decWritable; if (result.noNulls) { - result.isRepeating = true; for (int r = 0; r < batchSize; ++r) { decWritable = vector[r]; if (!decWritable.serializationUtilsRead( @@ -1563,7 +1562,6 @@ private void nextVector(DecimalColumnVector result, setIsRepeatingIfNeeded(result, r); } } else if (!result.isRepeating || !result.isNull[0]) { - result.isRepeating = true; for (int r = 0; r < batchSize; ++r) { if (!result.isNull[r]) { decWritable = vector[r]; @@ -1595,7 +1593,6 @@ private void nextVector(DecimalColumnVector result, HiveDecimalWritable[] vector = result.vector; HiveDecimalWritable decWritable; if (result.noNulls) { - result.isRepeating = true; int previousIdx = 0; for (int r = 0; r != filterContext.getSelectedSize(); ++r) { int idx = filterContext.getSelected()[r]; @@ -1614,7 +1611,6 @@ private void nextVector(DecimalColumnVector result, } skipStreamRows(batchSize - previousIdx); } else if (!result.isRepeating || !result.isNull[0]) { - result.isRepeating = true; int previousIdx = 0; for (int r = 0; r != filterContext.getSelectedSize(); ++r) { int idx = filterContext.getSelected()[r]; @@ -1651,14 +1647,12 @@ private void nextVector(Decimal64ColumnVector result, // read the scales scaleReader.nextVector(result, scratchScaleVector, batchSize); if (result.noNulls) { - result.isRepeating = true; for (int r = 0; r < batchSize; ++r) { final long scaleFactor =
powerOfTenTable[scale - scratchScaleVector[r]]; result.vector[r] = SerializationUtils.readVslong(valueStream) * scaleFactor; setIsRepeatingIfNeeded(result, r); } } else if (!result.isRepeating || !result.isNull[0]) { - result.isRepeating = true; for (int r = 0; r < batchSize; ++r) { if (!result.isNull[r]) { final long scaleFactor = powerOfTenTable[scale - scratchScaleVector[r]]; @@ -1686,7 +1680,6 @@ private void nextVector(Decimal64ColumnVector result, // Read all the scales scaleReader.nextVector(result, scratchScaleVector, batchSize); if (result.noNulls) { - result.isRepeating = true; int previousIdx = 0; for (int r = 0; r != filterContext.getSelectedSize(); r++) { int idx = filterContext.getSelected()[r]; @@ -1702,7 +1695,6 @@ private void nextVector(Decimal64ColumnVector result, } skipStreamRows(batchSize - previousIdx); } else if (!result.isRepeating || !result.isNull[0]) { - result.isRepeating = true; int previousIdx = 0; for (int r = 0; r != filterContext.getSelectedSize(); r++) { int idx = filterContext.getSelected()[r]; diff --git a/java/core/src/java/org/apache/orc/impl/ZlibCodec.java b/java/core/src/java/org/apache/orc/impl/ZlibCodec.java index 398ac0d16b..d4275a4c26 100644 --- a/java/core/src/java/org/apache/orc/impl/ZlibCodec.java +++ b/java/core/src/java/org/apache/orc/impl/ZlibCodec.java @@ -169,6 +169,17 @@ public void decompress(ByteBuffer in, ByteBuffer out) throws IOException { out.arrayOffset() + out.position(), out.remaining()); out.position(count + out.position()); + + if (!inflater.finished() && !inflater.needsDictionary() && !inflater.needsInput() && + count == 0) { + if (out.remaining() == 0) { + throw new IOException("Decompress output buffer too small. in = " + in + + ", out = " + out); + } else { + throw new IOException("Decompress error. in = " + in + + ", out = " + out); + } + } } catch (DataFormatException dfe) { throw new IOException("Bad compression data", dfe); } diff --git a/java/core/src/java/org/apache/orc/impl/mask/RedactMaskFactory.java b/java/core/src/java/org/apache/orc/impl/mask/RedactMaskFactory.java index c6b65c3e8f..1debb93497 100644 --- a/java/core/src/java/org/apache/orc/impl/mask/RedactMaskFactory.java +++ b/java/core/src/java/org/apache/orc/impl/mask/RedactMaskFactory.java @@ -17,7 +17,6 @@ */ package org.apache.orc.impl.mask; -import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; @@ -135,7 +134,7 @@ public RedactMaskFactory(String... params) { OTHER_NUMBER_REPLACEMENT = getNextCodepoint(param, DEFAULT_NUMBER_OTHER); OTHER_REPLACEMENT = getNextCodepoint(param, DEFAULT_OTHER); String[] timeParams; - if (params.length < 2 || StringUtils.isBlank(params[1])) { + if (params.length < 2 || params[1].isBlank()) { timeParams = null; } else { timeParams = params[1].split("\\W+"); @@ -154,7 +153,7 @@ public RedactMaskFactory(String... 
params) { (SECOND_REPLACEMENT != UNMASKED_DATE); /* un-mask range */ - if(!(params.length < 3 || StringUtils.isBlank(params[2]))) { + if(!(params.length < 3 || params[2].isBlank())) { String[] unmaskIndexes = params[2].split(","); for(int i=0; i < unmaskIndexes.length; i++ ) { diff --git a/java/core/src/test/org/apache/orc/TestColumnStatistics.java b/java/core/src/test/org/apache/orc/TestColumnStatistics.java index 2ef96e5f50..ddcbcdc1ac 100644 --- a/java/core/src/test/org/apache/orc/TestColumnStatistics.java +++ b/java/core/src/test/org/apache/orc/TestColumnStatistics.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Text; import org.apache.orc.impl.ColumnStatisticsImpl; import org.junit.jupiter.api.BeforeEach; @@ -44,6 +45,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; /** @@ -699,6 +701,47 @@ public void testDecimalMinMaxStatistics() throws Exception { "Incorrect minimum value"); } + @Test + public void testBinaryMerge() { + TypeDescription schema = TypeDescription.createBinary(); + + ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema); + ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema); + stats1.increment(3); + stats1.updateBinary(new BytesWritable("bob".getBytes(StandardCharsets.UTF_8))); + stats1.updateBinary(new BytesWritable("david".getBytes(StandardCharsets.UTF_8))); + stats1.updateBinary(new BytesWritable("charles".getBytes(StandardCharsets.UTF_8))); + stats2.increment(2); + stats2.updateBinary(new BytesWritable("anne".getBytes(StandardCharsets.UTF_8))); + stats2.updateBinary(new BytesWritable("abcdef".getBytes(StandardCharsets.UTF_8))); + + assertEquals(15, ((BinaryColumnStatistics) stats1).getSum()); + assertEquals(10, ((BinaryColumnStatistics) stats2).getSum()); + + stats1.merge(stats2); + + assertEquals(25, ((BinaryColumnStatistics) stats1).getSum()); + } + + @Test + public void testMergeIncompatible() { + TypeDescription stringSchema = TypeDescription.createString(); + ColumnStatisticsImpl stringStats = ColumnStatisticsImpl.create(stringSchema); + + TypeDescription doubleSchema = TypeDescription.createDouble(); + ColumnStatisticsImpl doubleStats = ColumnStatisticsImpl.create(doubleSchema); + + stringStats.increment(3); + stringStats.updateString(new Text("bob")); + stringStats.updateString(new Text("david")); + stringStats.updateString(new Text("charles")); + + assertThrows(IllegalArgumentException.class, () -> { + doubleStats.merge(stringStats); + }); + + assertEquals(0, ((DoubleColumnStatistics) doubleStats).getNumberOfValues()); + } Path workDir = new Path(System.getProperty("test.tmp.dir", "target" + File.separator + "test" + File.separator + "tmp")); diff --git a/java/core/src/test/org/apache/orc/impl/TestConvertTreeReaderFactory.java b/java/core/src/test/org/apache/orc/impl/TestConvertTreeReaderFactory.java index a90a285a65..860b18aa7e 100644 --- a/java/core/src/test/org/apache/orc/impl/TestConvertTreeReaderFactory.java +++ b/java/core/src/test/org/apache/orc/impl/TestConvertTreeReaderFactory.java @@ -707,7 +707,7 @@ private void readDecimalInNullStripe(String 
typeString, Class expectedColumnT assertTrue(batch.cols[0].isRepeating); StringBuilder sb = new StringBuilder(); batch.cols[0].stringifyValue(sb, 1023); - assertEquals(sb.toString(), expectedResult[0]); + assertEquals(expectedResult[0], sb.toString()); rows.nextBatch(batch); assertEquals(1024, batch.size); @@ -717,17 +717,17 @@ private void readDecimalInNullStripe(String typeString, Class expectedColumnT assertFalse(batch.cols[0].isRepeating); StringBuilder sb2 = new StringBuilder(); batch.cols[0].stringifyValue(sb2, 1023); - assertEquals(sb2.toString(), expectedResult[1]); + assertEquals(expectedResult[1], sb2.toString()); rows.nextBatch(batch); assertEquals(1024, batch.size); assertEquals(expected, options.toString()); assertEquals(batch.cols.length, 1); assertEquals(batch.cols[0].getClass(), expectedColumnType); - assertTrue(batch.cols[0].isRepeating); + assertFalse(batch.cols[0].isRepeating); StringBuilder sb3 = new StringBuilder(); batch.cols[0].stringifyValue(sb3, 1023); - assertEquals(sb3.toString(), expectedResult[2]); + assertEquals(expectedResult[2], sb3.toString()); } private void testDecimalConvertToLongInNullStripe() throws Exception { diff --git a/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java b/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java index c2799ff901..aec865201c 100644 --- a/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java +++ b/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java @@ -17,7 +17,6 @@ */ package org.apache.orc.impl; -import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; @@ -54,26 +53,20 @@ public void testCornerCases() { BloomFilter bf = new BloomFilter(100); // FFF... to PPP... for (int i = 70; i <= 80; i++) { - final String inputString = StringUtils - .repeat(Character.toString((char) i), stringLength); + final String inputString = Character.toString((char) i).repeat(stringLength); bf.addString(inputString); } - final String longStringF = StringUtils - .repeat(Character.toString('F'), stringLength); - final String longStringP = StringUtils - .repeat(Character.toString('P'), stringLength); + final String longStringF = Character.toString('F').repeat(stringLength); + final String longStringP = Character.toString('P').repeat(stringLength); /* String that matches the upperbound value after truncation */ - final String upperboundString = - StringUtils.repeat(Character.toString('P'), 1023) + "Q"; + final String upperboundString = Character.toString('P').repeat(1023) + "Q"; /* String that matches the lower value after truncation */ - final String lowerboundString = StringUtils - .repeat(Character.toString('F'), 1024); + final String lowerboundString = Character.toString('F').repeat(1024); - final String shortStringF = StringUtils.repeat(Character.toString('F'), 50); - final String shortStringP = - StringUtils.repeat(Character.toString('P'), 50) + "Q"; + final String shortStringF = Character.toString('F').repeat(50); + final String shortStringP = Character.toString('P').repeat(50) + "Q"; /* Test for a case EQUALS where only upperbound is set */ final PredicateLeaf predicateUpperBoundEquals = TestRecordReaderImpl @@ -165,17 +158,13 @@ public void testNormalCase() throws Exception { BloomFilter bf = new BloomFilter(100); // FFF... to PPP... 
for (int i = 70; i <= 80; i++) { - final String inputString = StringUtils - .repeat(Character.toString((char) i), bfStringLength); + final String inputString = Character.toString((char) i).repeat(bfStringLength); bf.addString(inputString); } - final String longStringF = StringUtils - .repeat(Character.toString('F'), stringLength); - final String longStringP = StringUtils - .repeat(Character.toString('P'), stringLength); - final String predicateString = StringUtils - .repeat(Character.toString('I'), 50); + final String longStringF = Character.toString('F').repeat(stringLength); + final String longStringP = Character.toString('P').repeat(stringLength); + final String predicateString = Character.toString('I').repeat(50); /* Test for a case where only upperbound is set */ @@ -215,26 +204,20 @@ public void testIN() throws Exception { final BloomFilter bf = new BloomFilter(100); // FFF... to PPP... for (int i = 70; i <= 80; i++) { - final String inputString = StringUtils - .repeat(Character.toString((char) i), stringLength); + final String inputString = Character.toString((char) i).repeat(stringLength); bf.addString(inputString); } - final String longStringF = StringUtils - .repeat(Character.toString('F'), stringLength); - final String longStringP = StringUtils - .repeat(Character.toString('P'), stringLength); + final String longStringF = Character.toString('F').repeat(stringLength); + final String longStringP = Character.toString('P').repeat(stringLength); /* String that matches the upperbound value after truncation */ - final String upperboundString = - StringUtils.repeat(Character.toString('P'), 1023) + "Q"; + final String upperboundString = Character.toString('P').repeat(1023) + "Q"; /* String that matches the lower value after truncation */ - final String lowerboundString = StringUtils - .repeat(Character.toString('F'), 1024); + final String lowerboundString = Character.toString('F').repeat(1024); - final String shortStringF = StringUtils.repeat(Character.toString('F'), 50); - final String shortStringP = - StringUtils.repeat(Character.toString('P'), 50) + "Q"; + final String shortStringF = Character.toString('F').repeat(50); + final String shortStringP = Character.toString('P').repeat(50) + "Q"; final List args = new ArrayList(); args.add(upperboundString); diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java index f0124715b8..378f0fcdad 100644 --- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java +++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.common.io.DiskRangeList; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -2732,4 +2733,71 @@ public void testHadoopVectoredIO() throws Exception { verify(spyFSDataInputStream, atLeastOnce()).readVectored(any(), any()); } + + @Test + public void testDecimalIsRepeatingFlag() throws IOException { + Configuration conf = new Configuration(); + FileSystem fs = FileSystem.get(conf); + Path testFilePath = new Path(workDir, "testDecimalIsRepeatingFlag.orc"); + fs.delete(testFilePath, true); + + Configuration decimalConf = new Configuration(conf); + 
decimalConf.set(OrcConf.STRIPE_ROW_COUNT.getAttribute(), "1024"); + decimalConf.set(OrcConf.ROWS_BETWEEN_CHECKS.getAttribute(), "1"); + String typeStr = "decimal(20,10)"; + TypeDescription schema = TypeDescription.fromString("struct"); + Writer w = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(decimalConf).setSchema(schema)); + + VectorizedRowBatch b = schema.createRowBatch(); + DecimalColumnVector f1 = (DecimalColumnVector) b.cols[0]; + for (int i = 0; i < 1024; i++) { + f1.set(i, HiveDecimal.create("-119.4594594595")); + } + b.size = 1024; + w.addRowBatch(b); + + b.reset(); + for (int i = 0; i < 1024; i++) { + f1.set(i, HiveDecimal.create("9318.4351351351")); + } + b.size = 1024; + w.addRowBatch(b); + + b.reset(); + for (int i = 0; i < 1024; i++) { + f1.set(i, HiveDecimal.create("-4298.1513513514")); + } + b.size = 1024; + w.addRowBatch(b); + + b.reset(); + w.close(); + + Reader.Options options = new Reader.Options(); + try (Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf)); + RecordReader rows = reader.rows(options)) { + VectorizedRowBatch batch = schema.createRowBatch(); + + rows.nextBatch(batch); + assertEquals(1024, batch.size); + assertFalse(batch.cols[0].isRepeating); + for (HiveDecimalWritable hiveDecimalWritable : ((DecimalColumnVector) batch.cols[0]).vector) { + assertEquals(HiveDecimal.create("-119.4594594595"), hiveDecimalWritable.getHiveDecimal()); + } + + rows.nextBatch(batch); + assertEquals(1024, batch.size); + assertFalse(batch.cols[0].isRepeating); + for (HiveDecimalWritable hiveDecimalWritable : ((DecimalColumnVector) batch.cols[0]).vector) { + assertEquals(HiveDecimal.create("9318.4351351351"), hiveDecimalWritable.getHiveDecimal()); + } + + rows.nextBatch(batch); + assertEquals(1024, batch.size); + assertFalse(batch.cols[0].isRepeating); + for (HiveDecimalWritable hiveDecimalWritable : ((DecimalColumnVector) batch.cols[0]).vector) { + assertEquals(HiveDecimal.create("-4298.1513513514"), hiveDecimalWritable.getHiveDecimal()); + } + } + } } diff --git a/java/core/src/test/org/apache/orc/impl/TestZlib.java b/java/core/src/test/org/apache/orc/impl/TestZlib.java index 4ca62ca2af..6e940923ed 100644 --- a/java/core/src/test/org/apache/orc/impl/TestZlib.java +++ b/java/core/src/test/org/apache/orc/impl/TestZlib.java @@ -18,13 +18,21 @@ package org.apache.orc.impl; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.orc.CompressionCodec; +import org.apache.orc.OrcFile; +import org.apache.orc.Reader; +import org.apache.orc.RecordReader; import org.junit.jupiter.api.Test; import java.io.IOException; import java.nio.ByteBuffer; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; public class TestZlib { @@ -54,4 +62,24 @@ public void testCorrupt() throws Exception { // EXPECTED } } + + @Test + public void testCorruptZlibFile() { + Configuration conf = new Configuration(); + Path testFilePath = new Path(ClassLoader. 
+ getSystemResource("orc_corrupt_zlib.orc").getPath()); + + IOException exception = assertThrows( + IOException.class, + () -> { + try (Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf))) { + RecordReader rows = reader.rows(); + VectorizedRowBatch batch = reader.getSchema().createRowBatch(); + while (rows.nextBatch(batch)) { + } + } + } + ); + assertTrue(exception.getMessage().contains("Decompress output buffer too small")); + } } diff --git a/java/core/src/test/resources/orc_corrupt_zlib.orc b/java/core/src/test/resources/orc_corrupt_zlib.orc new file mode 100644 index 0000000000..e083a07c84 Binary files /dev/null and b/java/core/src/test/resources/orc_corrupt_zlib.orc differ diff --git a/java/examples/pom.xml b/java/examples/pom.xml index 119e00b0d4..5f7b8b69ac 100644 --- a/java/examples/pom.xml +++ b/java/examples/pom.xml @@ -17,7 +17,7 @@ org.apache.orc orc - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ../pom.xml diff --git a/java/mapreduce/pom.xml b/java/mapreduce/pom.xml index 30bd83e5ba..6f5b0c02ce 100644 --- a/java/mapreduce/pom.xml +++ b/java/mapreduce/pom.xml @@ -17,7 +17,7 @@ org.apache.orc orc - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ../pom.xml @@ -39,10 +39,6 @@ com.esotericsoftware kryo-shaded - - org.apache.commons - commons-lang3 - com.google.guava guava diff --git a/java/mapreduce/src/java/org/apache/orc/mapred/OrcInputFormat.java b/java/mapreduce/src/java/org/apache/orc/mapred/OrcInputFormat.java index ddb6a6ac2a..947d9b6f80 100644 --- a/java/mapreduce/src/java/org/apache/orc/mapred/OrcInputFormat.java +++ b/java/mapreduce/src/java/org/apache/orc/mapred/OrcInputFormat.java @@ -21,7 +21,6 @@ import com.esotericsoftware.kryo.Kryo; import com.esotericsoftware.kryo.io.Input; import com.esotericsoftware.kryo.io.Output; -import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; @@ -68,7 +67,7 @@ public static boolean[] parseInclude(TypeDescription schema, boolean[] result = new boolean[schema.getMaximumId() + 1]; result[0] = true; - if (StringUtils.isBlank(columnsStr)) { + if (columnsStr.isBlank()) { return result; } diff --git a/java/pom.xml b/java/pom.xml index 578e69a53d..07d0f675b0 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -21,7 +21,7 @@ org.apache.orc orc - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT pom Apache ORC @@ -60,31 +60,31 @@ - 1.78 - 1.16.0 - 10.15.0 + 1.80 + 1.18.0 + 10.23.0 ${project.basedir}/../../examples - 3.4.0 + 3.4.1 17 ${project.basedir}/../target/javadoc - 5.10.2 + 5.12.1 3.7.1 - 3.6.1 - 3.5.2 + 3.8.1 + 3.6.0 17 false - 3.9.6 + 3.9.9 5.10.0 - 1.0.0 + 1.1.0 - 2024-01-08T16:47:56Z - 3.25.3 - 2.0.12 + 2025-01-05T19:25:27Z + 3.25.5 + 2.0.17 2.8.1 - 3.0.0-M5 + 3.5.2 ${project.build.directory}/testing-tmp - 1.5.6-2 + 1.5.7-2 @@ -98,7 +98,7 @@ org.apache.orc orc-shims - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT org.apache.hadoop @@ -113,17 +113,17 @@ org.apache.orc orc-core - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT org.apache.orc orc-mapreduce - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT org.apache.orc orc-tools - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT @@ -135,7 +135,7 @@ com.google.code.gson gson - 2.9.0 + 2.13.0 com.google.protobuf @@ -145,28 +145,23 @@ commons-cli commons-cli - 1.6.0 + 1.9.0 org.apache.commons commons-lang3 - 3.14.0 + 3.17.0 io.airlift aircompressor - 0.26 + 2.0.2 com.github.luben zstd-jni ${zstd-jni.version} - - org.apache.commons - commons-csv - 1.10.0 - org.apache.hadoop hadoop-client-api @@ -208,7 +203,7 @@ org.threeten threeten-extra - 
1.7.1 + 1.8.0 com.aayushatharva.brotli4j @@ -221,7 +216,7 @@ com.google.guava guava - 33.1.0-jre + 33.4.0-jre test @@ -251,19 +246,19 @@ org.objenesis objenesis - 3.2 + 3.3 test net.bytebuddy byte-buddy - 1.14.11 + 1.17.0 test net.bytebuddy byte-buddy-agent - 1.14.11 + 1.17.0 test @@ -340,7 +335,7 @@ com.diffplug.spotless spotless-maven-plugin - 2.43.0 + 2.44.4 @@ -375,7 +370,7 @@ org.apache.maven.plugins maven-jar-plugin - 3.3.0 + 3.4.2 @@ -395,7 +390,7 @@ com.github.spotbugs spotbugs-maven-plugin - 4.8.3.0 + 4.9.3.0 spotbugs-include.xml spotbugs-exclude.xml @@ -436,6 +431,7 @@ .idea/** **/*.iml **/dependency-reduced-pom.xml + .mvn/jvm.config @@ -450,7 +446,7 @@ org.apache.maven.plugins maven-checkstyle-plugin - 3.3.1 + 3.6.0 ${basedir}/src/java @@ -495,7 +491,7 @@ org.codehaus.mojo build-helper-maven-plugin - 3.5.0 + 3.6.0 add-source @@ -600,7 +596,7 @@ io.github.zlika reproducible-build-maven-plugin - 0.16 + 0.17 @@ -608,12 +604,12 @@ org.apache.maven.plugins maven-enforcer-plugin - 3.4.0 + 3.5.0 org.codehaus.mojo extra-enforcer-rules - 1.8.0 + 1.10.0 diff --git a/java/shims/pom.xml b/java/shims/pom.xml index 5e9231b970..fb3a4e33c6 100644 --- a/java/shims/pom.xml +++ b/java/shims/pom.xml @@ -17,7 +17,7 @@ org.apache.orc orc - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ../pom.xml diff --git a/java/shims/src/java/org/apache/orc/impl/HadoopShims.java b/java/shims/src/java/org/apache/orc/impl/HadoopShims.java index 2ae0364f25..f79f353647 100644 --- a/java/shims/src/java/org/apache/orc/impl/HadoopShims.java +++ b/java/shims/src/java/org/apache/orc/impl/HadoopShims.java @@ -20,7 +20,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.util.VersionInfo; import org.apache.orc.EncryptionAlgorithm; import java.io.Closeable; @@ -132,9 +131,9 @@ ByteBuffer readBuffer(int maxLength, */ boolean endVariableLengthBlock(OutputStream output) throws IOException; - default boolean supportVectoredIO() { + default boolean supportVectoredIO(String version) { // HADOOP-18103 is available since Apache Hadoop 3.3.5+ - String[] versionParts = VersionInfo.getVersion().split("[.]"); + String[] versionParts = version.split("[.-]"); int major = Integer.parseInt(versionParts[0]); int minor = Integer.parseInt(versionParts[1]); int patch = Integer.parseInt(versionParts[2]); diff --git a/java/shims/src/test/org/apache/orc/impl/TestHadoopShimsPost3_3_4.java b/java/shims/src/test/org/apache/orc/impl/TestHadoopShimsPost3_3_4.java new file mode 100644 index 0000000000..774dac3c24 --- /dev/null +++ b/java/shims/src/test/org/apache/orc/impl/TestHadoopShimsPost3_3_4.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
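Beyond the version bumps above, the HadoopShims change makes supportVectoredIO testable by passing the Hadoop version string in as a parameter and splitting on "[.-]" instead of "[.]", so vendor-suffixed versions such as "3.3.6-co-3" no longer break Integer.parseInt on the patch component. The hunk shows the parsing but not the final comparison; the sketch below reconstructs a plausible check against the 3.3.5 threshold from HADOOP-18103, and that comparison is an assumption, not the patch's exact body:

    public class VectoredIoVersionCheckSketch {
      static boolean supportVectoredIO(String version) {
        // "3.3.6-co-3".split("[.-]") -> {"3", "3", "6", "co", "3"}; the old
        // split("[.]") would leave "6-co-3" and Integer.parseInt would throw.
        String[] versionParts = version.split("[.-]");
        int major = Integer.parseInt(versionParts[0]);
        int minor = Integer.parseInt(versionParts[1]);
        int patch = Integer.parseInt(versionParts[2]);
        // Assumed threshold: vectored IO needs Apache Hadoop 3.3.5 or newer.
        return major > 3 || (major == 3 && (minor > 3 || (minor == 3 && patch >= 5)));
      }

      public static void main(String[] args) {
        System.out.println(supportVectoredIO("3.3.4"));      // false
        System.out.println(supportVectoredIO("3.3.5"));      // true
        System.out.println(supportVectoredIO("3.3.6-co-3")); // true
      }
    }

These three cases match the assertions in TestHadoopShimsPost3_3_4 below.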
+ */ + +package org.apache.orc.impl; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +public class TestHadoopShimsPost3_3_4 { + + @Test + public void testOlderVersionForSupportVectoredIO() { + assertFalse(new HadoopShimsCurrent().supportVectoredIO("3.3.4")); + } + + @Test + public void testSupportedVersionForSupportVectoredIO() { + assertTrue(new HadoopShimsCurrent().supportVectoredIO("3.3.5")); + } + + @Test + public void testExtendedSemanticVersionForSupportVectoredIO() { + assertTrue(new HadoopShimsCurrent().supportVectoredIO("3.3.6-co-3")); + } +} diff --git a/java/tools/pom.xml b/java/tools/pom.xml index cc7cdd34f6..c8fee63de5 100644 --- a/java/tools/pom.xml +++ b/java/tools/pom.xml @@ -17,7 +17,7 @@ org.apache.orc orc - 2.1.0-SNAPSHOT + 2.2.0-SNAPSHOT ../pom.xml @@ -48,7 +48,7 @@ com.opencsv opencsv - 5.9 + 5.10 commons-beanutils @@ -60,10 +60,6 @@ commons-cli commons-cli - - org.apache.commons - commons-lang3 - org.apache.hive hive-storage-api diff --git a/java/tools/src/java/org/apache/orc/tools/FileDump.java b/java/tools/src/java/org/apache/orc/tools/FileDump.java index c235053106..b8acb1caed 100644 --- a/java/tools/src/java/org/apache/orc/tools/FileDump.java +++ b/java/tools/src/java/org/apache/orc/tools/FileDump.java @@ -22,7 +22,6 @@ import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; -import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; @@ -61,7 +60,7 @@ */ public final class FileDump { public static final String UNKNOWN = "UNKNOWN"; - public static final String SEPARATOR = StringUtils.repeat("_", 120) + "\n"; + public static final String SEPARATOR = "_".repeat(120) + "\n"; public static final String RECOVER_READ_SIZE = "orc.recover.read.size"; // only for testing public static final int DEFAULT_BLOCK_SIZE = 256 * 1024 * 1024; public static final String DEFAULT_BACKUP_PATH = System.getProperty("java.io.tmpdir"); @@ -134,7 +133,9 @@ public static void main(Configuration conf, String[] args) throws Exception { boolean prettyPrint = cli.hasOption('p'); JsonFileDump.printJsonMetaData(filesInPath, conf, rowIndexCols, prettyPrint, printTimeZone); } else { - printMetaData(filesInPath, conf, rowIndexCols, printTimeZone, recover, backupPath); + boolean printColumnType = cli.hasOption("column-type"); + printMetaData(filesInPath, conf, rowIndexCols, printTimeZone, recover, backupPath, + printColumnType); } } } @@ -268,11 +269,11 @@ public static Collection getAllFilesInPath(final Path path, private static void printMetaData(List files, Configuration conf, List rowIndexCols, boolean printTimeZone, final boolean recover, - final String backupPath) + final String backupPath, final boolean printColumnType) throws IOException { List corruptFiles = new ArrayList<>(); for (String filename : files) { - printMetaDataImpl(filename, conf, rowIndexCols, printTimeZone, corruptFiles); + printMetaDataImpl(filename, conf, rowIndexCols, printTimeZone, corruptFiles, printColumnType); System.out.println(SEPARATOR); } @@ -294,6 +295,15 @@ private static void printMetaData(List files, Configuration conf, } } + static void printColumnsType(TypeDescription schema) { + int maximumId = schema.getMaximumId(); + for (int c = schema.getId(); c < maximumId + 1; ++c) { + TypeDescription type = schema.findSubtype(c); + System.out.println(" Column " + type.getId() + ": field: 
" + type.getFullFieldName() + + " type: " + type.toString()); + } + } + static void printTypeAnnotations(TypeDescription type, String prefix) { List attributes = type.getAttributeNames(); if (attributes.size() > 0) { @@ -329,7 +339,7 @@ static void printTypeAnnotations(TypeDescription type, String prefix) { private static void printMetaDataImpl(final String filename, final Configuration conf, List rowIndexCols, final boolean printTimeZone, - final List corruptFiles) throws IOException { + final List corruptFiles, final boolean printColumnType) throws IOException { Path file = new Path(filename); Reader reader = getReader(file, conf, corruptFiles); // if we can create reader then footer is not corrupt and file will readable @@ -351,15 +361,20 @@ private static void printMetaDataImpl(final String filename, ? "Proleptic Gregorian" : "Julian/Gregorian")); System.out.println("Type: " + reader.getSchema().toString()); + if (printColumnType) { + System.out.println("Columns type:"); + printColumnsType(reader.getSchema()); + } printTypeAnnotations(reader.getSchema(), "root"); System.out.println("\nStripe Statistics:"); List stripeStats = reader.getStripeStatistics(); for (int n = 0; n < stripeStats.size(); n++) { System.out.println(" Stripe " + (n + 1) + ":"); StripeStatistics ss = stripeStats.get(n); - for (int i = 0; i < ss.getColumnStatistics().length; ++i) { + ColumnStatistics[] columnStatistics = ss.getColumnStatistics(); + for (int i = 0; i < columnStatistics.length; ++i) { System.out.println(" Column " + i + ": " + - ss.getColumnStatistics()[i].toString()); + columnStatistics[i].toString()); } } ColumnStatistics[] stats = reader.getStatistics(); @@ -834,6 +849,11 @@ static Options createOptions() { .desc("specify a backup path to store the corrupted files (default: /tmp)") .hasArg() .build()); + + result.addOption(Option.builder() + .longOpt("column-type") + .desc("Print the column id, name and type of each column") + .build()); return result; } diff --git a/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java b/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java index d6166ea91d..7d893a54c4 100644 --- a/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java +++ b/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java @@ -49,6 +49,8 @@ import java.io.IOException; import java.io.StringWriter; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; @@ -110,10 +112,11 @@ public static void printJsonMetaData(List files, writer.name("stripeNumber").value(n + 1); StripeStatistics ss = stripeStatistics.get(n); writer.name("columnStatistics").beginArray(); - for (int i = 0; i < ss.getColumnStatistics().length; i++) { + ColumnStatistics[] columnStatistics = ss.getColumnStatistics(); + for (int i = 0; i < columnStatistics.length; i++) { writer.beginObject(); writer.name("columnId").value(i); - writeColumnStatistics(writer, ss.getColumnStatistics()[i]); + writeColumnStatistics(writer, columnStatistics[i]); writer.endObject(); } writer.endArray(); @@ -222,6 +225,17 @@ public static void printJsonMetaData(List files, writer.name("numDeletes").value(acidStats.deletes); writer.name("numUpdates").value(acidStats.updates); } + List keys = reader.getMetadataKeys(); + keys.remove(OrcAcidUtils.ACID_STATS); + if (!keys.isEmpty()) { + writer.name("userMetadata").beginObject(); + for (String key : keys) { + writer.name(key); + ByteBuffer byteBuffer = reader.getMetadataValue(key); + 
writer.value(String.valueOf(StandardCharsets.UTF_8.decode(byteBuffer))); + } + writer.endObject(); + } writer.name("status").value("OK"); rows.close(); diff --git a/java/tools/src/java/org/apache/orc/tools/PrintData.java b/java/tools/src/java/org/apache/orc/tools/PrintData.java index 37a7209421..6c7c18ba15 100644 --- a/java/tools/src/java/org/apache/orc/tools/PrintData.java +++ b/java/tools/src/java/org/apache/orc/tools/PrintData.java @@ -238,6 +238,7 @@ private static Options getOptions() { .build(); Option linesOpt = Option.builder("n").longOpt("lines") .argName("LINES") + .desc("Sets lines of data to be printed") .hasArg() .build(); diff --git a/java/tools/src/java/org/apache/orc/tools/convert/ConvertTool.java b/java/tools/src/java/org/apache/orc/tools/convert/ConvertTool.java index fbdb8696de..f7e9bb1054 100644 --- a/java/tools/src/java/org/apache/orc/tools/convert/ConvertTool.java +++ b/java/tools/src/java/org/apache/orc/tools/convert/ConvertTool.java @@ -199,7 +199,7 @@ public ConvertTool(Configuration conf, this.csvHeaderLines = getIntOption(opts, 'H', 0); this.csvNullString = opts.getOptionValue('n', ""); this.timestampFormat = opts.getOptionValue("t", DEFAULT_TIMESTAMP_FORMAT); - this.bloomFilterColumns = opts.getOptionValue('b', null); + this.bloomFilterColumns = opts.getOptionValue('b'); this.unionTag = opts.getOptionValue("union-tag", "tag"); this.unionValue = opts.getOptionValue("union-value", "value"); String outFilename = opts.hasOption('o') diff --git a/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java b/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java index 358eb21a5d..7a07650493 100644 --- a/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java +++ b/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java @@ -29,7 +29,6 @@ import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; -import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.hadoop.conf.Configuration; import org.apache.orc.TypeDescription; import org.apache.orc.TypeDescriptionPrettyPrint; @@ -40,6 +39,8 @@ import java.io.IOException; import java.io.InputStreamReader; import java.io.PrintStream; +import java.io.PrintWriter; +import java.io.StringWriter; import java.math.BigInteger; import java.nio.charset.StandardCharsets; import java.util.List; @@ -264,6 +265,15 @@ public void addFile(java.io.Reader reader, String filename) { } } + public static String getStackTrace(final Throwable throwable) { + if (throwable == null) { + return ""; + } + final StringWriter sw = new StringWriter(); + throwable.printStackTrace(new PrintWriter(sw, true)); + return sw.toString(); + } + private void printParseExceptionMsg(JsonParseException e, String filename) { System.err.printf( "A JsonParseException was thrown while processing the %dth record of file %s.%n", @@ -282,7 +292,7 @@ private void printParseExceptionMsg(JsonParseException e, String filename) { System.exit(1); } } - System.err.printf("Please check the file.%n%n%s%n", ExceptionUtils.getStackTrace(e)); + System.err.printf("Please check the file.%n%n%s%n", getStackTrace(e)); System.exit(1); } diff --git a/java/tools/src/test/org/apache/orc/tools/TestFileDump.java b/java/tools/src/test/org/apache/orc/tools/TestFileDump.java index c265a7400e..2699abf402 100644 --- a/java/tools/src/test/org/apache/orc/tools/TestFileDump.java +++ b/java/tools/src/test/org/apache/orc/tools/TestFileDump.java @@ -22,6 +22,7 @@ import 
org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DateColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector; @@ -58,6 +59,7 @@ import java.nio.file.Paths; import java.sql.Timestamp; import java.text.SimpleDateFormat; +import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; @@ -827,6 +829,74 @@ public void testDoubleNaNAndInfinite() throws Exception { assertEquals("{\"x\":12.34}", lines[2]); } + @Test + public void testDumpColumnType() throws Exception { + TypeDescription schema = + TypeDescription.fromString("struct<a:boolean,b:tinyint,c:smallint,d:int,e:bigint,f:float,g:double,h:string,i:date,j:timestamp,k:binary,l:decimal(20,5),m:varchar(5),n:char(5)>"); + Writer writer = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .fileSystem(fs) + .setSchema(schema)); + + VectorizedRowBatch batch = schema.createRowBatch(); + LongColumnVector a = (LongColumnVector) batch.cols[0]; + LongColumnVector b = (LongColumnVector) batch.cols[1]; + LongColumnVector c = (LongColumnVector) batch.cols[2]; + LongColumnVector d = (LongColumnVector) batch.cols[3]; + LongColumnVector e = (LongColumnVector) batch.cols[4]; + DoubleColumnVector f = (DoubleColumnVector) batch.cols[5]; + DoubleColumnVector g = (DoubleColumnVector) batch.cols[6]; + BytesColumnVector h = (BytesColumnVector) batch.cols[7]; + DateColumnVector i = (DateColumnVector) batch.cols[8]; + TimestampColumnVector j = (TimestampColumnVector) batch.cols[9]; + BytesColumnVector k = (BytesColumnVector) batch.cols[10]; + DecimalColumnVector l = (DecimalColumnVector) batch.cols[11]; + BytesColumnVector m = (BytesColumnVector) batch.cols[12]; + BytesColumnVector n = (BytesColumnVector) batch.cols[13]; + + for (int o = 0; o < VectorizedRowBatch.DEFAULT_SIZE * 2; o++) { + int row = batch.size++; + a.vector[row] = row % 2; + b.vector[row] = row % 128; + c.vector[row] = row; + d.vector[row] = row; + e.vector[row] = row * 10000000L; + f.vector[row] = row * 1.0f; + g.vector[row] = row * 1.0d; + byte[] bytes = String.valueOf(row).getBytes(StandardCharsets.UTF_8); + h.setRef(row, bytes, 0, bytes.length); + i.vector[row] = row; + j.time[row] = row * 1000L; + j.nanos[row] = row; + k.setRef(row, bytes, 0, bytes.length); + l.vector[row] = new HiveDecimalWritable(row); + m.setRef(row, bytes, 0, bytes.length); + bytes = String.valueOf(10000 - row).getBytes(StandardCharsets.UTF_8); + n.setRef(row, bytes, 0, bytes.length); + + if (batch.size == batch.getMaxSize()) { + writer.addRowBatch(batch); + batch.reset(); + } + } + writer.close(); + assertEquals(VectorizedRowBatch.DEFAULT_SIZE * 2, writer.getNumberOfRows()); + + PrintStream origOut = System.out; + String outputFilename = "orc-file-dump-column-type.out"; + FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename); + + // replace stdout and run command + System.setOut(new PrintStream(myOut, false, StandardCharsets.UTF_8.toString())); + FileDump.main(new String[]{testFilePath.toString(), "--column-type"}); + System.out.flush(); + System.setOut(origOut); + + checkOutput(outputFilename, workDir + File.separator + outputFilename); + } + private static boolean contentEquals(String filePath, String otherFilePath) throws IOException { try (InputStream is = new BufferedInputStream(new FileInputStream(filePath)); InputStream otherIs = new BufferedInputStream(new FileInputStream(otherFilePath))) {
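The --column-type flag exercised by this test maps onto the printColumnsType helper added to FileDump above; the traversal is a flat walk over the schema's column ids. A self-contained sketch of the same walk on a trimmed version of this test's schema (illustrative, mirroring the helper rather than reproducing it; assumes orc-core on the classpath):

    import org.apache.orc.TypeDescription;

    public class ColumnTypeWalkSketch {
      public static void main(String[] args) {
        TypeDescription schema = TypeDescription.fromString(
            "struct<a:boolean,b:tinyint,h:string,l:decimal(20,5)>");
        // Column ids come from a pre-order walk: 0 is the root struct and
        // 1..n are its fields, matching the "Columns type:" block in the
        // expected output below.
        for (int c = schema.getId(); c <= schema.getMaximumId(); ++c) {
          TypeDescription type = schema.findSubtype(c);
          System.out.println("  Column " + type.getId() + ": field: "
              + type.getFullFieldName() + " type: " + type);
        }
      }
    }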
diff --git a/java/tools/src/test/org/apache/orc/tools/TestJsonFileDump.java b/java/tools/src/test/org/apache/orc/tools/TestJsonFileDump.java index 225d7c34d0..0ffbea7033 100644 --- a/java/tools/src/test/org/apache/orc/tools/TestJsonFileDump.java +++ b/java/tools/src/test/org/apache/orc/tools/TestJsonFileDump.java @@ -117,6 +117,10 @@ public void testJsonDump() throws Exception { writer.addRowBatch(batch); } + writer.addUserMetadata("hive.acid.key.index", + StandardCharsets.UTF_8.encode("1,1,1;2,3,5;")); + writer.addUserMetadata("some.user.property", + StandardCharsets.UTF_8.encode("foo#bar$baz&")); writer.close(); PrintStream origOut = System.out; String outputFilename = "orc-file-dump.json"; diff --git a/java/tools/src/test/org/apache/orc/tools/TesScanData.java b/java/tools/src/test/org/apache/orc/tools/TestScanData.java similarity index 94% rename from java/tools/src/test/org/apache/orc/tools/TesScanData.java rename to java/tools/src/test/org/apache/orc/tools/TestScanData.java index df73abc900..e9042a4037 100644 --- a/java/tools/src/test/org/apache/orc/tools/TesScanData.java +++ b/java/tools/src/test/org/apache/orc/tools/TestScanData.java @@ -36,7 +36,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue; -public class TesScanData { +public class TestScanData { private Path workDir = new Path(System.getProperty("test.tmp.dir")); private Configuration conf; private FileSystem fs; @@ -47,7 +47,7 @@ public void openFileSystem() throws Exception { conf = new Configuration(); fs = FileSystem.getLocal(conf); fs.setWorkingDirectory(workDir); - testFilePath = new Path("TesScanData.testScan.orc"); + testFilePath = new Path("TestScanData.testScan.orc"); fs.delete(testFilePath, false); } @@ -86,6 +86,6 @@ public void testScan() throws Exception { assertTrue(output.contains("{\"category\": \"struct\", \"id\": 0, \"max\": 2, \"fields\": [\n" + "{ \"x\": {\"category\": \"int\", \"id\": 1, \"max\": 1}},\n" + "{ \"y\": {\"category\": \"string\", \"id\": 2, \"max\": 2}}]}")); - assertTrue(output.contains("File: TesScanData.testScan.orc, bad batches: 0, rows: 10000/10000")); + assertTrue(output.contains("File: TestScanData.testScan.orc, bad batches: 0, rows: 10000/10000")); } } diff --git a/java/tools/src/test/resources/orc-file-dump-column-type.out b/java/tools/src/test/resources/orc-file-dump-column-type.out new file mode 100644 index 0000000000..73267e7287 --- /dev/null +++ b/java/tools/src/test/resources/orc-file-dump-column-type.out @@ -0,0 +1,121 @@ +Structure for TestFileDump.testDump.orc +File Version: 0.12 with ORC_14 by ORC Java 2.1.0-SNAPSHOT +Rows: 2048 +Compression: ZSTD +Compression size: 262144 +Calendar: Julian/Gregorian +Type: struct<a:boolean,b:tinyint,c:smallint,d:int,e:bigint,f:float,g:double,h:string,i:date,j:timestamp,k:binary,l:decimal(20,5),m:varchar(5),n:char(5)> +Columns type: + Column 0: field: 0 type: struct<a:boolean,b:tinyint,c:smallint,d:int,e:bigint,f:float,g:double,h:string,i:date,j:timestamp,k:binary,l:decimal(20,5),m:varchar(5),n:char(5)> + Column 1: field: a type: boolean + Column 2: field: b type: tinyint + Column 3: field: c type: smallint + Column 4: field: d type: int + Column 5: field: e type: bigint + Column 6: field: f type: float + Column 7: field: g type: double + Column 8: field: h type: string + Column 9: field: i type: date + Column 10: field: j type: timestamp + Column 11: field: k type: binary + Column 12: field: l type: decimal(20,5) + Column 13: field: m type: varchar(5) + Column 14: field: n type: char(5) + +Stripe Statistics: + Stripe 1: + Column 0: count: 2048 hasNull: false + Column 1: count: 2048 hasNull: false bytesOnDisk: 7 true: 1024 + Column 2: count: 2048 hasNull: false bytesOnDisk: 152 min: 0 max: 127 sum: 130048 + Column 3: count: 2048 hasNull: false bytesOnDisk: 21 min: 0 max: 1023 sum: 1047552 + Column 4:
count: 2048 hasNull: false bytesOnDisk: 21 min: 0 max: 1023 sum: 1047552 + Column 5: count: 2048 hasNull: false bytesOnDisk: 35 min: 0 max: 10230000000 sum: 10475520000000 + Column 6: count: 2048 hasNull: false bytesOnDisk: 2361 min: 0.0 max: 1023.0 sum: 1047552.0 + Column 7: count: 2048 hasNull: false bytesOnDisk: 973 min: 0.0 max: 1023.0 sum: 1047552.0 + Column 8: count: 2048 hasNull: false bytesOnDisk: 2988 min: 0 max: 999 sum: 5972 + Column 9: count: 2048 hasNull: false bytesOnDisk: 21 min: Hybrid AD 1970-01-01 max: Hybrid AD 1972-10-20 + Column 10: count: 2048 hasNull: false bytesOnDisk: 1626 min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:17:03.000001023 + Column 11: count: 2048 hasNull: false bytesOnDisk: 1404 sum: 5972 + Column 12: count: 2048 hasNull: false bytesOnDisk: 1666 min: 0 max: 1023 sum: 1047552 + Column 13: count: 2048 hasNull: false bytesOnDisk: 2988 min: 0 max: 999 sum: 5972 + Column 14: count: 2048 hasNull: false bytesOnDisk: 1277 min: 10000 max: 9999 sum: 10240 + +File Statistics: + Column 0: count: 2048 hasNull: false + Column 1: count: 2048 hasNull: false bytesOnDisk: 7 true: 1024 + Column 2: count: 2048 hasNull: false bytesOnDisk: 152 min: 0 max: 127 sum: 130048 + Column 3: count: 2048 hasNull: false bytesOnDisk: 21 min: 0 max: 1023 sum: 1047552 + Column 4: count: 2048 hasNull: false bytesOnDisk: 21 min: 0 max: 1023 sum: 1047552 + Column 5: count: 2048 hasNull: false bytesOnDisk: 35 min: 0 max: 10230000000 sum: 10475520000000 + Column 6: count: 2048 hasNull: false bytesOnDisk: 2361 min: 0.0 max: 1023.0 sum: 1047552.0 + Column 7: count: 2048 hasNull: false bytesOnDisk: 973 min: 0.0 max: 1023.0 sum: 1047552.0 + Column 8: count: 2048 hasNull: false bytesOnDisk: 2988 min: 0 max: 999 sum: 5972 + Column 9: count: 2048 hasNull: false bytesOnDisk: 21 min: Hybrid AD 1970-01-01 max: Hybrid AD 1972-10-20 + Column 10: count: 2048 hasNull: false bytesOnDisk: 1626 min: 1969-12-31 16:00:00.0 max: 1969-12-31 16:17:03.000001023 + Column 11: count: 2048 hasNull: false bytesOnDisk: 1404 sum: 5972 + Column 12: count: 2048 hasNull: false bytesOnDisk: 1666 min: 0 max: 1023 sum: 1047552 + Column 13: count: 2048 hasNull: false bytesOnDisk: 2988 min: 0 max: 999 sum: 5972 + Column 14: count: 2048 hasNull: false bytesOnDisk: 1277 min: 10000 max: 9999 sum: 10240 + +Stripes: + Stripe: offset: 3 data: 15540 rows: 2048 tail: 225 index: 464 + Stream: column 0 section ROW_INDEX start: 3 length 12 + Stream: column 1 section ROW_INDEX start: 15 length 24 + Stream: column 2 section ROW_INDEX start: 39 length 28 + Stream: column 3 section ROW_INDEX start: 67 length 28 + Stream: column 4 section ROW_INDEX start: 95 length 28 + Stream: column 5 section ROW_INDEX start: 123 length 35 + Stream: column 6 section ROW_INDEX start: 158 length 45 + Stream: column 7 section ROW_INDEX start: 203 length 45 + Stream: column 8 section ROW_INDEX start: 248 length 30 + Stream: column 9 section ROW_INDEX start: 278 length 24 + Stream: column 10 section ROW_INDEX start: 302 length 35 + Stream: column 11 section ROW_INDEX start: 337 length 24 + Stream: column 12 section ROW_INDEX start: 361 length 39 + Stream: column 13 section ROW_INDEX start: 400 length 30 + Stream: column 14 section ROW_INDEX start: 430 length 37 + Stream: column 1 section DATA start: 467 length 7 + Stream: column 2 section DATA start: 474 length 152 + Stream: column 3 section DATA start: 626 length 21 + Stream: column 4 section DATA start: 647 length 21 + Stream: column 5 section DATA start: 668 length 35 + Stream: column 6 section DATA start: 703 
length 2361 + Stream: column 7 section DATA start: 3064 length 973 + Stream: column 8 section DATA start: 4037 length 1575 + Stream: column 8 section LENGTH start: 5612 length 47 + Stream: column 8 section DICTIONARY_DATA start: 5659 length 1366 + Stream: column 9 section DATA start: 7025 length 21 + Stream: column 10 section DATA start: 7046 length 35 + Stream: column 10 section SECONDARY start: 7081 length 1591 + Stream: column 11 section DATA start: 8672 length 1368 + Stream: column 11 section LENGTH start: 10040 length 36 + Stream: column 12 section DATA start: 10076 length 1647 + Stream: column 12 section SECONDARY start: 11723 length 19 + Stream: column 13 section DATA start: 11742 length 1575 + Stream: column 13 section LENGTH start: 13317 length 47 + Stream: column 13 section DICTIONARY_DATA start: 13364 length 1366 + Stream: column 14 section DATA start: 14730 length 753 + Stream: column 14 section LENGTH start: 15483 length 11 + Stream: column 14 section DICTIONARY_DATA start: 15494 length 513 + Encoding column 0: DIRECT + Encoding column 1: DIRECT + Encoding column 2: DIRECT + Encoding column 3: DIRECT_V2 + Encoding column 4: DIRECT_V2 + Encoding column 5: DIRECT_V2 + Encoding column 6: DIRECT + Encoding column 7: DIRECT + Encoding column 8: DICTIONARY_V2[1024] + Encoding column 9: DIRECT_V2 + Encoding column 10: DIRECT_V2 + Encoding column 11: DIRECT_V2 + Encoding column 12: DIRECT_V2 + Encoding column 13: DICTIONARY_V2[1024] + Encoding column 14: DICTIONARY_V2[1024] + +File length: 16919 bytes +File raw data size: 1048404 bytes +Padding length: 0 bytes +Padding ratio: 0% +________________________________________________________________________________________________________________________ + diff --git a/java/tools/src/test/resources/orc-file-dump.json b/java/tools/src/test/resources/orc-file-dump.json index d94c59bb6a..15fdba74a8 100644 --- a/java/tools/src/test/resources/orc-file-dump.json +++ b/java/tools/src/test/resources/orc-file-dump.json @@ -2,7 +2,7 @@ "fileName": "TestFileDump.testDump.orc", "fileVersion": "0.12", "writerVersion": "ORC_14", - "softwareVersion": "ORC Java 1.9.0-SNAPSHOT", + "softwareVersion": "ORC Java 2.1.0-SNAPSHOT", "numberOfRows": 21000, "compression": "ZLIB", "compressionBufferSize": 4096, @@ -461,48 +461,48 @@ "numHashFunctions": 7, "bitCount": 9600, "popCount": 238, - "loadFactor": 0.024791667237877846, - "expectedFpp": 5.756256582500896E-12 + "loadFactor": 0.024791667, + "expectedFpp": 5.7562566E-12 }, { "entryId": 1, "numHashFunctions": 7, "bitCount": 9600, "popCount": 238, - "loadFactor": 0.024791667237877846, - "expectedFpp": 5.756256582500896E-12 + "loadFactor": 0.024791667, + "expectedFpp": 5.7562566E-12 }, { "entryId": 2, "numHashFunctions": 7, "bitCount": 9600, "popCount": 238, - "loadFactor": 0.024791667237877846, - "expectedFpp": 5.756256582500896E-12 + "loadFactor": 0.024791667, + "expectedFpp": 5.7562566E-12 }, { "entryId": 3, "numHashFunctions": 7, "bitCount": 9600, "popCount": 238, - "loadFactor": 0.024791667237877846, - "expectedFpp": 5.756256582500896E-12 + "loadFactor": 0.024791667, + "expectedFpp": 5.7562566E-12 }, { "entryId": 4, "numHashFunctions": 7, "bitCount": 9600, "popCount": 238, - "loadFactor": 0.024791667237877846, - "expectedFpp": 5.756256582500896E-12 + "loadFactor": 0.024791667, + "expectedFpp": 5.7562566E-12 } ], "stripeLevelBloomFilter": { "numHashFunctions": 7, "bitCount": 9600, "popCount": 238, - "loadFactor": 0.024791667237877846, - "expectedFpp": 5.756256582500896E-12 + "loadFactor": 0.024791667, + 
"expectedFpp": 5.7562566E-12 } } ] @@ -704,48 +704,48 @@ "numHashFunctions": 7, "bitCount": 9600, "popCount": 238, - "loadFactor": 0.024791667237877846, - "expectedFpp": 5.756256582500896E-12 + "loadFactor": 0.024791667, + "expectedFpp": 5.7562566E-12 }, { "entryId": 1, "numHashFunctions": 7, "bitCount": 9600, "popCount": 238, - "loadFactor": 0.024791667237877846, - "expectedFpp": 5.756256582500896E-12 + "loadFactor": 0.024791667, + "expectedFpp": 5.7562566E-12 }, { "entryId": 2, "numHashFunctions": 7, "bitCount": 9600, "popCount": 238, - "loadFactor": 0.024791667237877846, - "expectedFpp": 5.756256582500896E-12 + "loadFactor": 0.024791667, + "expectedFpp": 5.7562566E-12 }, { "entryId": 3, "numHashFunctions": 7, "bitCount": 9600, "popCount": 238, - "loadFactor": 0.024791667237877846, - "expectedFpp": 5.756256582500896E-12 + "loadFactor": 0.024791667, + "expectedFpp": 5.7562566E-12 }, { "entryId": 4, "numHashFunctions": 7, "bitCount": 9600, "popCount": 238, - "loadFactor": 0.024791667237877846, - "expectedFpp": 5.756256582500896E-12 + "loadFactor": 0.024791667, + "expectedFpp": 5.7562566E-12 } ], "stripeLevelBloomFilter": { "numHashFunctions": 7, "bitCount": 9600, "popCount": 238, - "loadFactor": 0.024791667237877846, - "expectedFpp": 5.756256582500896E-12 + "loadFactor": 0.024791667, + "expectedFpp": 5.7562566E-12 } } ] @@ -947,48 +947,48 @@ "numHashFunctions": 7, "bitCount": 9600, "popCount": 238, - "loadFactor": 0.024791667237877846, - "expectedFpp": 5.756256582500896E-12 + "loadFactor": 0.024791667, + "expectedFpp": 5.7562566E-12 }, { "entryId": 1, "numHashFunctions": 7, "bitCount": 9600, "popCount": 238, - "loadFactor": 0.024791667237877846, - "expectedFpp": 5.756256582500896E-12 + "loadFactor": 0.024791667, + "expectedFpp": 5.7562566E-12 }, { "entryId": 2, "numHashFunctions": 7, "bitCount": 9600, "popCount": 238, - "loadFactor": 0.024791667237877846, - "expectedFpp": 5.756256582500896E-12 + "loadFactor": 0.024791667, + "expectedFpp": 5.7562566E-12 }, { "entryId": 3, "numHashFunctions": 7, "bitCount": 9600, "popCount": 238, - "loadFactor": 0.024791667237877846, - "expectedFpp": 5.756256582500896E-12 + "loadFactor": 0.024791667, + "expectedFpp": 5.7562566E-12 }, { "entryId": 4, "numHashFunctions": 7, "bitCount": 9600, "popCount": 238, - "loadFactor": 0.024791667237877846, - "expectedFpp": 5.756256582500896E-12 + "loadFactor": 0.024791667, + "expectedFpp": 5.7562566E-12 } ], "stripeLevelBloomFilter": { "numHashFunctions": 7, "bitCount": 9600, "popCount": 238, - "loadFactor": 0.024791667237877846, - "expectedFpp": 5.756256582500896E-12 + "loadFactor": 0.024791667, + "expectedFpp": 5.7562566E-12 } } ] @@ -1190,48 +1190,48 @@ "numHashFunctions": 7, "bitCount": 9600, "popCount": 238, - "loadFactor": 0.024791667237877846, - "expectedFpp": 5.756256582500896E-12 + "loadFactor": 0.024791667, + "expectedFpp": 5.7562566E-12 }, { "entryId": 1, "numHashFunctions": 7, "bitCount": 9600, "popCount": 238, - "loadFactor": 0.024791667237877846, - "expectedFpp": 5.756256582500896E-12 + "loadFactor": 0.024791667, + "expectedFpp": 5.7562566E-12 }, { "entryId": 2, "numHashFunctions": 7, "bitCount": 9600, "popCount": 238, - "loadFactor": 0.024791667237877846, - "expectedFpp": 5.756256582500896E-12 + "loadFactor": 0.024791667, + "expectedFpp": 5.7562566E-12 }, { "entryId": 3, "numHashFunctions": 7, "bitCount": 9600, "popCount": 238, - "loadFactor": 0.024791667237877846, - "expectedFpp": 5.756256582500896E-12 + "loadFactor": 0.024791667, + "expectedFpp": 5.7562566E-12 }, { "entryId": 4, "numHashFunctions": 7, 
"bitCount": 9600, "popCount": 238, - "loadFactor": 0.024791667237877846, - "expectedFpp": 5.756256582500896E-12 + "loadFactor": 0.024791667, + "expectedFpp": 5.7562566E-12 } ], "stripeLevelBloomFilter": { "numHashFunctions": 7, "bitCount": 9600, "popCount": 238, - "loadFactor": 0.024791667237877846, - "expectedFpp": 5.756256582500896E-12 + "loadFactor": 0.024791667, + "expectedFpp": 5.7562566E-12 } } ] @@ -1361,16 +1361,16 @@ "numHashFunctions": 7, "bitCount": 9600, "popCount": 238, - "loadFactor": 0.024791667237877846, - "expectedFpp": 5.756256582500896E-12 + "loadFactor": 0.024791667, + "expectedFpp": 5.7562566E-12 } ], "stripeLevelBloomFilter": { "numHashFunctions": 7, "bitCount": 9600, "popCount": 238, - "loadFactor": 0.024791667237877846, - "expectedFpp": 5.756256582500896E-12 + "loadFactor": 0.024791667, + "expectedFpp": 5.7562566E-12 } } ] @@ -1380,5 +1380,9 @@ "rawDataSize": 2144730, "paddingLength": 0, "paddingRatio": 0.0, + "userMetadata": { + "hive.acid.key.index": "1,1,1;2,3,5;", + "some.user.property": "foo#bar$baz&" + }, "status": "OK" } diff --git a/site/Dockerfile b/site/Dockerfile index ff0a613974..a2a26a285f 100644 --- a/site/Dockerfile +++ b/site/Dockerfile @@ -17,8 +17,11 @@ # ORC site builder # -FROM ubuntu:20.04 -LABEL maintainer="Apache ORC project " +FROM ubuntu:24.04 +LABEL org.opencontainers.image.authors="Apache ORC project " +LABEL org.opencontainers.image.licenses="Apache-2.0" +LABEL org.opencontainers.image.ref.name="Apache ORC site builder" +LABEL org.opencontainers.image.version="" RUN ln -fs /usr/share/zoneinfo/America/Los_Angeles /etc/localtime RUN apt-get update @@ -37,8 +40,8 @@ RUN gem install \ liquid \ listen \ rouge -RUN gem install jekyll -v 3.8.6 -RUN gem install github-pages +RUN gem install jekyll +RUN gem install -f github-pages RUN useradd -ms /bin/bash orc COPY . 
/home/orc/site diff --git a/site/Gemfile b/site/Gemfile index 1c529c9ce1..200c6ce7b2 100644 --- a/site/Gemfile +++ b/site/Gemfile @@ -1,3 +1,2 @@ source 'https://rubygems.org/' -gem 'rouge' -gem 'jekyll', "~> 3.8.3" +gem 'jekyll', "~> 4.3" diff --git a/site/_data/releases.yml b/site/_data/releases.yml index e4f0fc3600..c181ef17c3 100644 --- a/site/_data/releases.yml +++ b/site/_data/releases.yml @@ -1,16 +1,80 @@ # List the releases in reverse logical order # Only one release should be tagged latest +2.1.1: + date: 2025-03-06 + state: latest + tar: orc-2.1.1.tar.gz + signed-by: Dongjoon Hyun (34F0FC5C) + sha256: 15af8baeee322bab0298559a14a09cf8c14cf2008e35d8a78d3cc8a4c98d1e59 + known-issues: + +2.1.0: + date: 2025-01-09 + state: archived + tar: orc-2.1.0.tar.gz + signed-by: William Hyun (DECDFA29) + sha256: 1ffac0228aa83f04a1b1cf2788a3af5953e82587ae3a77c41900e99f2557132d + known-issues: + +2.0.4: + date: 2025-03-20 + state: stable + tar: orc-2.0.4.tar.gz + signed-by: Dongjoon Hyun (34F0FC5C) + sha256: 9525a76fae64a6da2a29adba36474c2ef863d042a394b78a9873d591649b7f0a + known-issues: + +2.0.3: + date: 2024-11-14 + state: archived + tar: orc-2.0.3.tar.gz + signed-by: Dongjoon Hyun (34F0FC5C) + sha256: 082cba862b5a8a0d14c225404d0b51cd8d1b64ca81b8f1e500322ce8922cb86d + known-issues: + +2.0.2: + date: 2024-08-15 + state: archived + tar: orc-2.0.2.tar.gz + signed-by: Dongjoon Hyun (34F0FC5C) + sha256: fabdee3e8acd64dae1e8b8987149a7188121b40b025de46d15cc9d0becee2279 + known-issues: + +2.0.1: + date: 2024-05-14 + state: archived + tar: orc-2.0.1.tar.gz + signed-by: William Hyun (DECDFA29) + sha256: 1ffac0228aa83f04a1b1cf2788a3af5953e82587ae3a77c41900e99f2557132d + known-issues: + 2.0.0: date: 2024-03-08 - state: latest + state: archived tar: orc-2.0.0.tar.gz signed-by: Dongjoon Hyun (34F0FC5C) sha256: 9107730919c29eb39efaff1b9e36166634d1d4d9477e5fee76bfd6a8fec317df known-issues: +1.9.5: + date: 2024-11-14 + state: stable + tar: orc-1.9.5.tar.gz + signed-by: Dongjoon Hyun (34F0FC5C) + sha256: 6900b4e8a2e4e49275f4067bd0f838ad68330204305fd3f13a5ec519e9d71547 + known-issues: + +1.9.4: + date: 2024-07-16 + state: archived + tar: orc-1.9.4.tar.gz + signed-by: William Hyun (DECDFA29) + sha256: d9a6bcc00e07a6e54d81ce380134e495ed0fc0d9dc1988d4d52125c9def097fd + known-issues: + 1.9.3: date: 2024-03-20 - state: stable + state: archived tar: orc-1.9.3.tar.gz signed-by: Gang Wu (578F619B) sha256: f737d005d0c4deb65688ac3c0223ed530b0ba6258552555b2774dcdb77359b0f @@ -40,9 +104,25 @@ sha256: 0dca8bbccdb2ee87e59ba964933436beebd02ea78c4134424828a8127fbc4faa known-issues: +1.8.8: + date: 2024-11-11 + state: stable + tar: orc-1.8.8.tar.gz + signed-by: Gang Wu (8A461DF4) + sha256: eca12a9139c0889d11ef1ecc8f273ccb0ef5d19df70d61cb732194d806db026b + known-issues: + +1.8.7: + date: 2024-04-14 + state: archived + tar: orc-1.8.7.tar.gz + signed-by: Dongjoon Hyun (34F0FC5C) + sha256: 57c9d12bf74b2752b1ce1039c15035c3b6f6531d865df962a99b3e079b3dfdb7 + known-issues: + 1.8.6: date: 2023-11-10 - state: stable + state: archived tar: orc-1.8.6.tar.gz signed-by: Dongjoon Hyun (34F0FC5C) sha256: 5675b18118df4dd7f86cc6ba859ed75b425ea1b7ddff805e1d671a17fd57d7f7 @@ -96,9 +176,17 @@ sha256: 859d78bfded98405c32ccb2847b565a57bcc53f473a74087c1f750aeb5932e62 known-issues: +1.7.11: + date: 2024-09-13 + state: archived + tar: orc-1.7.11.tar.gz + signed-by: Dongjoon Hyun (34F0FC5C) + sha256: ff62f0b882470529b3e2507daa4092ffdb34818c220abefb11cac443e5757236 + known-issues: + 1.7.10: date: 2023-11-10 - state: stable + state: archived tar: 
orc-1.7.10.tar.gz signed-by: Dongjoon Hyun (34F0FC5C) sha256: 85aef9368dc9bcdffaaf10010b66dfe053ce22f30b64854f63852248164686a3 diff --git a/site/_docs/building.md b/site/_docs/building.md index f1cc015eaa..4a57663ef9 100644 --- a/site/_docs/building.md +++ b/site/_docs/building.md @@ -11,8 +11,8 @@ The C++ library is supported on the following operating systems: * CentOS 7 * Debian 10 to 12 -* MacOS 12 to 14 -* Ubuntu 20.04 to 24.04 +* MacOS 13 to 15 +* Ubuntu 22.04 to 24.04 You'll want to install the usual set of developer tools, but at least: @@ -27,7 +27,6 @@ is in the docker subdirectory, for the list of packages required to build ORC: * [Debian 11]({{ page.dockerUrl }}/debian11/Dockerfile) * [Debian 12]({{ page.dockerUrl }}/debian12/Dockerfile) -* [Ubuntu 20]({{ page.dockerUrl }}/ubuntu20/Dockerfile) * [Ubuntu 22]({{ page.dockerUrl }}/ubuntu22/Dockerfile) * [Ubuntu 24]({{ page.dockerUrl }}/ubuntu24/Dockerfile) * [Fedora 37]({{ page.dockerUrl }}/fedora37/Dockerfile) diff --git a/site/_docs/dask.md b/site/_docs/dask.md index 7719e7d4cd..3699d59fd6 100644 --- a/site/_docs/dask.md +++ b/site/_docs/dask.md @@ -9,7 +9,7 @@ permalink: /docs/dask.html [Dask](https://dask.org) also supports Apache ORC. ``` -pip3 install "dask[dataframe]==2023.8.1" +pip3 install "dask[dataframe]==2024.12.1" pip3 install pandas ``` diff --git a/site/_docs/index.md b/site/_docs/index.md index 76addd410f..5d3e2ec2a9 100644 --- a/site/_docs/index.md +++ b/site/_docs/index.md @@ -37,4 +37,4 @@ are separated from each other so the reader can read just the columns that are required. For details on the specifics of the ORC format, please see the [ORC -format specification](/specification/). \ No newline at end of file +format specification]({{ site.baseurl }}/specification/). diff --git a/site/_docs/java-tools.md b/site/_docs/java-tools.md index f537201133..a3d546e007 100644 --- a/site/_docs/java-tools.md +++ b/site/_docs/java-tools.md @@ -142,6 +142,9 @@ equivalent to the Hive ORC File Dump command. `--backup-path ` : when used with --recover specifies the path where the recovered file is written (default: /tmp) +`--column-type` + : Print the column id, name and type of each column + `-d,--data` : Should the data be printed diff --git a/site/_docs/pyarrow.md b/site/_docs/pyarrow.md index fca23797fe..aa32c54a9d 100644 --- a/site/_docs/pyarrow.md +++ b/site/_docs/pyarrow.md @@ -9,7 +9,7 @@ permalink: /docs/pyarrow.html [Apache Arrow](https://arrow.apache.org) project's [PyArrow](https://pypi.org/project/pyarrow/) is the recommended package. ``` -pip3 install pyarrow==13.0.0 +pip3 install pyarrow==18.1.0 pip3 install pandas ``` diff --git a/site/_includes/docs_ul.html b/site/_includes/docs_ul.html index 8e93fee854..a11fdbadb8 100644 --- a/site/_includes/docs_ul.html +++ b/site/_includes/docs_ul.html @@ -12,7 +12,7 @@ {% for p in site.docs %} {% if p.url == item_url %} -
[docs_ul.html hunk: the removed and added lines both render "{{ p.title }}" as the current page's entry; the wrapping list-item markup was lost in extraction] {% break %} {% endif %} {% endfor %} diff --git a/site/_includes/header.html b/site/_includes/header.html index e6e4721cf9..04d5ebde21 100644 --- a/site/_includes/header.html +++ b/site/_includes/header.html @@ -5,9 +5,9 @@ [header.html hunk body was navigation markup, lost in extraction]
    diff --git a/site/_includes/news_contents.html b/site/_includes/news_contents.html index 2748456741..85546b49c5 100644 --- a/site/_includes/news_contents.html +++ b/site/_includes/news_contents.html @@ -2,17 +2,17 @@