Closed
Changes from all commits
68 commits
7b7aad2
Upgrade tonic dependencies to 0.13.0 version (try 2) (#7839)
alamb Jul 16, 2025
0055f57
[Variant] Reserve capacity beforehand during large object building (#…
friendlymatthew Jul 16, 2025
7af62d5
[Variant] Support appending complex variants in `VariantBuilder` (#7914)
friendlymatthew Jul 16, 2025
d4c0a32
[Variant] Add `variant_get` compute kernel (#7919)
Samyak2 Jul 16, 2025
03a837e
Add tests for `BatchCoalescer::push_batch_with_filter`, fix bug (#7774)
alamb Jul 16, 2025
d809f19
[Variant] Add documentation, tests and cleaner api for Variant::get_p…
alamb Jul 17, 2025
7089786
[Variant] Avoid collecting offset iterator (#7934)
codephage2020 Jul 17, 2025
dfe907f
Minor: Support BinaryView and StringView builders in `make_builder` (…
kylebarron Jul 17, 2025
d0fa24e
[Variant] Impl `PartialEq` for VariantObject (#7943)
friendlymatthew Jul 17, 2025
233dad3
Optimize partition_validity function used in sort kernels (#7937)
jhorstmann Jul 18, 2025
722ef59
[Variant] Add ObjectBuilder::with_field for convenience (#7950)
alamb Jul 18, 2025
a984ca7
[Variant] Adding code to store metadata and value references in Varia…
abacef Jul 18, 2025
a5afda2
[Variant] VariantMetadata is allowed to contain the empty string (#7956)
scovich Jul 18, 2025
71dd48e
[Variant] Add `variant_kernels` benchmark (#7944)
alamb Jul 18, 2025
a15f345
[Variant] Add ListBuilder::with_value for convenience (#7959)
codephage2020 Jul 18, 2025
4f5ab12
[Test] Add tests for VariantList equality (#7953)
alamb Jul 18, 2025
55fbf5c
[Variant] remove VariantMetadata::dictionary_size (#7958)
codephage2020 Jul 18, 2025
99eb1bc
Add missing `parquet-variant-compute` crate to CI jobs (#7963)
alamb Jul 18, 2025
82821e5
arrow-ipc: Remove all abilities to preserve dict IDs (#7940)
brancz Jul 18, 2025
291e6e5
Add arrow-avro support for Impala Nullability (#7954)
veronica-m-ef Jul 21, 2025
b726b6f
Add additional integration tests to arrow-avro (#7974)
nathaniel-d-ef Jul 22, 2025
ed02131
arrow-schema: Remove dict_id from being required equal for merging (#…
brancz Jul 22, 2025
d4f1cfa
Implement Improved arrow-avro Reader Zero-Byte Record Handling (#7966)
jecsand838 Jul 22, 2025
6874ffa
[Variant] Avoid extra allocation in object builder (#7935)
klion26 Jul 22, 2025
dff67c9
GH-7686: [Parquet] Fix int96 min/max stats (#7687)
rahulketch Jul 22, 2025
f39461c
[Variant] Revisit VariantMetadata and Object equality (#7961)
friendlymatthew Jul 22, 2025
ec81db3
Add decimal32 and decimal64 support to Parquet, JSON and CSV readers …
CurtHagenlocher Jul 22, 2025
50f5562
Convert JSON to VariantArray without copying (8 - 32% faster) (#7911)
alamb Jul 23, 2025
a7f3ba8
Fix panic on lossy decimal to float casting: round to saturation for …
kosiew Jul 23, 2025
3e089d2
Perf: optimize actual_buffer_size to use only data buffer capacity fo…
zhuqi-lucas Jul 23, 2025
16794ab
Minor: Restore warning comment on Int96 statistics read (#7975)
alamb Jul 23, 2025
a65a984
test: add tests for converting sliced list to row based (#7994)
rluvaton Jul 27, 2025
0f0baf8
bench: benchmark interleave structs (#8007)
rluvaton Jul 27, 2025
9d26336
bench: add benchmark for converting list and sliced list to row forma…
rluvaton Jul 28, 2025
73c3e97
[Variant] Avoid extra buffer allocation in ListBuilder (#7987)
klion26 Jul 28, 2025
5acdafb
perf: Improve `interleave` performance for struct (3-6 times faster) …
rluvaton Jul 28, 2025
4fcffa5
perf: only encode actual list values in `RowConverter` (16-26 times f…
rluvaton Jul 28, 2025
fde1947
doc: remove outdated info from CONTRIBUTING doc in project root dir. …
sonhmai Jul 28, 2025
1d9afbc
Minor: Upate `cast_with_options` docs about casting integers --> inte…
alamb Jul 28, 2025
00a2f73
[Variant] impl FromIterator for VariantPath (#8011)
sdf-jkl Jul 29, 2025
499de7d
Create empty buffer for a buffer specified in the C Data Interface wi…
viirya Jul 29, 2025
625e6ee
Perf: improve sort via `partition_validity` to use fast path for bit …
zhuqi-lucas Jul 29, 2025
2418c59
[Parquet] Allow writing compatible DictionaryArrays to parquet writer…
albertlockett Jul 29, 2025
cbadec7
Add benchmark for converting StringViewArray with mixed short and lon…
ding-young Jul 29, 2025
d634ac8
Implement full-range `i256::to_f64` to eliminate ±∞ saturation for De…
kosiew Jul 29, 2025
079d4f2
Improve memory usage for `arrow-row -> String/BinaryView` when utf8 v…
ding-young Jul 29, 2025
9423040
Prepare for `56.0.0` release: Update version and `CHANGELOG.md` (#8014)
alamb Jul 29, 2025
876585c
Fix doc test in avro-arrow (#8020)
alamb Jul 29, 2025
bfc7679
Add more comments to the internal parquet reader (#7932)
alamb Aug 1, 2025
a535d3b
feat: add method for sync Parquet reader read bloom filter (#8024)
mapleFU Aug 1, 2025
a9b6077
docs: Fix a typo in README (#8036)
EricccTaiwan Aug 4, 2025
a3d144f
Add more benchmarks for Parquet thrift decoding (#8037)
etseidl Aug 5, 2025
5dd3463
Add arrow-avro `SchemaStore` and fingerprinting (#8039)
jecsand838 Aug 5, 2025
c25c5a7
implement `cast_to_variant` kernel to cast native types to `VariantAr…
alamb Aug 6, 2025
3e7c887
[Variant] Minor: use From impl to make conversion infallable (#8068)
alamb Aug 6, 2025
c6887ff
Fix arrow-avro type resolver register bug (#8046)
yongkyunlee Aug 6, 2025
e4d359b
Minor: Consolidate int96 stats roundtrip test (#8034)
alamb Aug 6, 2025
0710ecc
Improve StringArray(Utf8) sort performance (~2-4x faster) (#7860)
zhuqi-lucas Aug 7, 2025
554cafa
Implement `DataType::Float16` => `Variant::Float` (#8073)
superserious-dev Aug 7, 2025
5036ca8
Support multi-threaded writing of Parquet files with modular encrypti…
rok Aug 7, 2025
a4bcd6d
Add arrow-avro Decoder Benchmarks (#8025)
jecsand838 Aug 7, 2025
e845411
Fix new clippy lints from Rust 1.89 (#8078)
alamb Aug 7, 2025
25bbb3d
Bump actions/download-artifact from 4 to 5 (#8066)
dependabot[bot] Aug 7, 2025
7a5f6d3
Implement `DataType::{Binary, LargeBinary, BinaryView}` => `Variant::…
superserious-dev Aug 7, 2025
c561acb
[Variant] Add `variant_get` and Shredded `VariantArray` (#8021)
alamb Aug 7, 2025
4a21443
Implement arrow-avro SchemaStore and Fingerprinting To Enable Schema …
jecsand838 Aug 7, 2025
04f217b
Speed up Parquet filter pushdown v4 (Predicate evaluation cache for a…
XiangpengHao Aug 8, 2025
c002613
[ADD] add integration tests for variant testing
carpecodeum Aug 9, 2025
5 changes: 4 additions & 1 deletion .github/workflows/arrow.yml
@@ -68,7 +68,10 @@ jobs:
- name: Test arrow-schema
run: cargo test -p arrow-schema --all-features
- name: Test arrow-array
run: cargo test -p arrow-array --all-features
run: |
cargo test -p arrow-array --all-features
# Disable feature `force_validate`
cargo test -p arrow-array --features=ffi
- name: Test arrow-select
run: cargo test -p arrow-select --all-features
- name: Test arrow-cast
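The second invocation above exists because `--all-features` also enables `force_validate`, which is incompatible with tests that deliberately construct arrays from raw FFI buffers (see the `arrow-array/src/ffi.rs` changes later in this diff). A minimal sketch of that feature-gating pattern — the module and test names here are illustrative, not taken from the PR:

```rust
// Sketch only: illustrates why CI runs `cargo test -p arrow-array --features=ffi`
// in addition to `--all-features`. With `force_validate` enabled, ArrayData
// validates its invariants on construction, so tests that feed in unusual FFI
// buffers are compiled out entirely.
#[cfg(all(test, not(feature = "force_validate")))]
mod ffi_edge_case_tests {
    #[test]
    fn compiled_only_without_force_validate() {
        // This test only exists (and runs) when `force_validate` is off.
        assert!(cfg!(not(feature = "force_validate")));
    }
}
```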
2 changes: 1 addition & 1 deletion .github/workflows/arrow_flight.yml
@@ -60,7 +60,7 @@ jobs:
cargo test -p arrow-flight --all-features
- name: Test --examples
run: |
cargo test -p arrow-flight --features=flight-sql,tls --examples
cargo test -p arrow-flight --features=flight-sql,tls-ring --examples

vendor:
name: Verify Vendored Code
2 changes: 1 addition & 1 deletion .github/workflows/docs.yml
@@ -79,7 +79,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Download crate docs
uses: actions/download-artifact@v4
uses: actions/download-artifact@v5
with:
name: crate-docs
path: website/build
16 changes: 12 additions & 4 deletions .github/workflows/parquet-variant.yml
@@ -31,6 +31,8 @@ on:
pull_request:
paths:
- parquet-variant/**
- parquet-variant-json/**
- parquet-variant-compute/**
- .github/**

jobs:
@@ -50,6 +52,8 @@ jobs:
run: cargo test -p parquet-variant
- name: Test parquet-variant-json
run: cargo test -p parquet-variant-json
- name: Test parquet-variant-compute
run: cargo test -p parquet-variant-compute

# test compilation
linux-features:
@@ -63,10 +67,12 @@ jobs:
submodules: true
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
- name: Check compilation
- name: Check compilation (parquet-variant)
run: cargo check -p parquet-variant
- name: Check compilation
- name: Check compilation (parquet-variant-json)
run: cargo check -p parquet-variant-json
- name: Check compilation (parquet-variant-compute)
run: cargo check -p parquet-variant-compute

clippy:
name: Clippy
@@ -79,7 +85,9 @@
uses: ./.github/actions/setup-builder
- name: Setup Clippy
run: rustup component add clippy
- name: Run clippy
- name: Run clippy (parquet-variant)
run: cargo clippy -p parquet-variant --all-targets --all-features -- -D warnings
- name: Run clippy
- name: Run clippy (parquet-variant-json)
run: cargo clippy -p parquet-variant-json --all-targets --all-features -- -D warnings
- name: Run clippy (parquet-variant-compute)
run: cargo clippy -p parquet-variant-compute --all-targets --all-features -- -D warnings
320 changes: 320 additions & 0 deletions CHANGELOG-old.md

Large diffs are not rendered by default.

346 changes: 225 additions & 121 deletions CHANGELOG.md

Large diffs are not rendered by default.

3 changes: 1 addition & 2 deletions CONTRIBUTING.md
@@ -89,8 +89,7 @@ You can also use rust's official docker image:
docker run --rm -v $(pwd):/arrow-rs -it rust /bin/bash -c "cd /arrow-rs && rustup component add rustfmt && cargo build"
```

The command above assumes that are in the root directory of the project, not in the same
directory as this README.md.
The command above assumes that are in the root directory of the project.

You can also compile specific workspaces:

34 changes: 17 additions & 17 deletions Cargo.toml
@@ -67,7 +67,7 @@ exclude = [
]

[workspace.package]
version = "55.2.0"
version = "56.0.0"
homepage = "https://github.com/apache/arrow-rs"
repository = "https://github.com/apache/arrow-rs"
authors = ["Apache Arrow <[email protected]>"]
@@ -84,22 +84,22 @@ edition = "2021"
rust-version = "1.84"

[workspace.dependencies]
arrow = { version = "55.2.0", path = "./arrow", default-features = false }
arrow-arith = { version = "55.2.0", path = "./arrow-arith" }
arrow-array = { version = "55.2.0", path = "./arrow-array" }
arrow-buffer = { version = "55.2.0", path = "./arrow-buffer" }
arrow-cast = { version = "55.2.0", path = "./arrow-cast" }
arrow-csv = { version = "55.2.0", path = "./arrow-csv" }
arrow-data = { version = "55.2.0", path = "./arrow-data" }
arrow-ipc = { version = "55.2.0", path = "./arrow-ipc" }
arrow-json = { version = "55.2.0", path = "./arrow-json" }
arrow-ord = { version = "55.2.0", path = "./arrow-ord" }
arrow-pyarrow = { version = "55.2.0", path = "./arrow-pyarrow" }
arrow-row = { version = "55.2.0", path = "./arrow-row" }
arrow-schema = { version = "55.2.0", path = "./arrow-schema" }
arrow-select = { version = "55.2.0", path = "./arrow-select" }
arrow-string = { version = "55.2.0", path = "./arrow-string" }
parquet = { version = "55.2.0", path = "./parquet", default-features = false }
arrow = { version = "56.0.0", path = "./arrow", default-features = false }
arrow-arith = { version = "56.0.0", path = "./arrow-arith" }
arrow-array = { version = "56.0.0", path = "./arrow-array" }
arrow-buffer = { version = "56.0.0", path = "./arrow-buffer" }
arrow-cast = { version = "56.0.0", path = "./arrow-cast" }
arrow-csv = { version = "56.0.0", path = "./arrow-csv" }
arrow-data = { version = "56.0.0", path = "./arrow-data" }
arrow-ipc = { version = "56.0.0", path = "./arrow-ipc" }
arrow-json = { version = "56.0.0", path = "./arrow-json" }
arrow-ord = { version = "56.0.0", path = "./arrow-ord" }
arrow-pyarrow = { version = "56.0.0", path = "./arrow-pyarrow" }
arrow-row = { version = "56.0.0", path = "./arrow-row" }
arrow-schema = { version = "56.0.0", path = "./arrow-schema" }
arrow-select = { version = "56.0.0", path = "./arrow-select" }
arrow-string = { version = "56.0.0", path = "./arrow-string" }
parquet = { version = "56.0.0", path = "./parquet", default-features = false }

# These crates have not yet been released and thus do not use the workspace version
parquet-variant = { version = "0.1.0", path = "./parquet-variant" }
2 changes: 1 addition & 1 deletion README.md
@@ -108,7 +108,7 @@ The deprecated version is the next version which will be released (please
consult the list above). To mark the API as deprecated, use the
`#[deprecated(since = "...", note = "...")]` attribute.

Foe example
For example

```rust
#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
2 changes: 2 additions & 0 deletions arrow-array/src/builder/mod.rs
@@ -447,6 +447,7 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
DataType::Float64 => Box::new(Float64Builder::with_capacity(capacity)),
DataType::Binary => Box::new(BinaryBuilder::with_capacity(capacity, 1024)),
DataType::LargeBinary => Box::new(LargeBinaryBuilder::with_capacity(capacity, 1024)),
DataType::BinaryView => Box::new(BinaryViewBuilder::with_capacity(capacity)),
DataType::FixedSizeBinary(len) => {
Box::new(FixedSizeBinaryBuilder::with_capacity(capacity, *len))
}
@@ -464,6 +465,7 @@
),
DataType::Utf8 => Box::new(StringBuilder::with_capacity(capacity, 1024)),
DataType::LargeUtf8 => Box::new(LargeStringBuilder::with_capacity(capacity, 1024)),
DataType::Utf8View => Box::new(StringViewBuilder::with_capacity(capacity)),
DataType::Date32 => Box::new(Date32Builder::with_capacity(capacity)),
DataType::Date64 => Box::new(Date64Builder::with_capacity(capacity)),
DataType::Time32(TimeUnit::Second) => {
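With the two new arms above, `make_builder` can now produce view-type builders. A minimal usage sketch (not part of the diff; it assumes only the existing `arrow_array::builder` public API):

```rust
use arrow_array::builder::{make_builder, ArrayBuilder, StringViewBuilder};
use arrow_array::Array;
use arrow_schema::DataType;

fn main() {
    // Before this change, `make_builder` had no arm for view types.
    let mut builder = make_builder(&DataType::Utf8View, 16);

    // The result is a boxed `dyn ArrayBuilder`; downcast to the concrete type.
    let sv = builder
        .as_any_mut()
        .downcast_mut::<StringViewBuilder>()
        .expect("DataType::Utf8View should yield a StringViewBuilder");
    sv.append_value("short");
    sv.append_value("a longer value that does not fit in the 12-byte inline view");

    let array = sv.finish();
    assert_eq!(array.len(), 2);
}
```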
39 changes: 37 additions & 2 deletions arrow-array/src/ffi.rs
@@ -408,7 +408,17 @@ impl ImportedArrowArray<'_> {
.map(|index| {
let len = self.buffer_len(index, variadic_buffer_lens, &self.data_type)?;
match unsafe { create_buffer(self.owner.clone(), self.array, index, len) } {
Some(buf) => Ok(buf),
Some(buf) => {
// External libraries may use a dangling pointer for a buffer with length 0.
// We respect the array length specified in the C Data Interface. Actually,
// if the length is incorrect, we cannot create a correct buffer even if
// the pointer is valid.
if buf.is_empty() {
Ok(MutableBuffer::new(0).into())
} else {
Ok(buf)
}
}
None if len == 0 => {
// Null data buffer, which Rust doesn't allow. So create
// an empty buffer.
@@ -515,7 +525,7 @@ impl ImportedArrowArray<'_> {
unsafe { create_buffer(self.owner.clone(), self.array, 0, buffer_len) }
}

fn dictionary(&self) -> Result<Option<ImportedArrowArray>> {
fn dictionary(&self) -> Result<Option<ImportedArrowArray<'_>>> {
match (self.array.dictionary(), &self.data_type) {
(Some(array), DataType::Dictionary(_, value_type)) => Ok(Some(ImportedArrowArray {
array,
@@ -1296,9 +1306,15 @@ mod tests_to_then_from_ffi {

#[cfg(test)]
mod tests_from_ffi {
#[cfg(not(feature = "force_validate"))]
use std::ptr::NonNull;
use std::sync::Arc;

#[cfg(feature = "force_validate")]
use arrow_buffer::{bit_util, buffer::Buffer};
#[cfg(not(feature = "force_validate"))]
use arrow_buffer::{bit_util, buffer::Buffer, ScalarBuffer};

use arrow_data::transform::MutableArrayData;
use arrow_data::ArrayData;
use arrow_schema::{DataType, Field};
@@ -1660,6 +1676,25 @@
}
}

#[test]
#[cfg(not(feature = "force_validate"))]
fn test_utf8_view_ffi_from_dangling_pointer() {
let empty = GenericByteViewBuilder::<StringViewType>::new().finish();
let buffers = empty.data_buffers().to_vec();
let nulls = empty.nulls().cloned();

// Create a dangling pointer to a view buffer with zero length.
let alloc = Arc::new(1);
let buffer = unsafe { Buffer::from_custom_allocation(NonNull::<u8>::dangling(), 0, alloc) };
let views = unsafe { ScalarBuffer::new_unchecked(buffer) };

let str_view: GenericByteViewArray<StringViewType> =
unsafe { GenericByteViewArray::new_unchecked(views, buffers, nulls) };
let imported = roundtrip_byte_view_array(str_view);
assert_eq!(imported.len(), 0);
assert_eq!(&imported, &empty);
}

#[test]
fn test_round_trip_byte_view() {
fn test_case<T>()
13 changes: 12 additions & 1 deletion arrow-avro/Cargo.toml
@@ -55,21 +55,32 @@ zstd = { version = "0.13", default-features = false, optional = true }
bzip2 = { version = "0.6.0", optional = true }
xz = { version = "0.1", default-features = false, optional = true }
crc = { version = "3.0", optional = true }
strum_macros = "0.27"
uuid = "1.17"
indexmap = "2.10"


[dev-dependencies]
arrow-data = { workspace = true }
rand = { version = "0.9.1", default-features = false, features = [
"std",
"std_rng",
"thread_rng",
] }
criterion = { version = "0.6.0", default-features = false }
criterion = { version = "0.7.0", default-features = false }
tempfile = "3.3"
arrow = { workspace = true }
futures = "0.3.31"
bytes = "1.10.1"
async-stream = "0.3.6"
apache-avro = "0.14.0"
num-bigint = "0.4"
once_cell = "1.21.3"

[[bench]]
name = "avro_reader"
harness = false

[[bench]]
name = "decoder"
harness = false
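The new `[[bench]]` entry with `harness = false` tells Cargo to build `benches/decoder.rs` without the default test harness so Criterion can provide its own `main`. A hypothetical skeleton of such a bench target — the actual decoder benchmark added by this PR is more involved, and the names and workload here are placeholders:

```rust
// Hypothetical skeleton for a Criterion bench target registered with
// `harness = false`; it stands in for the real arrow-avro decoder benchmark.
use criterion::{criterion_group, criterion_main, Criterion};

fn decode_benchmark(c: &mut Criterion) {
    // Placeholder input standing in for encoded Avro records.
    let input: Vec<u8> = (0..1024u32).flat_map(|v| v.to_le_bytes()).collect();
    c.bench_function("decode_placeholder", |b| {
        b.iter(|| {
            // Sum the bytes through black_box so the loop is not optimized away.
            std::hint::black_box(input.iter().map(|&x| x as u64).sum::<u64>())
        })
    });
}

criterion_group!(benches, decode_benchmark);
criterion_main!(benches);
```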