From a595c576a26d8f24571d57e1fccb9eec0317a0d5 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Wed, 27 Nov 2024 10:55:19 -0500 Subject: [PATCH 01/13] add boilerplate files This commit adds the boilerplate files and fixes the CI workflows. --- .github/workflows/check.yml | 19 - .github/workflows/test.yml | 3 - .gitignore | 13 + Cargo.lock | 2836 +++++++++++++++++++++++++++++++++++ optd-mvp/Cargo.toml | 27 + optd-mvp/README.md | 27 + optd-mvp/src/bin/migrate.rs | 19 + optd-mvp/src/lib.rs | 16 + optd-mvp/src/main.rs | 3 + 9 files changed, 2941 insertions(+), 22 deletions(-) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 optd-mvp/Cargo.toml create mode 100644 optd-mvp/README.md create mode 100644 optd-mvp/src/bin/migrate.rs create mode 100644 optd-mvp/src/lib.rs create mode 100644 optd-mvp/src/main.rs diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 33cf544..aced2f1 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -21,9 +21,6 @@ on: concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true -defaults: - run: - working-directory: ./optd-persistent name: check jobs: fmt: @@ -61,22 +58,6 @@ jobs: components: clippy - name: cargo clippy run: cargo clippy --locked --all-targets --all-features -- -D warnings - doc: - # run docs generation on nightly rather than stable. This enables features like - # https://doc.rust-lang.org/beta/unstable-book/language-features/doc-cfg.html which allows an - # API be documented as only available in some specific platforms. 
- runs-on: ubuntu-latest - name: nightly / doc - steps: - - uses: actions/checkout@v4 - with: - submodules: true - - name: Install nightly - uses: dtolnay/rust-toolchain@nightly - - name: Install cargo-docs-rs - uses: dtolnay/install@cargo-docs-rs - - name: cargo docs-rs - run: cargo docs-rs hack: # cargo-hack checks combinations of feature flags to ensure that features are all additive # which is required for feature unification diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d6253a0..565c9eb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,9 +17,6 @@ on: concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true -defaults: - run: - working-directory: ./optd-persistent name: test jobs: required: diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..861fce8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +# Contains compiled files and executables. +debug/ +target/ + +# These are backup files generated by rustfmt. +**/*.rs.bk + +# Ignore any database files. +**/*.db + +# We will check in all code-generated entity files, as newer versions of `sea-orm-cli` might +# conflict with previous versions. +# **/entities \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..3059383 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,2836 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "addr2line" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a55f82cfe485775d02112886f4169bde0c5894d75e79ead7eafe7e40a25e45f7" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6aa100a6f6f525226719f8de3f70076be4f4191801ebd92621450d1c51e9053d" + +[[package]] +name = "ahash" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77c3a9648d43b9cd48db467b3f87fdd6e146bcc88ab0180006cef2179fe11d01" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aliasable" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd" + +[[package]] +name = "allocator-api2" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52f4a9cf8f3ff707b4eb1acd0136efd8b3bec6b345ed32fcab47c0a5c99b800" + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstream" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e579a7752471abc2a8268df8b20005e3eadd975f585398f17efcfd8d4927371" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is-terminal", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "41ed9a86bf92ae6580e0a31281f65a1b1d867c0cc68d5346e2ae128dddfa6a7d" + +[[package]] +name = "anstyle-parse" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e765fd216e48e067936442276d1d57399e37bce53c264d6fefbe298080cb57ee" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" +dependencies = [ + "windows-sys 0.48.0", +] + +[[package]] +name = "anstyle-wincon" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bcd8291a340dd8ac70e18878bc4501dd7b4ff970cfa21c207d36ece51ea88fd" +dependencies = [ + "anstyle", + "windows-sys 0.48.0", +] + +[[package]] +name = "arrayvec" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a2f58b0bb10c380af2b26e57212856b8c9a59e0925b4c20f4a174a49734eaf7" + +[[package]] +name = "async-stream" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a26cb53174ddd320edfff199a853f93d571f48eeb4dde75e67a9a3dbb7b7e5e" +dependencies = [ + "async-stream-impl", + "futures-core", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db134ba52475c060f3329a8ef0f8786d6b872ed01515d4b79c162e5798da1340" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.98", +] + +[[package]] +name = "async-trait" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6287685011f026b98d26afd53251ad0101e856531b423eb2384265f7d4f5b01" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.98", +] + +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + +[[package]] +name = "autocfg" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d" + +[[package]] +name = "backtrace" +version = "0.3.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88fb5a785d6b44fd9d6700935608639af1b8356de1e55d5f7c2740f4faa15d82" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + +[[package]] +name = "base64" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a" + +[[package]] +name = "base64" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9475866fec1451be56a3c2400fd081ff546538961565ccb5b7142cbd22bc7a51" + +[[package]] +name = "base64ct" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71acf5509fc522cce1b100ac0121c635129bfd4d91cdf036bcc9b9935f97ccf5" + +[[package]] +name = "bigdecimal" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5274a6b6e0ee020148397245b973e30163b7bffbc6d473613f850cb99888581e" +dependencies = [ + "libm", + "num-bigint", + "num-integer", + "num-traits", + "serde", +] + +[[package]] +name = "bitflags" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d155346769a6855b86399e9bc3814ab343cd3d62c7e985113d46a0ec3c281fd" + +[[package]] +name = "bitflags" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" +dependencies = [ + "serde", +] + +[[package]] +name = "block-buffer" +version = "0.10.1" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03588e54c62ae6d763e2a80090d50353b785795361b4ff5b3bf0a5097fc31c0b" +dependencies = [ + "generic-array", +] + +[[package]] +name = "bumpalo" +version = "3.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12ae9db68ad7fac5fe51304d20f016c911539251075a214f8e663babefa35187" + +[[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + +[[package]] +name = "bytes" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8" + +[[package]] +name = "cc" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2aba8f4e9906c7ce3c73463f62a7f0c65183ada1a2d47e397cc8810827f9694f" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bc015644b92d5890fab7489e49d21f879d5c990186827d42ec511919404f38b" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "num-traits", + "serde", + "windows-targets 0.52.0", +] + +[[package]] +name = "clap" +version = "4.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384e169cc618c613d5e3ca6404dda77a8685a63e08660dcc64abaf7da7cb0c7a" +dependencies = [ + "clap_builder", + "clap_derive", + "once_cell", +] + +[[package]] +name = "clap_builder" +version = "4.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef137bbe35aab78bdb468ccfba75a5f4d8321ae011d34063770780545176af2d" +dependencies = [ + "anstream", + "anstyle", + 
"clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8cd2b2a819ad6eec39e8f1d6b53001af1e5469f8c177579cdaeb313115b825f" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "clap_lex" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b" + +[[package]] +name = "codespan-reporting" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6ce42b8998a383572e0a802d859b1f00c79b7b7474e62fff88ee5c2845d9c13" +dependencies = [ + "termcolor", + "unicode-width", +] + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" + +[[package]] +name = "concurrent-queue" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62ec6771ecfa0762d24683ee5a32ad78487a3d3afdc0fb8cae19d2c5deb50b7c" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "const-oid" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520fbf3c07483f94e3e3ca9d0cfd913d7718ef2483d2cfd91c0d9e91474ab913" + +[[package]] +name = "core-foundation-sys" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" + +[[package]] +name = "cpufeatures" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d997bd5e24a5928dd43e46dc529867e207907fe0b239c3477d924f7f2ca320" +dependencies = [ + "libc", +] + +[[package]] +name = "crc" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"53757d12b596c16c78b83458d732a5d1a17ab3f53f2f7412f6fb57cc8a140ab3" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d0165d2900ae6778e36e80bbc4da3b5eefccee9ba939761f9c2882a5d9af3ff" + +[[package]] +name = "crossbeam-queue" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b10ddc024425c88c2ad148c1b0fd53f4c6d38db9697c9f1588381212fa657c9" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc" +dependencies = [ + "cfg-if", + "once_cell", +] + +[[package]] +name = "crypto-common" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57952ca27b5e3606ff4dd79b0020231aaf9d6aa76dc05fd30137538c50bd3ce8" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "cxx" +version = "1.0.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4c53d75fe543215ca091d792e13351dcb940842dd2829b2a2dd43ab4bd1a015" +dependencies = [ + "cc", + "cxxbridge-flags", + "cxxbridge-macro", + "link-cplusplus", +] + +[[package]] +name = "cxx-build" +version = "1.0.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "618f85c8f132bd8912aab124e15a38adc762bb7e3cef84524adde1692ef3e8bc" +dependencies = [ + "cc", + "codespan-reporting", + "once_cell", + "proc-macro2", + "quote", + "scratch", + "syn 1.0.98", +] + +[[package]] +name = "cxxbridge-flags" +version = "1.0.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca21461be76a23df4f63a2107a0bb406ef41548e635ff7edcbd1ab5a6bb997e2" + +[[package]] +name = "cxxbridge-macro" +version = "1.0.34" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee8da0a2c0697647b5824844a5d2dedcd97a2d7b75e6e4d0b8dd183e4081e1cf" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.98", +] + +[[package]] +name = "darling" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7c99d16b88c92aef47e58dadd53e87b4bd234c29934947a6cec8b466300f99b" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ea05d2fcb27b53f7a98faddaf5f2914760330ab7703adfc9df13332b42189f9" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "darling_macro" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bfb82b62b1b8a2a9808fb4caf844ede819a76cfc23b2827d7f94eefb49551eb" +dependencies = [ + "darling_core", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "der" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19c5cb402c5c958281c7c0702edea7b780d03b86b606497ca3a10fcd3fc393ac" +dependencies = [ + "const-oid", + "pem-rfc7468", + "zeroize", +] + +[[package]] +name = "deranged" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f32d04922c60427da6f9fef14d042d9edddef64cb9d4ce0d64d0685fbeb1fd3" +dependencies = [ + "powerfmt", + "serde", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "const-oid", + "crypto-common", + "subtle", +] + +[[package]] +name = "dirs" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059" 
+dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03d86534ed367a67548dc68113a0f5db55432fdfbb6e6f9d77704397d95d5780" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + +[[package]] +name = "dotenvy" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed9155c8f4dc55c7470ae9da3f63c6785245093b3f6aeb0f5bf2e968efbba314" +dependencies = [ + "dirs", +] + +[[package]] +name = "either" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" +dependencies = [ + "serde", +] + +[[package]] +name = "equivalent" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88bffebc5d80432c9b140ee17875ff173a8ab62faad5b257da912bd2f6c1c0a1" + +[[package]] +name = "errno" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "etcetera" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "136d1b5283a1ab77bd9257427ffd09d8667ced0570b6f938942bc7568ed5b943" +dependencies = [ + "cfg-if", + "home", + "windows-sys 0.48.0", +] + +[[package]] +name = "event-listener" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b5fb89194fa3cad959b833185b3063ba881dbfc7030680b314250779fb4cc91" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + +[[package]] +name = "fastrand" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" + +[[package]] +name = "flume" +version = "0.11.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55ac459de2512911e4b674ce33cf20befaba382d05b62b008afc1c8b57cbf181" +dependencies = [ + "futures-core", + "futures-sink", + "spin 0.9.8", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "form_urlencoded" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ece68d15c92e84fa4f19d3780f1294e5ca82a78a6d515f1efaabcc144688be00" +dependencies = [ + "matches", + "percent-encoding", +] + +[[package]] +name = "futures" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fa4cc29d25b0687b8570b0da86eac698dcb525110ad8b938fe6712baa711ec" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29d6d2ff5bb10fb95c85b8ce46538a2e5f5e7fdc755623a7d4529ab8a4ed9d2a" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-intrusive" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f" +dependencies = [ + "futures-core", + 
"lock_api", + "parking_lot", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "501466ecc8a30d1d3b7fc9229b122b2ce8ed6e9d9223f1138d4babb253e51817" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "gimli" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6503fe142514ca4799d4c26297c4248239fe8838d827db6bd6065c6ed29a6ce" + +[[package]] +name = "glob" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + +[[package]] +name = "hashlink" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "692eaaf7f7607518dd3cef090f1474b61edc5301d8012f09579920df68b725ee" +dependencies = [ + "hashbrown", +] + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hermit-abi" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "hkdf" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94f41e9c77b6fc05b57497b960aad55942a9bbc5b20e1e623cf7fb1868f695d1" +dependencies = [ + "hmac", +] + +[[package]] +name = "hmac" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddca131f3e7f2ce2df364b57949a9d47915cfbd35e46cfee355ccebbf794d6a2" +dependencies = [ + "digest", +] + +[[package]] +name = "home" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb" +dependencies = [ + "windows-sys 0.48.0", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.54" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c17cc76786e99f8d2f055c11159e7f0091c42474dcc3189fbab96072e873e6d" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0703ae284fc167426161c2e3f1da3ea71d94b21bedbcc9494e92b28e334e3dca" +dependencies = [ + "cxx", + "cxx-build", +] + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "idna" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02e2673c30ee86b5b96a9cb52ad15718aa1f966f5ab9ad54a8b95d5ca33120a9" +dependencies = [ + "matches", + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "indexmap" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "inherent" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3c05a410d53e44fc943a35a32ca27e32af2ea004d5107ccef685d022fc2b9fb" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.98", +] + +[[package]] +name = "is-terminal" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24fddda5af7e54bf7da53067d6e802dbcc381d0a8eef629df528e3ebf68755cb" +dependencies = [ + "hermit-abi", + "rustix", + "windows-sys 0.48.0", +] + +[[package]] +name = "itertools" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37d572918e350e82412fe766d24b15e6682fb2ed2bbe018280caa810397cb319" 
+dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25db6b064527c5d482d0423354fcd07a89a2dfe07b67892e62411946db7f07b0" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92a9df60778f789c37f76778ae8d0a2471c41baa8b059d98a5873c978f549587" + +[[package]] +name = "itoa" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35" + +[[package]] +name = "js-sys" +version = "0.3.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d99f9e3e84b8f67f846ef5b4cbbc3b1c29f6c759fcbce6f01aa0e73d932a24c" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1" +dependencies = [ + "spin 0.4.10", +] + +[[package]] +name = "libc" +version = "0.2.152" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" + +[[package]] +name = "libm" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb" + +[[package]] +name = "libsqlite3-sys" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "link-cplusplus" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dfb9f65d9966f6ca6522043978030b564f3291af987fbf1dd55b6a064ba1b36" +dependencies = 
[ + "cc", +] + +[[package]] +name = "linux-raw-sys" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456" + +[[package]] +name = "lock_api" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88943dd7ef4a2e5a4bfa2753aaab3013e34ce2533d1996fb18ef591e315e2b3b" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "518ef76f2f87365916b142844c16d8fefd85039bc5699050210a7778ee1cd1de" + +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata", +] + +[[package]] +name = "matches" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15305656809ce5a4805b1ff2946892810992197ce1270ff79baded852187942e" + +[[package]] +name = "md-5" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6a38fc55c8bbc10058782919516f88826e70320db6d206aebc49611d24216ae" +dependencies = [ + "digest", +] + +[[package]] +name = "memchr" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" + +[[package]] +name = "minimal-lexical" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6595bb28ed34f43c3fe088e48f6cfb2e033cab45f25a5384d5fdf564fbc8c4b2" + +[[package]] +name = "miniz_oxide" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be0f75932c1f6cfae3c04000e40114adf955636e19040f9c0a2c380702aa1c7f" +dependencies = [ + "adler", +] + +[[package]] +name = "mio" +version = "0.8.6" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b9d9a46eff5b4ff64b45a9e316a6d1e0bc719ef429cbec4dc630684212bfdf9" +dependencies = [ + "libc", + "log", + "wasi", + "windows-sys 0.45.0", +] + +[[package]] +name = "nom" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ffd9d26838a953b4af82cbeb9f1592c6798916983959be223a7124e992742c1" +dependencies = [ + "memchr", + "minimal-lexical", + "version_check", +] + +[[package]] +name = "num-bigint" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-bigint-dig" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2399c9463abc5f909349d8aa9ba080e0b88b3ce2885389b60b993f39b1a56905" +dependencies = [ + "byteorder", + "lazy_static", + "libm", + "num-integer", + "num-iter", + "num-traits", + "rand", + "smallvec", + "zeroize", +] + +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + +[[package]] +name = "num-integer" +version = "0.1.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f6ea62e9d81a77cd3ee9a2a5b9b609447857f3d358704331e4ef39eb247fcba" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af3fdbbc3291a5464dc57b03860ec37ca6bf915ed6ee385e7c6c052c422b2124" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" 
+dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "num_cpus" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c51a3322e4bca9d212ad9a158a02abc6934d005490c054a2778df73a70aa0a30" +dependencies = [ + "libc", +] + +[[package]] +name = "object" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9a7ab5d64814df0fe4a4b5ead45ed6c5f181ee3ff04ba344313a6c80446c5d4" + +[[package]] +name = "once_cell" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" + +[[package]] +name = "optd-mvp" +version = "0.1.0" +dependencies = [ + "async-stream", + "async-trait", + "sea-orm", + "sea-orm-migration", + "serde", + "serde_json", + "strum", + "thiserror 2.0.0", + "tokio", + "trait-variant", +] + +[[package]] +name = "ordered-float" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d84eb1409416d254e4a9c8fa56cc24701755025b458f0fcd8e59e1f5f40c23bf" +dependencies = [ + "num-traits", +] + +[[package]] +name = "ouroboros" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c86de06555b970aec45229b27291b53154f21a5743a163419f4e4c0b065dcde" +dependencies = [ + "aliasable", + "ouroboros_macro", + "static_assertions", +] + +[[package]] +name = "ouroboros_macro" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3633d65683f13b9bcfaa3150880b018899fb0e5d0542f4adaea4f503fdb5eabf" +dependencies = [ + "heck 0.4.1", + "itertools 0.12.0", + "proc-macro2", + "proc-macro2-diagnostics", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "parking" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "427c3892f9e783d91cc128285287e70a59e206ca452770ece88a76f7a3eddd72" + +[[package]] +name = "parking_lot" 
+version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f5ec2493a61ac0506c0f4199f99070cbe83857b0337006a30f3e6719b8ef58" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2f4f894f3865f6c0e02810fc597300f34dc2510f66400da262d8ae10e75767d" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-sys 0.29.0", +] + +[[package]] +name = "paste" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0744126afe1a6dd7f394cb50a716dbe086cb06e255e53d8d0185d82828358fb5" + +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + +[[package]] +name = "percent-encoding" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" + +[[package]] +name = "pin-project-lite" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12cc1b0bf1727a77a54b6654e7b5f1af8604923edc8b81885f8ec92f9e3f0a05" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkcs1" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" +dependencies = [ + "der", + "pkcs8", + "spki", +] + +[[package]] +name = "pkcs8" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +dependencies = [ + "der", + "spki", +] + +[[package]] +name = "pkg-config" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c" + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "ppv-lite86" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "237a5ed80e274dbc66f86bd59c1e25edc039660be53194b5fe0a482e0f2612ea" + +[[package]] +name = "proc-macro-error" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d259aa4825fa1a2371419d30a520219feff9fb3591550a209b4477d2ebaae4f" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn 1.0.98", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd21889899aa8e1ca2b924c1d3f08086631fc90768225b3268b5d5c3e806a503" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.98", + "syn-mid", + "version_check", +] + +[[package]] +name = "proc-macro2" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b33eb56c327dec362a9e55b3ad14f9d2f0904fb5a5b03b513ab5465399e9f43" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "proc-macro2-diagnostics" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "606c4ba35817e2922a308af55ad51bab3645b59eae5c570d4a6cf07e36bd493b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "version_check", + "yansi", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", + "rand_hc", +] + +[[package]] +name = "rand_chacha" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e12735cf05c9e10bf21534da50a147b924d555dc7a547c42e6bb2d5b6017ae0d" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rand_hc" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3190ef7066a446f2e7f42e239d161e905420ccab01eb967c9eb27d21b2322a73" +dependencies = [ + "rand_core", +] + +[[package]] +name = "redox_syscall" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "742739e41cd49414de871ea5e549afb7e2a3ac77b589bcbebe8c82fab37147fc" +dependencies = [ + "bitflags 1.1.0", +] + +[[package]] +name = "redox_users" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64" +dependencies = [ + "getrandom", + "redox_syscall", +] + +[[package]] +name = "regex" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" +dependencies = [ + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"72457500f2cf948feb4efccaeb460570c8f66ee5ba33c936bb4bfaa628d71853" +dependencies = [ + "byteorder", + "regex-syntax", + "utf8-ranges", +] + +[[package]] +name = "regex-syntax" +version = "0.6.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" + +[[package]] +name = "ring" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb9d44f9bf6b635117787f72416783eb7e4227aaf255e5ce739563d817176a7e" +dependencies = [ + "cc", + "getrandom", + "libc", + "spin 0.9.8", + "untrusted", + "windows-sys 0.48.0", +] + +[[package]] +name = "rsa" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dd2017d3e6d67384f301f8b06fbf4567afc576430a61624d845eb04d2b30a72" +dependencies = [ + "byteorder", + "const-oid", + "digest", + "num-bigint-dig", + "num-integer", + "num-iter", + "num-traits", + "pkcs1", + "pkcs8", + "rand_core", + "signature", + "subtle", + "zeroize", +] + +[[package]] +name = "rust_decimal" +version = "1.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee9164faf726e4f3ece4978b25ca877ddc6802fa77f38cdccb32c7f805ecd70c" +dependencies = [ + "arrayvec", + "num-traits", + "serde", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3058a43ada2c2d0b92b3ae38007a2d0fa5e9db971be260e0171408a4ff471c95" + +[[package]] +name = "rustix" +version = "0.38.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" +dependencies = [ + "bitflags 2.4.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustls" +version = "0.23.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4828ea528154ae444e5a642dbb7d5623354030dc9822b83fd9bb79683c7399d0" +dependencies = [ + "once_cell", + "ring", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-pemfile" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35e4980fa29e4c4b212ffb3db068a564cbf560e51d3944b7c88bd8bf5bec64f4" +dependencies = [ + "base64 0.21.0", + "rustls-pki-types", +] + +[[package]] +name = "rustls-pki-types" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" + +[[package]] +name = "rustls-webpki" +version = "0.102.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9a6fccd794a42c2c105b513a2f62bc3fd8f3ba57a4593677ceb0bd035164d78" +dependencies = [ + "ring", + "rustls-pki-types", + "untrusted", +] + +[[package]] +name = "ryu" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c92464b447c0ee8c4fb3824ecc8383b81717b9f1e74ba2e72540aef7b9f82997" + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "scratch" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e114536316b51a5aa7a0e59fc49661fd263c5507dd08bd28de052e57626ce69" + +[[package]] +name = "sea-bae" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bd3534a9978d0aa7edd2808dc1f8f31c4d0ecd31ddf71d997b3c98e9f3c9114" +dependencies = [ + "heck 0.4.1", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "sea-orm" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d5680a8b686985116607ef5f5af2b1f9e1cc2c228330e93101816a0baa279afa" +dependencies = [ + "async-stream", + "async-trait", + "bigdecimal", + "chrono", + "futures", + "log", + "ouroboros", + "rust_decimal", + "sea-orm-macros", + "sea-query", + "sea-query-binder", + "serde", + "serde_json", + "sqlx", + "strum", + "thiserror 1.0.35", + "time", + "tracing", + "url", + "uuid", +] + +[[package]] +name = "sea-orm-cli" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0aefbd960c9ed7b2dfbab97b11890f5d8c314ad6e2f68c7b36c73ea0967fcc25" +dependencies = [ + "chrono", + "clap", + "dotenvy", + "glob", + "regex", + "sea-schema", + "tracing", + "tracing-subscriber", + "url", +] + +[[package]] +name = "sea-orm-macros" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a239e3bb1b566ad4ec2654d0d193d6ceddfd733487edc9c21a64d214c773910" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "sea-bae", + "syn 2.0.87", + "unicode-ident", +] + +[[package]] +name = "sea-orm-migration" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa7bbfbe3bec60b5925193acc9c98b9f8ae9853f52c8004df0c1ea5193c01ea0" +dependencies = [ + "async-trait", + "clap", + "dotenvy", + "futures", + "sea-orm", + "sea-orm-cli", + "sea-schema", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "sea-query" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff504d13b5e4b52fffcf2fb203d0352a5722fa5151696db768933e41e1e591bb" +dependencies = [ + "bigdecimal", + "chrono", + "inherent", + "ordered-float", + "rust_decimal", + "sea-query-derive", + "serde_json", + "time", + "uuid", +] + +[[package]] +name = "sea-query-binder" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0019f47430f7995af63deda77e238c17323359af241233ec768aba1faea7608" +dependencies = [ + "bigdecimal", + 
"chrono", + "rust_decimal", + "sea-query", + "serde_json", + "sqlx", + "time", + "uuid", +] + +[[package]] +name = "sea-query-derive" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9834af2c4bd8c5162f00c89f1701fb6886119a88062cf76fe842ea9e232b9839" +dependencies = [ + "darling", + "heck 0.4.1", + "proc-macro2", + "quote", + "syn 2.0.87", + "thiserror 1.0.35", +] + +[[package]] +name = "sea-schema" +version = "0.16.0-rc.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2a4ff9e87c4340affbec4f7790d724dcd87e71fcd0ffe2247481843380485aa" +dependencies = [ + "futures", + "sea-query", + "sea-schema-derive", +] + +[[package]] +name = "sea-schema-derive" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "debdc8729c37fdbf88472f97fd470393089f997a909e535ff67c544d18cfccf0" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "serde" +version = "1.0.194" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b114498256798c94a0689e1a15fec6005dee8ac1f41de56404b67afc2a4b773" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.194" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3385e45322e8f9931410f01b3031ec534c3947d0e94c18049af4d9f9907d4e0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "serde_json" +version = "1.0.118" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d947f6b3163d8857ea16c4fa0dd4840d52f3041039a85decd46867eb1abef2e4" +dependencies = [ + "itoa 1.0.1", + "ryu", + "serde", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edfa57a7f8d9c1d260a549e7224100f6c43d43f9103e06dd8b4095a9b2b43ce9" +dependencies = [ + "form_urlencoded", + "itoa 
0.4.0", + "ryu", + "serde", +] + +[[package]] +name = "sha1" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c77f4e7f65455545c2153c1253d25056825e77ee2533f0e41deb65a93a34852f" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sha2" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99c3bd8169c58782adad9290a9af5939994036b76187f7b4f0e6de91dbbfc0ec" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sharded-slab" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "signature" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fe458c98333f9c8152221191a77e2a44e8325d0193484af2e9421a53019e57d" +dependencies = [ + "digest", + "rand_core", +] + +[[package]] +name = "slab" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8" + +[[package]] +name = "smallvec" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" +dependencies = [ + "serde", +] + +[[package]] +name = "socket2" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2538b18701741680e0322a2302176d3253a35388e2e62f172f64f4f16605f877" +dependencies = [ + "libc", + "windows-sys 0.48.0", +] + +[[package]] +name = "spin" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ceac490aa12c567115b40b7b7fceca03a6c9d53d5defea066123debc83c5dc1f" + +[[package]] +name = "spin" +version = "0.9.8" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +dependencies = [ + "lock_api", +] + +[[package]] +name = "spki" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37a5be806ab6f127c3da44b7378837ebf01dadca8510a0e572460216b228bd0e" +dependencies = [ + "base64ct", + "der", +] + +[[package]] +name = "sqlformat" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f87e292b4291f154971a43c3774364e2cbcaec599d3f5bf6fa9d122885dbc38a" +dependencies = [ + "itertools 0.10.0", + "nom", + "unicode_categories", +] + +[[package]] +name = "sqlx" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93334716a037193fac19df402f8571269c84a00852f6a7066b5d2616dcd64d3e" +dependencies = [ + "sqlx-core", + "sqlx-macros", + "sqlx-mysql", + "sqlx-postgres", + "sqlx-sqlite", +] + +[[package]] +name = "sqlx-core" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4d8060b456358185f7d50c55d9b5066ad956956fddec42ee2e8567134a8936e" +dependencies = [ + "atoi", + "bigdecimal", + "byteorder", + "bytes", + "chrono", + "crc", + "crossbeam-queue", + "either", + "event-listener", + "futures-channel", + "futures-core", + "futures-intrusive", + "futures-io", + "futures-util", + "hashbrown", + "hashlink", + "hex", + "indexmap", + "log", + "memchr", + "once_cell", + "paste", + "percent-encoding", + "rust_decimal", + "rustls", + "rustls-pemfile", + "serde", + "serde_json", + "sha2", + "smallvec", + "sqlformat", + "thiserror 1.0.35", + "time", + "tokio", + "tokio-stream", + "tracing", + "url", + "uuid", + "webpki-roots", +] + +[[package]] +name = "sqlx-macros" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cac0692bcc9de3b073e8d747391827297e075c7710ff6276d9f7a1f3d58c6657" +dependencies = [ + 
"proc-macro2", + "quote", + "sqlx-core", + "sqlx-macros-core", + "syn 2.0.87", +] + +[[package]] +name = "sqlx-macros-core" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1804e8a7c7865599c9c79be146dc8a9fd8cc86935fa641d3ea58e5f0688abaa5" +dependencies = [ + "dotenvy", + "either", + "heck 0.5.0", + "hex", + "once_cell", + "proc-macro2", + "quote", + "serde", + "serde_json", + "sha2", + "sqlx-core", + "sqlx-mysql", + "sqlx-postgres", + "sqlx-sqlite", + "syn 2.0.87", + "tempfile", + "tokio", + "url", +] + +[[package]] +name = "sqlx-mysql" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64bb4714269afa44aef2755150a0fc19d756fb580a67db8885608cf02f47d06a" +dependencies = [ + "atoi", + "base64 0.22.0", + "bigdecimal", + "bitflags 2.4.0", + "byteorder", + "bytes", + "chrono", + "crc", + "digest", + "dotenvy", + "either", + "futures-channel", + "futures-core", + "futures-io", + "futures-util", + "generic-array", + "hex", + "hkdf", + "hmac", + "itoa 1.0.1", + "log", + "md-5", + "memchr", + "once_cell", + "percent-encoding", + "rand", + "rsa", + "rust_decimal", + "serde", + "sha1", + "sha2", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror 1.0.35", + "time", + "tracing", + "uuid", + "whoami", +] + +[[package]] +name = "sqlx-postgres" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fa91a732d854c5d7726349bb4bb879bb9478993ceb764247660aee25f67c2f8" +dependencies = [ + "atoi", + "base64 0.22.0", + "bigdecimal", + "bitflags 2.4.0", + "byteorder", + "chrono", + "crc", + "dotenvy", + "etcetera", + "futures-channel", + "futures-core", + "futures-io", + "futures-util", + "hex", + "hkdf", + "hmac", + "home", + "itoa 1.0.1", + "log", + "md-5", + "memchr", + "num-bigint", + "once_cell", + "rand", + "rust_decimal", + "serde", + "serde_json", + "sha2", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror 1.0.35", + "time", + 
"tracing", + "uuid", + "whoami", +] + +[[package]] +name = "sqlx-sqlite" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5b2cf34a45953bfd3daaf3db0f7a7878ab9b7a6b91b422d24a7a9e4c857b680" +dependencies = [ + "atoi", + "chrono", + "flume", + "futures-channel", + "futures-core", + "futures-executor", + "futures-intrusive", + "futures-util", + "libsqlite3-sys", + "log", + "percent-encoding", + "serde", + "serde_urlencoded", + "sqlx-core", + "time", + "tracing", + "url", + "uuid", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "stringprep" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ee348cb74b87454fff4b551cbf727025810a004f88aeacae7f85b87f4e9a1c1" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "strum" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "117c413ac8a6cc19c773939932477a341e416eff7f0e84db42f091d85d7c6e0e" + +[[package]] +name = "subtle" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" + +[[package]] +name = "syn" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn-mid" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7be3539f6c128a931cf19dcee741c1af532c7fd387baa739c03dd2e96479338a" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.98", +] + +[[package]] +name = "tempfile" +version = "3.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +dependencies = [ + "cfg-if", + "fastrand", + "rustix", + "windows-sys 0.52.0", +] + +[[package]] +name = "termcolor" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a52c023823933499250b43960b272e25336c6e2ab8684672edc34489f049ccdd" +dependencies = [ + "wincolor", +] + +[[package]] +name = "thiserror" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c53f98874615aea268107765aa1ed8f6116782501d18e53d08b471733bea6c85" +dependencies = [ + "thiserror-impl 1.0.35", +] + +[[package]] +name = "thiserror" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15291287e9bff1bc6f9ff3409ed9af665bec7a5fc8ac079ea96be07bca0e2668" +dependencies = [ + "thiserror-impl 2.0.0", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8b463991b4eab2d801e724172285ec4195c650e8ec79b149e6c2a8e6dd3f783" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.98", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22efd00f33f93fa62848a7cab956c3d38c8d43095efda1decfc2b3a5dc0b8972" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "thread_local" +version = 
"1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180" +dependencies = [ + "once_cell", +] + +[[package]] +name = "time" +version = "0.3.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" +dependencies = [ + "deranged", + "itoa 1.0.1", + "num-conv", + "powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + +[[package]] +name = "time-macros" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" +dependencies = [ + "num-conv", + "time-core", +] + +[[package]] +name = "tokio" +version = "1.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d3ce25f50619af8b0aec2eb23deebe84249e19e2ddd393a6e16e3300a6dadfd" +dependencies = [ + "backtrace", + "bytes", + "libc", + "mio", + "num_cpus", + "pin-project-lite", + "socket2", + "tokio-macros", + "windows-sys 0.48.0", +] + +[[package]] +name = "tokio-macros" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "tokio-stream" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50145484efff8818b5ccd256697f36863f587da82cf8b409c53adf1e840798e3" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tracing" +version = "0.1.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" +dependencies = [ + "cfg-if", + "log", + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4017f8f45139870ca7e672686113917c71c7a6e02d4924eda67186083c03081a" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.98", +] + +[[package]] +name = "tracing-core" +version = "0.1.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24eb03ba0eab1fd845050058ce5e616558e8f8d8fca633e6b163fe25c797213a" +dependencies = [ + "once_cell", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77" +dependencies = [ + "matchers", + "once_cell", + "regex", + "sharded-slab", + "thread_local", + "tracing", + "tracing-core", +] + +[[package]] +name = "trait-variant" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70977707304198400eb4835a78f6a9f928bf41bba420deb8fdb175cd965d77a7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "typenum" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b63708a265f51345575b27fe43f9500ad611579e764c79edbc2037b1121959ec" + +[[package]] +name = "unicode-bidi" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2560b941fdb9ea38301b9b708504d612fcdf9c91a8c31d82219bd74cb07d304d" +dependencies = [ + "matches", +] + +[[package]] +name = "unicode-ident" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" + +[[package]] +name = "unicode-normalization" +version = "0.1.5" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51ccda9ef9efa3f7ef5d91e8f9b83bbe6955f9bf86aec89d5cce2c874625920f" + +[[package]] +name = "unicode-width" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc85732b6d55a0d520aaf765536a188d9d993770c28633422f85bb646da61335" + +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "url" +version = "2.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a507c383b2d33b5fc35d1861e77e6b383d158b2da5e14fe51b83dfedf6fd578c" +dependencies = [ + "form_urlencoded", + "idna", + "matches", + "percent-encoding", +] + +[[package]] +name = "utf8-ranges" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122" + +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + +[[package]] +name = "uuid" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd6469f4314d5f1ffec476e05f17cc9a78bc7a27a6a857842170bdf8d6f98d2f" +dependencies = [ + "serde", +] + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83240549659d187488f91f33c0f8547cbfef0b2088bc470c116d1d260ef623d9" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae70622411ca953215ca6d06d3ebeb1e915f0f6613e3b495122878d7ebec7dae" +dependencies = [ + "bumpalo", + "lazy_static", + "log", + "proc-macro2", + "quote", + "syn 1.0.98", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e734d91443f177bfdb41969de821e15c516931c3c3db3d318fa1b68975d0f6f" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d53739ff08c8a68b0fdbcd54c372b8ab800b1449ab3c9d706503bc7dd1621b2c" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.98", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9a543ae66aa233d14bb765ed9af4a33e81b8b58d1584cf1b47ff8cd0b9e4489" + +[[package]] +name = "web-sys" +version = "0.3.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "721c6263e2c66fd44501cc5efbfa2b7dfa775d13e4ea38c46299646ed1f9c70a" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "webpki-roots" +version = 
"0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de2cfda980f21be5a7ed2eadb3e6fe074d56022bea2cdeb1a62eb220fc04188" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "whoami" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524b58fa5a20a2fb3014dd6358b70e6579692a56ef6fce928834e488f42f65e8" +dependencies = [ + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "winapi" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3ad91d846a4a5342c1fb7008d26124ee6cf94a3953751618577295373b32117" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a16a8e2ebfc883e2b1771c6482b1fb3c6831eab289ba391619a2d93a7356220f" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ca29cb03c8ceaf20f8224a18a530938305e9872b1478ea24ff44b4f503a1d1d" + +[[package]] +name = "wincolor" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9dc3aa9dcda98b5a16150c54619c1ead22e3d3a5d458778ae914be760aa981a" +dependencies = [ + "winapi", +] + +[[package]] +name = "windows" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdacb41e6a96a052c6cb63a144f24900236121c6f63f4f8219fef5977ecb0c25" +dependencies = [ + "windows-targets 0.42.2", +] + +[[package]] +name = "windows-sys" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ceb069ac8b2117d36924190469735767f0990833935ab430155e71a44bafe148" +dependencies = [ + "windows_aarch64_msvc 0.29.0", + "windows_i686_gnu 0.29.0", + "windows_i686_msvc 0.29.0", + "windows_x86_64_gnu 0.29.0", + 
"windows_x86_64_msvc 0.29.0", +] + +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets 0.42.2", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.0", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.0", +] + +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + +[[package]] +name = "windows-targets" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" +dependencies = [ + "windows_aarch64_gnullvm 0.48.0", + "windows_aarch64_msvc 0.48.0", + "windows_i686_gnu 0.48.0", + "windows_i686_msvc 0.48.0", + "windows_x86_64_gnu 0.48.0", + "windows_x86_64_gnullvm 0.48.0", + "windows_x86_64_msvc 0.48.0", +] + +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm 0.52.0", + "windows_aarch64_msvc 0.52.0", + 
"windows_i686_gnu 0.52.0", + "windows_i686_msvc 0.52.0", + "windows_x86_64_gnu 0.52.0", + "windows_x86_64_gnullvm 0.52.0", + "windows_x86_64_msvc 0.52.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d027175d00b01e0cbeb97d6ab6ebe03b12330a35786cbaca5252b1c4bf5d9b" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + +[[package]] +name = "windows_i686_gnu" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8793f59f7b8e8b01eda1a652b2697d87b93097198ae85f823b969ca5b89bba58" + +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" + +[[package]] +name = "windows_i686_msvc" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8602f6c418b67024be2996c512f5f995de3ba417f4c75af68401ab8756796ae4" + +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3d615f419543e0bd7d2b3323af0d86ff19cbc4f816e6453f36a2c2ce889c354" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" + +[[package]] +name = "windows_x86_64_gnu" +version = 
"0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11d95421d9ed3672c280884da53201a5c46b7b2765ca6faf34b0d71cf34a3561" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" + +[[package]] +name = "yansi" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fc79f4a1e39857fc00c3f662cbf2651c771f00e9c15fe2abc341806bd46bd71" + +[[package]] +name = "zerocopy" +version = "0.7.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1c4061bedbb353041c12f413700357bec76df2c7e2ca8e4df8bac24c6bf68e3d" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3c129550b3e6de3fd0ba67ba5c81818f9805e58b8d7fee80a3a59d2c9fc601a" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + +[[package]] +name = "zeroize" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" diff --git a/optd-mvp/Cargo.toml b/optd-mvp/Cargo.toml new file mode 100644 index 0000000..3b72407 --- /dev/null +++ b/optd-mvp/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "optd-mvp" +version = "0.1.0" +edition = "2021" +authors = ["Connor Tsui"] + +[dependencies] + +# `VARIANTS` constant made public in "1.1.1". +sea-orm = { version = "1.1.1", features = [ + "sqlx-sqlite", + "runtime-tokio-rustls", + "macros", + "debug-print", + "with-json", +] } +sea-orm-migration = "1.0" +serde = "1.0" +serde_json = "1.0.118" # Support `Hash` on `serde_json::Value` in "1.0.118". +tokio = { version = "1.0", features = ["macros", "rt-multi-thread"] } +trait-variant = "0.1.2" # Support `make(Send)` syntax in "0.1.2". +thiserror = "2.0" + +# Pin more recent versions for `-Zminimal-versions`. +async-trait = "0.1.43" # Remove lifetime parameter from "0.1.42". +async-stream = "0.3.1" # Fix unsatisfied trait bound from "0.3.0". +strum = "0.26.0" # Fix `std::marker::Sized` from "0.25.0". diff --git a/optd-mvp/README.md b/optd-mvp/README.md new file mode 100644 index 0000000..cfd082a --- /dev/null +++ b/optd-mvp/README.md @@ -0,0 +1,27 @@ +# Generate the `entities` module + +To make changes to the database tables and schema, you will have to modify files in the `migrator` module and then update the `entities` module using `sea-orm-cli`. + +This assumes that you already have the `sqlite3` binary installed. 
First, make sure you have installed `sea-orm-cli`: + +```sh +$ cargo install sea-orm-cli +``` + +Make sure your working directory is in the crate root: + +```sh +$ cd optd-mvp +``` + +If you have not generated the `sqlite.db` file yet, you will need to run this command which will generate the `sqlite.db` file and run all of the migrations: + +```sh +$ cargo run --bin migrate +``` + +Finally, run this command to generate / overwrite the `entities` module in the `src` directory. + +```sh +$ sea-orm-cli generate entity -u sqlite:./sqlite.db -o src/entities +``` diff --git a/optd-mvp/src/bin/migrate.rs b/optd-mvp/src/bin/migrate.rs new file mode 100644 index 0000000..0634f23 --- /dev/null +++ b/optd-mvp/src/bin/migrate.rs @@ -0,0 +1,19 @@ +//! A simple script that generates the database file needed for `sea-orm-cli` to extract the schemas +//! from and generate the `entities` module. + +use optd_mvp::{migrate, DATABASE_FILENAME, DATABASE_URL}; +use sea_orm::*; +use sea_orm_migration::prelude::*; + +#[tokio::main] +async fn main() { + let _ = std::fs::remove_file(DATABASE_FILENAME); + + let db = Database::connect(DATABASE_URL) + .await + .expect("Unable to connect to the database"); + + migrate(&db) + .await + .expect("Something went wrong during migration"); +} diff --git a/optd-mvp/src/lib.rs b/optd-mvp/src/lib.rs new file mode 100644 index 0000000..5abd59f --- /dev/null +++ b/optd-mvp/src/lib.rs @@ -0,0 +1,16 @@ +use sea_orm::*; +use sea_orm_migration::prelude::*; + +mod migrator; +use migrator::Migrator; + +mod entities; + +/// The filename of the SQLite database for migration. +pub const DATABASE_FILENAME: &str = "sqlite.db"; +/// The URL of the SQLite database for migration. 
+pub const DATABASE_URL: &str = "sqlite:./sqlite.db?mode=rwc"; + +pub async fn migrate(db: &DatabaseConnection) -> Result<(), DbErr> { + Migrator::refresh(db).await +} diff --git a/optd-mvp/src/main.rs b/optd-mvp/src/main.rs new file mode 100644 index 0000000..e7a11a9 --- /dev/null +++ b/optd-mvp/src/main.rs @@ -0,0 +1,3 @@ +fn main() { + println!("Hello, world!"); +} From 06495a125628101d824891a277ea5558eea3d9a0 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Wed, 27 Nov 2024 11:03:32 -0500 Subject: [PATCH 02/13] add migration and entity files --- optd-mvp/src/entities/cascades_group.rs | 76 +++++++++++ optd-mvp/src/entities/logical_children.rs | 46 +++++++ optd-mvp/src/entities/logical_expression.rs | 49 +++++++ optd-mvp/src/entities/mod.rs | 9 ++ optd-mvp/src/entities/physical_children.rs | 46 +++++++ optd-mvp/src/entities/physical_expression.rs | 49 +++++++ optd-mvp/src/entities/prelude.rs | 9 ++ .../memo/m20241127_000001_cascades_group.rs | 123 ++++++++++++++++++ .../memo/m20241127_000001_logical_children.rs | 65 +++++++++ .../m20241127_000001_logical_expression.rs | 84 ++++++++++++ .../m20241127_000001_physical_children.rs | 70 ++++++++++ .../m20241127_000001_physical_expression.rs | 85 ++++++++++++ optd-mvp/src/migrator/memo/mod.rs | 14 ++ optd-mvp/src/migrator/mod.rs | 18 +++ 14 files changed, 743 insertions(+) create mode 100644 optd-mvp/src/entities/cascades_group.rs create mode 100644 optd-mvp/src/entities/logical_children.rs create mode 100644 optd-mvp/src/entities/logical_expression.rs create mode 100644 optd-mvp/src/entities/mod.rs create mode 100644 optd-mvp/src/entities/physical_children.rs create mode 100644 optd-mvp/src/entities/physical_expression.rs create mode 100644 optd-mvp/src/entities/prelude.rs create mode 100644 optd-mvp/src/migrator/memo/m20241127_000001_cascades_group.rs create mode 100644 optd-mvp/src/migrator/memo/m20241127_000001_logical_children.rs create mode 100644 optd-mvp/src/migrator/memo/m20241127_000001_logical_expression.rs 
create mode 100644 optd-mvp/src/migrator/memo/m20241127_000001_physical_children.rs create mode 100644 optd-mvp/src/migrator/memo/m20241127_000001_physical_expression.rs create mode 100644 optd-mvp/src/migrator/memo/mod.rs create mode 100644 optd-mvp/src/migrator/mod.rs diff --git a/optd-mvp/src/entities/cascades_group.rs b/optd-mvp/src/entities/cascades_group.rs new file mode 100644 index 0000000..9c2ba83 --- /dev/null +++ b/optd-mvp/src/entities/cascades_group.rs @@ -0,0 +1,76 @@ +//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 + +use sea_orm::entity::prelude::*; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] +#[sea_orm(table_name = "cascades_group")] +pub struct Model { + #[sea_orm(primary_key)] + pub id: i32, + pub winner: Option, + pub cost: Option, + pub is_optimized: bool, + pub parent_id: Option, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation { + #[sea_orm( + belongs_to = "Entity", + from = "Column::ParentId", + to = "Column::Id", + on_update = "Cascade", + on_delete = "SetNull" + )] + SelfRef, + #[sea_orm(has_many = "super::logical_children::Entity")] + LogicalChildren, + #[sea_orm(has_many = "super::logical_expression::Entity")] + LogicalExpression, + #[sea_orm(has_many = "super::physical_children::Entity")] + PhysicalChildren, + #[sea_orm( + belongs_to = "super::physical_expression::Entity", + from = "Column::Winner", + to = "super::physical_expression::Column::Id", + on_update = "Cascade", + on_delete = "SetNull" + )] + PhysicalExpression, +} + +impl Related for Entity { + fn to() -> RelationDef { + Relation::LogicalChildren.def() + } +} + +impl Related for Entity { + fn to() -> RelationDef { + Relation::PhysicalChildren.def() + } +} + +impl Related for Entity { + fn to() -> RelationDef { + super::logical_children::Relation::LogicalExpression.def() + } + fn via() -> Option { + Some(super::logical_children::Relation::CascadesGroup.def().rev()) + } +} + +impl Related for Entity { + fn to() -> 
RelationDef { + super::physical_children::Relation::PhysicalExpression.def() + } + fn via() -> Option<RelationDef> { + Some( + super::physical_children::Relation::CascadesGroup + .def() + .rev(), + ) + } +} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/optd-mvp/src/entities/logical_children.rs b/optd-mvp/src/entities/logical_children.rs new file mode 100644 index 0000000..120641f --- /dev/null +++ b/optd-mvp/src/entities/logical_children.rs @@ -0,0 +1,46 @@ +//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 + +use sea_orm::entity::prelude::*; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] +#[sea_orm(table_name = "logical_children")] +pub struct Model { + #[sea_orm(primary_key, auto_increment = false)] + pub logical_expression_id: i32, + #[sea_orm(primary_key, auto_increment = false)] + pub group_id: i32, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation { + #[sea_orm( + belongs_to = "super::cascades_group::Entity", + from = "Column::GroupId", + to = "super::cascades_group::Column::Id", + on_update = "Cascade", + on_delete = "Cascade" + )] + CascadesGroup, + #[sea_orm( + belongs_to = "super::logical_expression::Entity", + from = "Column::LogicalExpressionId", + to = "super::logical_expression::Column::Id", + on_update = "Cascade", + on_delete = "Cascade" + )] + LogicalExpression, +} + +impl Related<super::cascades_group::Entity> for Entity { + fn to() -> RelationDef { + Relation::CascadesGroup.def() + } +} + +impl Related<super::logical_expression::Entity> for Entity { + fn to() -> RelationDef { + Relation::LogicalExpression.def() + } +} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/optd-mvp/src/entities/logical_expression.rs b/optd-mvp/src/entities/logical_expression.rs new file mode 100644 index 0000000..1e85d1d --- /dev/null +++ b/optd-mvp/src/entities/logical_expression.rs @@ -0,0 +1,49 @@ +//! 
`SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 + +use sea_orm::entity::prelude::*; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] +#[sea_orm(table_name = "logical_expression")] +pub struct Model { + #[sea_orm(primary_key)] + pub id: i32, + pub group_id: i32, + pub fingerprint: i64, + pub kind: i16, + pub data: Json, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation { + #[sea_orm( + belongs_to = "super::cascades_group::Entity", + from = "Column::GroupId", + to = "super::cascades_group::Column::Id", + on_update = "Cascade", + on_delete = "Cascade" + )] + CascadesGroup, + #[sea_orm(has_many = "super::logical_children::Entity")] + LogicalChildren, +} + +impl Related for Entity { + fn to() -> RelationDef { + Relation::LogicalChildren.def() + } +} + +impl Related for Entity { + fn to() -> RelationDef { + super::logical_children::Relation::CascadesGroup.def() + } + fn via() -> Option { + Some( + super::logical_children::Relation::LogicalExpression + .def() + .rev(), + ) + } +} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/optd-mvp/src/entities/mod.rs b/optd-mvp/src/entities/mod.rs new file mode 100644 index 0000000..701abe4 --- /dev/null +++ b/optd-mvp/src/entities/mod.rs @@ -0,0 +1,9 @@ +//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 + +pub mod prelude; + +pub mod cascades_group; +pub mod logical_children; +pub mod logical_expression; +pub mod physical_children; +pub mod physical_expression; diff --git a/optd-mvp/src/entities/physical_children.rs b/optd-mvp/src/entities/physical_children.rs new file mode 100644 index 0000000..d8f9db0 --- /dev/null +++ b/optd-mvp/src/entities/physical_children.rs @@ -0,0 +1,46 @@ +//! 
`SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 + +use sea_orm::entity::prelude::*; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] +#[sea_orm(table_name = "physical_children")] +pub struct Model { + #[sea_orm(primary_key, auto_increment = false)] + pub physical_expression_id: i32, + #[sea_orm(primary_key, auto_increment = false)] + pub group_id: i32, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation { + #[sea_orm( + belongs_to = "super::cascades_group::Entity", + from = "Column::GroupId", + to = "super::cascades_group::Column::Id", + on_update = "Cascade", + on_delete = "Cascade" + )] + CascadesGroup, + #[sea_orm( + belongs_to = "super::physical_expression::Entity", + from = "Column::PhysicalExpressionId", + to = "super::physical_expression::Column::Id", + on_update = "Cascade", + on_delete = "Cascade" + )] + PhysicalExpression, +} + +impl Related for Entity { + fn to() -> RelationDef { + Relation::CascadesGroup.def() + } +} + +impl Related for Entity { + fn to() -> RelationDef { + Relation::PhysicalExpression.def() + } +} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/optd-mvp/src/entities/physical_expression.rs b/optd-mvp/src/entities/physical_expression.rs new file mode 100644 index 0000000..2d9a2ae --- /dev/null +++ b/optd-mvp/src/entities/physical_expression.rs @@ -0,0 +1,49 @@ +//! 
`SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 + +use sea_orm::entity::prelude::*; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] +#[sea_orm(table_name = "physical_expression")] +pub struct Model { + #[sea_orm(primary_key)] + pub id: i32, + pub group_id: i32, + pub fingerprint: i64, + pub kind: i16, + pub data: Json, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation { + #[sea_orm( + belongs_to = "super::cascades_group::Entity", + from = "Column::GroupId", + to = "super::cascades_group::Column::Id", + on_update = "Cascade", + on_delete = "Cascade" + )] + CascadesGroup, + #[sea_orm(has_many = "super::physical_children::Entity")] + PhysicalChildren, +} + +impl Related for Entity { + fn to() -> RelationDef { + Relation::PhysicalChildren.def() + } +} + +impl Related for Entity { + fn to() -> RelationDef { + super::physical_children::Relation::CascadesGroup.def() + } + fn via() -> Option { + Some( + super::physical_children::Relation::PhysicalExpression + .def() + .rev(), + ) + } +} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/optd-mvp/src/entities/prelude.rs b/optd-mvp/src/entities/prelude.rs new file mode 100644 index 0000000..0b8c910 --- /dev/null +++ b/optd-mvp/src/entities/prelude.rs @@ -0,0 +1,9 @@ +//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 + +#![allow(unused_imports)] + +pub use super::cascades_group::Entity as CascadesGroup; +pub use super::logical_children::Entity as LogicalChildren; +pub use super::logical_expression::Entity as LogicalExpression; +pub use super::physical_children::Entity as PhysicalChildren; +pub use super::physical_expression::Entity as PhysicalExpression; diff --git a/optd-mvp/src/migrator/memo/m20241127_000001_cascades_group.rs b/optd-mvp/src/migrator/memo/m20241127_000001_cascades_group.rs new file mode 100644 index 0000000..3a0e7d0 --- /dev/null +++ b/optd-mvp/src/migrator/memo/m20241127_000001_cascades_group.rs @@ -0,0 +1,123 @@ +//! 
An entity representing a group / equivalence class in the Cascades framework. +//! +//! Quoted from the Microsoft article _Extensible query optimizers in practice_: +//! +//! > In the memo, each class of equivalent expressions is called an equivalent class or a group, +//! > and all equivalent expressions within the class are called group expressions or simply +//! > expressions. +//! +//! A Cascades group is defined as a class of equivalent logical or physical expressions. The +//! Cascades framework uses these groups as a way of storing the best query sub-plans for use in the +//! dynamic programming search algorithm. +//! +//! For example, a Cascades group could be the set of expressions containing the logical expressions +//! `Join(A, B)` and `Join(B, A)`, as well as the physical expressions `HashJoin(A, B)` and +//! `NestedLoopJoin(B, A)`. +//! +//! # Columns +//! +//! Each group is assigned a monotonically-increasing (unique) ID. This ID will be important since +//! there are many foreign key references from other tables to `cascades_group`. +//! +//! We additionally store a `winner` foreign key reference to a physical expression. See +//! the [section](#best-physical-plan-winner) below for more details. +//! +//! Finally, we store an `is_optimized` flag that is used for quickly determining the state of +//! optimization for this group during the dynamic programming search. +//! +//! # Entity Relationships +//! +//! ### Child Expressions (Logical and Physical) +//! +//! To retrieve all of a `cascades_group`'s equivalent expressions, you must query the +//! [`logical_expression`] or the [`physical_expression`] entities via their foreign keys to +//! `cascades_group`. The relationship between [`logical_expression`] and `cascades_group` is +//! many-to-one, and the exact same many-to-one relationship is held for [`physical_expression`] to +//! `cascades_group`. +//! +//! ### Parent Expressions (Logical and Physical) +//! +//! 
Additionally, each logical or physical expression can have any number of `cascades_group`s as +//! children, and a group can be a child of any expression. Thus, `cascades_group` additionally has +//! a many-to-many relationship with [`logical_expression`] and [`physical_expression`] via the +//! [`logical_children`] and [`physical_children`] entities. +//! +//! To reiterate, `cascades_group` has **both** a one-to-many **and** a many-to-many relationship +//! with both [`logical_expression`] and [`physical_expression`]. This is due to groups being both +//! parents and children of expressions. +//! +//! ### Best Physical Plan (Winner) +//! +//! The `cascades_group` entity also stores a `winner` _nullable_ foreign key reference to +//! a physical expression. This represents the most recent best query plan we have computed. The +//! reason it is nullable is because we may not have come up with any best query plan yet. +//! +//! ### Logical Properties +//! +//! FIXME: Add a logical properties table. +//! +//! Lastly, each `cascades_group` record will have a set of logical properties stored in the +//! `logical_property` entity, where there is a many-to-one relationship from +//! `logical_property` to `cascades_group`. Note that we do not store physical properties directly +//! on the `cascades_group`, but rather we store them for each [`physical_expression`] record. +//! +//! [`logical_expression`]: super::logical_expression +//! [`physical_expression`]: super::physical_expression +//! [`logical_children`]: super::logical_children +//! [`physical_children`]: super::physical_children +//! 
`logical_property`: super::logical_property + +use crate::migrator::memo::physical_expression::PhysicalExpression; +use sea_orm_migration::{prelude::*, schema::*}; + +#[derive(DeriveIden)] +pub enum CascadesGroup { + Table, + Id, + Winner, + Cost, + IsOptimized, + ParentId, +} + +#[derive(DeriveMigrationName)] +pub struct Migration; + +#[async_trait::async_trait] +impl MigrationTrait for Migration { + async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { + manager + .create_table( + Table::create() + .table(CascadesGroup::Table) + .if_not_exists() + .col(pk_auto(CascadesGroup::Id)) + .col(integer_null(CascadesGroup::Winner)) + .col(big_unsigned_null(CascadesGroup::Cost)) + .foreign_key( + ForeignKey::create() + .from(CascadesGroup::Table, CascadesGroup::Winner) + .to(PhysicalExpression::Table, PhysicalExpression::Id) + .on_delete(ForeignKeyAction::SetNull) + .on_update(ForeignKeyAction::Cascade), + ) + .col(boolean(CascadesGroup::IsOptimized)) + .col(integer_null(CascadesGroup::ParentId)) + .foreign_key( + ForeignKey::create() + .from(CascadesGroup::Table, CascadesGroup::ParentId) + .to(CascadesGroup::Table, CascadesGroup::Id) + .on_delete(ForeignKeyAction::SetNull) + .on_update(ForeignKeyAction::Cascade), + ) + .to_owned(), + ) + .await + } + + async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { + manager + .drop_table(Table::drop().table(CascadesGroup::Table).to_owned()) + .await + } +} diff --git a/optd-mvp/src/migrator/memo/m20241127_000001_logical_children.rs b/optd-mvp/src/migrator/memo/m20241127_000001_logical_children.rs new file mode 100644 index 0000000..d0835f4 --- /dev/null +++ b/optd-mvp/src/migrator/memo/m20241127_000001_logical_children.rs @@ -0,0 +1,65 @@ +//! An entity representing the [`cascades_group`] children of every [`logical_expression`]. +//! +//! Formally, this entity is a junction which allows us to represent a many-to-many relationship +//! between [`logical_expression`] and [`cascades_group`]. 
Expressions can have any number of child +//! groups, and every group can be a child of many different expressions, hence the many-to-many +//! relationship. +//! +//! See [`cascades_group`] for more details. +//! +//! [`cascades_group`]: super::cascades_group +//! [`logical_expression`]: super::logical_expression + +use crate::migrator::memo::{cascades_group::CascadesGroup, logical_expression::LogicalExpression}; +use sea_orm_migration::{prelude::*, schema::*}; + +#[derive(DeriveIden)] +pub enum LogicalChildren { + Table, + LogicalExpressionId, + GroupId, +} + +#[derive(DeriveMigrationName)] +pub struct Migration; + +#[async_trait::async_trait] +impl MigrationTrait for Migration { + async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { + manager + .create_table( + Table::create() + .table(LogicalChildren::Table) + .if_not_exists() + .col(integer(LogicalChildren::LogicalExpressionId)) + .col(integer(LogicalChildren::GroupId)) + .primary_key( + Index::create() + .col(LogicalChildren::LogicalExpressionId) + .col(LogicalChildren::GroupId), + ) + .foreign_key( + ForeignKey::create() + .from(LogicalChildren::Table, LogicalChildren::LogicalExpressionId) + .to(LogicalExpression::Table, LogicalExpression::Id) + .on_delete(ForeignKeyAction::Cascade) + .on_update(ForeignKeyAction::Cascade), + ) + .foreign_key( + ForeignKey::create() + .from(LogicalChildren::Table, LogicalChildren::GroupId) + .to(CascadesGroup::Table, CascadesGroup::Id) + .on_delete(ForeignKeyAction::Cascade) + .on_update(ForeignKeyAction::Cascade), + ) + .to_owned(), + ) + .await + } + + async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { + manager + .drop_table(Table::drop().table(LogicalChildren::Table).to_owned()) + .await + } +} diff --git a/optd-mvp/src/migrator/memo/m20241127_000001_logical_expression.rs b/optd-mvp/src/migrator/memo/m20241127_000001_logical_expression.rs new file mode 100644 index 0000000..3682032 --- /dev/null +++ 
b/optd-mvp/src/migrator/memo/m20241127_000001_logical_expression.rs @@ -0,0 +1,84 @@ +//! An entity representing a logical plan expression in the Cascades framework. +//! +//! Quoted from the Microsoft article _Extensible query optimizers in practice_: +//! +//! > A logical expression is defined as a tree of logical operators, and corresponds to a +//! > relational algebraic expression. +//! +//! In the Cascades query optimization framework, the memo table stores equivalence classes of +//! expressions (see [`cascades_group`]). These equivalence classes, or "groups", store both +//! `logical_expression`s and [`physical_expression`]s. +//! +//! Optimization starts by "exploring" equivalent logical expressions within a group. For example, +//! the logical expressions `Join(A, B)` and `Join(B, A)` are contained in the same group. The +//! logical expressions are defined as a `Join` operator with the groups representing a scan of +//! table `A` and a scan of table `B` as its children. +//! +//! # Columns +//! +//! Each `logical_expression` has a unique primary key ID, but it holds little importance other than +//! helping distinguish between two different expressions. +//! +//! The more interesting column is the `fingerprint` column, in which we store a hashed fingerprint +//! value that can be used to efficiently check equality between two potentially equivalent logical +//! expressions (hash-consing). See ???FIXME??? for more information on expression fingerprints. +//! +//! Finally, since there are many different types of operators, we store a variant tag and a data +//! column as JSON to represent the semi-structured data fields of logical operators. +//! +//! # Entity Relationships +//! +//! The only relationship that `logical_expression` has is to [`cascades_group`]. It has **both** a +//! one-to-many **and** a many-to-many relationship with [`cascades_group`], and you can see more +//! details about this in the module-level documentation for [`cascades_group`]. 
+//! +//! [`cascades_group`]: super::cascades_group +//! [`physical_expression`]: super::physical_expression + +use crate::migrator::memo::cascades_group::CascadesGroup; +use sea_orm_migration::{prelude::*, schema::*}; + +#[derive(DeriveIden)] +pub enum LogicalExpression { + Table, + Id, + GroupId, + Fingerprint, + Kind, + Data, +} + +#[derive(DeriveMigrationName)] +pub struct Migration; + +#[async_trait::async_trait] +impl MigrationTrait for Migration { + async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { + manager + .create_table( + Table::create() + .table(LogicalExpression::Table) + .if_not_exists() + .col(pk_auto(LogicalExpression::Id)) + .col(integer(LogicalExpression::GroupId)) + .foreign_key( + ForeignKey::create() + .from(LogicalExpression::Table, LogicalExpression::GroupId) + .to(CascadesGroup::Table, CascadesGroup::Id) + .on_delete(ForeignKeyAction::Cascade) + .on_update(ForeignKeyAction::Cascade), + ) + .col(big_unsigned(LogicalExpression::Fingerprint)) + .col(small_integer(LogicalExpression::Kind)) + .col(json(LogicalExpression::Data)) + .to_owned(), + ) + .await + } + + async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { + manager + .drop_table(Table::drop().table(LogicalExpression::Table).to_owned()) + .await + } +} diff --git a/optd-mvp/src/migrator/memo/m20241127_000001_physical_children.rs b/optd-mvp/src/migrator/memo/m20241127_000001_physical_children.rs new file mode 100644 index 0000000..3983f0c --- /dev/null +++ b/optd-mvp/src/migrator/memo/m20241127_000001_physical_children.rs @@ -0,0 +1,70 @@ +//! An entity representing the [`cascades_group`] children of every [`physical_expression`]. +//! +//! Formally, this entity is a junction which allows us to represent a many-to-many relationship +//! between [`physical_expression`] and [`cascades_group`]. Expressions can have any number of child +//! groups, and every group can be a child of many different expressions, hence the many-to-many +//! relationship. +//! 
+//! See [`cascades_group`] for more details. +//! +//! [`cascades_group`]: super::cascades_group +//! [`physical_expression`]: super::physical_expression + +use crate::migrator::memo::{ + cascades_group::CascadesGroup, physical_expression::PhysicalExpression, +}; +use sea_orm_migration::{prelude::*, schema::*}; + +#[derive(DeriveIden)] +pub enum PhysicalChildren { + Table, + PhysicalExpressionId, + GroupId, +} + +#[derive(DeriveMigrationName)] +pub struct Migration; + +#[async_trait::async_trait] +impl MigrationTrait for Migration { + async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { + manager + .create_table( + Table::create() + .table(PhysicalChildren::Table) + .if_not_exists() + .col(integer(PhysicalChildren::PhysicalExpressionId)) + .col(integer(PhysicalChildren::GroupId)) + .primary_key( + Index::create() + .col(PhysicalChildren::PhysicalExpressionId) + .col(PhysicalChildren::GroupId), + ) + .foreign_key( + ForeignKey::create() + .from( + PhysicalChildren::Table, + PhysicalChildren::PhysicalExpressionId, + ) + .to(PhysicalExpression::Table, PhysicalExpression::Id) + .on_delete(ForeignKeyAction::Cascade) + .on_update(ForeignKeyAction::Cascade), + ) + .foreign_key( + ForeignKey::create() + .from(PhysicalChildren::Table, PhysicalChildren::GroupId) + .to(CascadesGroup::Table, CascadesGroup::Id) + .on_delete(ForeignKeyAction::Cascade) + .on_update(ForeignKeyAction::Cascade), + ) + .to_owned(), + ) + .await + } + + async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { + manager + .drop_table(Table::drop().table(PhysicalChildren::Table).to_owned()) + .await + } +} diff --git a/optd-mvp/src/migrator/memo/m20241127_000001_physical_expression.rs b/optd-mvp/src/migrator/memo/m20241127_000001_physical_expression.rs new file mode 100644 index 0000000..7653112 --- /dev/null +++ b/optd-mvp/src/migrator/memo/m20241127_000001_physical_expression.rs @@ -0,0 +1,85 @@ +//! An entity representing a physical plan expression in the Cascades framework.
+//! +//! Quoted from the Microsoft article _Extensible query optimizers in practice_: +//! +//! > A physical expression is a tree of physical operators, which is also referred to as the +//! > _physical plan_ or simply _plan_. +//! +//! In the Cascades query optimization framework, the memo table stores equivalence classes of +//! expressions (see [`cascades_group`]). These equivalence classes, or "groups", store both +//! [`logical_expression`]s and `physical_expression`s. +//! +//! Optimization starts by exploring equivalent logical expressions within a group, and then it +//! proceeds to implement / optimize those logical operators into physical operators. For example, +//! the logical expression `Join(A, B)` could be implemented into a `HashJoin(A, B)` or a +//! `NestedLoopJoin(A, B)`, and both of these new physical expressions would be contained in the +//! same group. +//! +//! # Columns +//! +//! Each `physical_expression` has a unique primary key ID, and other tables will store a foreign +//! key reference to a specific `physical_expression`. +//! +//! The more interesting column is the `fingerprint` column, in which we store a hashed fingerprint +//! value that can be used to efficiently check equality between two potentially equivalent physical +//! expressions (hash-consing). See ???FIXME??? for more information on expression fingerprints. +//! +//! Finally, since there are many different types of operators, we store a variant tag and a data +//! column as JSON to represent the semi-structured data fields of physical operators. +//! +//! # Entity Relationships +//! +//! The only relationship that `physical_expression` has is to [`cascades_group`]. It has **both** a +//! one-to-many **and** a many-to-many relationship with [`cascades_group`], and you can see more +//! details about this in the module-level documentation for [`cascades_group`]. +//! +//! [`cascades_group`]: super::cascades_group +//!
[`logical_expression`]: super::logical_expression + +use crate::migrator::memo::cascades_group::CascadesGroup; +use sea_orm_migration::{prelude::*, schema::*}; + +#[derive(DeriveIden)] +pub enum PhysicalExpression { + Table, + Id, + GroupId, + Fingerprint, + Kind, + Data, +} + +#[derive(DeriveMigrationName)] +pub struct Migration; + +#[async_trait::async_trait] +impl MigrationTrait for Migration { + async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { + manager + .create_table( + Table::create() + .table(PhysicalExpression::Table) + .if_not_exists() + .col(pk_auto(PhysicalExpression::Id)) + .col(integer(PhysicalExpression::GroupId)) + .foreign_key( + ForeignKey::create() + .from(PhysicalExpression::Table, PhysicalExpression::GroupId) + .to(CascadesGroup::Table, CascadesGroup::Id) + .on_delete(ForeignKeyAction::Cascade) + .on_update(ForeignKeyAction::Cascade), + ) + .col(big_unsigned(PhysicalExpression::Fingerprint)) + .col(small_integer(PhysicalExpression::Kind)) + .col(json(PhysicalExpression::Data)) + .to_owned(), + ) + .await + } + + async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { + manager + .drop_table(Table::drop().table(PhysicalExpression::Table).to_owned()) + .await + } +} diff --git a/optd-mvp/src/migrator/memo/mod.rs b/optd-mvp/src/migrator/memo/mod.rs new file mode 100644 index 0000000..8ed9390 --- /dev/null +++ b/optd-mvp/src/migrator/memo/mod.rs @@ -0,0 +1,14 @@ +//! Entities related to the memo table used for dynamic programming in the Cascades query +//! optimization framework. 
+ +pub(crate) mod m20241127_000001_cascades_group; +pub(crate) mod m20241127_000001_logical_children; +pub(crate) mod m20241127_000001_logical_expression; +pub(crate) mod m20241127_000001_physical_children; +pub(crate) mod m20241127_000001_physical_expression; + +pub(crate) use m20241127_000001_cascades_group as cascades_group; +pub(crate) use m20241127_000001_logical_children as logical_children; +pub(crate) use m20241127_000001_logical_expression as logical_expression; +pub(crate) use m20241127_000001_physical_children as physical_children; +pub(crate) use m20241127_000001_physical_expression as physical_expression; diff --git a/optd-mvp/src/migrator/mod.rs b/optd-mvp/src/migrator/mod.rs new file mode 100644 index 0000000..179c406 --- /dev/null +++ b/optd-mvp/src/migrator/mod.rs @@ -0,0 +1,18 @@ +use sea_orm_migration::prelude::*; + +mod memo; + +pub struct Migrator; + +#[async_trait::async_trait] +impl MigratorTrait for Migrator { + fn migrations() -> Vec<Box<dyn MigrationTrait>> { + vec![ + Box::new(memo::cascades_group::Migration), + Box::new(memo::logical_expression::Migration), + Box::new(memo::logical_children::Migration), + Box::new(memo::physical_expression::Migration), + Box::new(memo::physical_children::Migration), + ] + } +} From 0e54957d94269384ce009475b85a3f50281eb842 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Wed, 27 Nov 2024 19:21:48 -0500 Subject: [PATCH 03/13] add memo trait interface and persistent memo implementation This commit adds a first draft of a memo table trait and a persistent memo table implementation backed by SeaORM entities.
--- optd-mvp/src/lib.rs | 21 +++ optd-mvp/src/memo/interface.rs | 146 +++++++++++++++++++ optd-mvp/src/memo/mod.rs | 9 ++ optd-mvp/src/memo/persistent.rs | 244 ++++++++++++++++++++++++++++++++ 4 files changed, 420 insertions(+) create mode 100644 optd-mvp/src/memo/interface.rs create mode 100644 optd-mvp/src/memo/mod.rs create mode 100644 optd-mvp/src/memo/persistent.rs diff --git a/optd-mvp/src/lib.rs b/optd-mvp/src/lib.rs index 5abd59f..c5185cd 100644 --- a/optd-mvp/src/lib.rs +++ b/optd-mvp/src/lib.rs @@ -1,16 +1,37 @@ use sea_orm::*; use sea_orm_migration::prelude::*; +use thiserror::Error; mod migrator; use migrator::Migrator; mod entities; +mod memo; +use memo::MemoError; + /// The filename of the SQLite database for migration. pub const DATABASE_FILENAME: &str = "sqlite.db"; /// The URL of the SQLite database for migration. pub const DATABASE_URL: &str = "sqlite:./sqlite.db?mode=rwc"; +/// An error type wrapping all the different kinds of error the optimizer might raise. +/// +/// TODO more docs. +#[derive(Error, Debug)] +pub enum OptimizerError { + #[error("SeaORM error")] + Database(#[from] sea_orm::error::DbErr), + #[error("Memo table logical error")] + Memo(#[from] MemoError), + #[error("unknown error")] + Unknown, +} + +/// Shorthand for a [`Result`] with an error type [`OptimizerError`]. +pub type OptimizerResult<T> = Result<T, OptimizerError>; + +/// Applies all migrations. pub async fn migrate(db: &DatabaseConnection) -> Result<(), DbErr> { Migrator::refresh(db).await } diff --git a/optd-mvp/src/memo/interface.rs b/optd-mvp/src/memo/interface.rs new file mode 100644 index 0000000..a88740e --- /dev/null +++ b/optd-mvp/src/memo/interface.rs @@ -0,0 +1,146 @@ +use crate::OptimizerResult; +use thiserror::Error; + +#[derive(Error, Debug)] +/// The different kinds of errors that might occur while running operations on a memo table.
+pub enum MemoError { + #[error("unknown group ID {0}")] + UnknownGroup(i32), + #[error("unknown logical expression ID {0}")] + UnknownLogicalExpression(i32), + #[error("unknown physical expression ID {0}")] + UnknownPhysicalExpression(i32), + #[error("invalid expression encountered")] + InvalidExpression, +} + +/// A trait representing an implementation of a memoization table. +/// +/// Note that we use [`trait_variant`] here in order to add bounds on every method. +/// See this [blog post]( +/// https://blog.rust-lang.org/2023/12/21/async-fn-rpit-in-traits.html#async-fn-in-public-traits) +/// for more information. +#[allow(dead_code)] +#[trait_variant::make(Send)] +pub trait Memo { + /// A type representing a group in the Cascades framework. + type Group; + /// A type representing a unique identifier for a group. + type GroupId; + /// A type representing a logical expression. + type LogicalExpression; + /// A type representing a unique identifier for a logical expression. + type LogicalExpressionId; + /// A type representing a physical expression. + type PhysicalExpression; + /// A type representing a unique identifier for a physical expression. + type PhysicalExpressionId; + + /// Retrieves a [`Self::Group`] given a [`Self::GroupId`]. + /// + /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. + async fn get_group(&self, group_id: Self::GroupId) -> OptimizerResult<Self::Group>; + + /// Retrieves a [`Self::LogicalExpression`] given a [`Self::LogicalExpressionId`]. + /// + /// If the logical expression does not exist, returns a [`MemoError::UnknownLogicalExpression`] + /// error. + async fn get_logical_expression( + &self, + logical_expression_id: Self::LogicalExpressionId, + ) -> OptimizerResult<Self::LogicalExpression>; + + /// Retrieves a [`Self::PhysicalExpression`] given a [`Self::PhysicalExpressionId`]. + /// + /// If the physical expression does not exist, returns a + /// [`MemoError::UnknownPhysicalExpression`] error.
+ async fn get_physical_expression( + &self, + physical_expression_id: Self::PhysicalExpressionId, + ) -> OptimizerResult<Self::PhysicalExpression>; + + /// Retrieves all of the logical expression "children" IDs of a group. + /// + /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. + async fn get_logical_children( + &self, + group_id: Self::GroupId, + ) -> OptimizerResult<Vec<Self::LogicalExpressionId>>; + + /// Retrieves all of the physical expression "children" IDs of a group. + /// + /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. + async fn get_physical_children( + &self, + group_id: Self::GroupId, + ) -> OptimizerResult<Vec<Self::PhysicalExpressionId>>; + + /// Updates / replaces a group's best physical plan (winner). Optionally returns the previous + /// winner's physical expression ID. + /// + /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. + async fn update_group_winner( + &self, + group_id: Self::GroupId, + physical_expression_id: Self::PhysicalExpressionId, + ) -> OptimizerResult<Option<Self::PhysicalExpressionId>>; + + /// Adds a logical expression to an existing group via its [`Self::GroupId`]. This function + /// assumes that insertion of this expression would not create any duplicates. + /// + /// The caller is required to pass in a slice of `GroupId` that represent the child groups of + /// the input expression. + /// + /// The caller is also required to set the `group_id` field of the input `logical_expression` + /// to be equal to `group_id`, otherwise this function will return a + /// [`MemoError::InvalidExpression`] error. + /// + /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. + async fn add_logical_expression_to_group( + &self, + group_id: Self::GroupId, + logical_expression: Self::LogicalExpression, + children: &[Self::GroupId], + ) -> OptimizerResult<()>; + + /// Adds a physical expression to an existing group via its [`Self::GroupId`]. This function + /// assumes that insertion of this expression would not create any duplicates.
+ /// + /// The caller is required to pass in a slice of `GroupId` that represent the child groups of + /// the input expression. + /// + /// The caller is also required to set the `group_id` field of the input `physical_expression` + /// to be equal to `group_id`, otherwise this function will return a + /// [`MemoError::InvalidExpression`] error. + /// + /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. + async fn add_physical_expression_to_group( + &self, + group_id: Self::GroupId, + physical_expression: Self::PhysicalExpression, + children: &[Self::GroupId], + ) -> OptimizerResult<()>; + + /// Adds a new logical expression into the memo table, creating a new group if the expression + /// does not already exist. + /// + /// The caller is required to pass in a slice of `GroupId` that represent the child groups of + /// the input expression. + /// + /// The [`Self::LogicalExpression`] type should have some sort of mechanism for checking if + /// the expression has been seen before, and if it has already been created, then the parent + /// group ID should also be retrievable. + /// + /// If the expression already exists, then this function will return the [`Self::GroupId`] of + /// the parent group and the corresponding (already existing) [`Self::LogicalExpressionId`]. It + /// will also completely ignore the group ID field of the input expression as well as ignore the + /// input slice of child groups. + /// + /// If the expression does not exist, this function will create a new group and a new + /// expression, returning brand new IDs for both. + async fn add_logical_expression( + &self, + expression: Self::LogicalExpression, + children: &[Self::GroupId], + ) -> OptimizerResult<(Self::GroupId, Self::LogicalExpressionId)>; +} diff --git a/optd-mvp/src/memo/mod.rs b/optd-mvp/src/memo/mod.rs new file mode 100644 index 0000000..5253352 --- /dev/null +++ b/optd-mvp/src/memo/mod.rs @@ -0,0 +1,9 @@ +//!
This module contains items related to the memo table, which is key to the Cascades query +//! optimization framework. +//! +//! TODO more docs. + +mod persistent; + +mod interface; +pub use interface::{Memo, MemoError}; diff --git a/optd-mvp/src/memo/persistent.rs b/optd-mvp/src/memo/persistent.rs new file mode 100644 index 0000000..445ee6c --- /dev/null +++ b/optd-mvp/src/memo/persistent.rs @@ -0,0 +1,244 @@ +use crate::{ + entities::{prelude::*, *}, + memo::{Memo, MemoError}, + OptimizerResult, DATABASE_URL, +}; +use sea_orm::*; + +/// A persistent memo table, backed by a database on disk. +/// +/// TODO more docs. +pub struct PersistentMemo { + /// This `PersistentMemo` is reliant on the SeaORM [`DatabaseConnection`] that stores all of the + /// objects needed for query optimization. + db: DatabaseConnection, +} + +impl PersistentMemo { + /// TODO remove dead code and write docs. + #[allow(dead_code)] + pub async fn new() -> Self { + Self { + db: Database::connect(DATABASE_URL).await.unwrap(), + } + } +} + +impl Memo for PersistentMemo { + type Group = cascades_group::Model; + type GroupId = i32; + type LogicalExpression = logical_expression::Model; + type LogicalExpressionId = i32; + type PhysicalExpression = physical_expression::Model; + type PhysicalExpressionId = i32; + + async fn get_group(&self, group_id: Self::GroupId) -> OptimizerResult { + Ok(CascadesGroup::find_by_id(group_id) + .one(&self.db) + .await? + .ok_or(MemoError::UnknownGroup(group_id))?) + } + + async fn get_logical_expression( + &self, + logical_expression_id: Self::LogicalExpressionId, + ) -> OptimizerResult { + Ok(LogicalExpression::find_by_id(logical_expression_id) + .one(&self.db) + .await? + .ok_or(MemoError::UnknownLogicalExpression(logical_expression_id))?) + } + + async fn get_physical_expression( + &self, + physical_expression_id: Self::PhysicalExpressionId, + ) -> OptimizerResult { + Ok(PhysicalExpression::find_by_id(physical_expression_id) + .one(&self.db) + .await? 
+ .ok_or(MemoError::UnknownPhysicalExpression(physical_expression_id))?) + } + + async fn get_logical_children( + &self, + group_id: Self::GroupId, + ) -> OptimizerResult> { + // First retrieve the group record, and then find all related logical expressions. + Ok(self + .get_group(group_id) + .await? + .find_related(LogicalChildren) + .all(&self.db) + .await? + .into_iter() + .map(|m| m.logical_expression_id) + .collect()) + } + + async fn get_physical_children( + &self, + group_id: Self::GroupId, + ) -> OptimizerResult> { + // First retrieve the group record, and then find all related physical expressions. + Ok(self + .get_group(group_id) + .await? + .find_related(PhysicalChildren) + .all(&self.db) + .await? + .into_iter() + .map(|m| m.physical_expression_id) + .collect()) + } + + /// FIXME: In the future, this should first check that we aren't overwriting a winner that was + /// updated from another thread. + async fn update_group_winner( + &self, + group_id: Self::GroupId, + physical_expression_id: Self::PhysicalExpressionId, + ) -> OptimizerResult> { + // First retrieve the group record, and then use an `ActiveModel` to update it. + let mut group = self.get_group(group_id).await?.into_active_model(); + let old_id = group.winner; + + group.winner = Set(Some(physical_expression_id)); + group.update(&self.db).await?; + + // The old value must be set (`None` still means it has been set). + let old = old_id.unwrap(); + Ok(old) + } + + async fn add_logical_expression_to_group( + &self, + group_id: Self::GroupId, + logical_expression: Self::LogicalExpression, + children: &[Self::GroupId], + ) -> OptimizerResult<()> { + if logical_expression.group_id != group_id { + Err(MemoError::InvalidExpression)? + } + + // Check if the group actually exists. + let _ = self.get_group(group_id).await?; + + // Insert the child groups of the expression into the junction / children table. 
+ if !children.is_empty() { + LogicalChildren::insert_many(children.iter().copied().map(|group_id| { + logical_children::ActiveModel { + logical_expression_id: Set(logical_expression.id), + group_id: Set(group_id), + } + })) + .exec(&self.db) + .await?; + } + + // Insert the expression. + let _ = logical_expression + .into_active_model() + .insert(&self.db) + .await?; + + Ok(()) + } + + async fn add_physical_expression_to_group( + &self, + group_id: Self::GroupId, + physical_expression: Self::PhysicalExpression, + children: &[Self::GroupId], + ) -> OptimizerResult<()> { + if physical_expression.group_id != group_id { + Err(MemoError::InvalidExpression)? + } + + // Check if the group actually exists. + let _ = self.get_group(group_id).await?; + + // Insert the child groups of the expression into the junction / children table. + if !children.is_empty() { + PhysicalChildren::insert_many(children.iter().copied().map(|group_id| { + physical_children::ActiveModel { + physical_expression_id: Set(physical_expression.id), + group_id: Set(group_id), + } + })) + .exec(&self.db) + .await?; + } + + // Insert the expression. + let _ = physical_expression + .into_active_model() + .insert(&self.db) + .await?; + + Ok(()) + } + + async fn add_logical_expression( + &self, + logical_expression: Self::LogicalExpression, + children: &[Self::GroupId], + ) -> OptimizerResult<(Self::GroupId, Self::LogicalExpressionId)> { + // Lookup all expressions that have the same fingerprint. There may be false positives, but + // we will check for those later. + let fingerprint = logical_expression.fingerprint; + let potential_matches = LogicalExpression::find() + .filter(logical_expression::Column::Fingerprint.eq(fingerprint)) + .all(&self.db) + .await?; + + // Of the expressions that have the same fingerprint, check if there already exists an + // expression that is exactly identical to the input expression. 
+ let mut matches: Vec<_> = potential_matches + .into_iter() + .filter(|expr| expr == &logical_expression) + .collect(); + assert!( + matches.len() <= 1, + "there cannot be more than 1 exact logical expression match" + ); + + // The expression already exists, so return its data. + if !matches.is_empty() { + let existing_expression = matches + .pop() + .expect("we just checked that an element exists"); + + return Ok((existing_expression.group_id, existing_expression.id)); + } + + // The expression does not exist yet, so we need to create a new group and new expression. + let group = cascades_group::ActiveModel { + winner: Set(None), + is_optimized: Set(false), + ..Default::default() + }; + + // Create a new group. + let res = cascades_group::Entity::insert(group).exec(&self.db).await?; + + // Insert the input expression with the correct `group_id`. + let mut new_expr = logical_expression.into_active_model(); + new_expr.group_id = Set(res.last_insert_id); + new_expr.id = NotSet; + let new_expr = new_expr.insert(&self.db).await?; + + // Insert the child groups of the expression into the junction / children table. + if !children.is_empty() { + LogicalChildren::insert_many(children.iter().copied().map(|group_id| { + logical_children::ActiveModel { + logical_expression_id: Set(new_expr.id), + group_id: Set(group_id), + } + })) + .exec(&self.db) + .await?; + } + + Ok((new_expr.group_id, new_expr.id)) + } +} From 285649686f56507c5a830f57f5ea7ce0fe4813c1 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Thu, 28 Nov 2024 14:43:32 -0500 Subject: [PATCH 04/13] add expression representation and refactor memo This commit adds the `src/expression` module which contains a very simple representation of Cascades expressions. The `Memo` trait interface and implemenation has also changed, where it now correctly detects exact match duplicates, and it does not track fingerprints for physical expressions (only logical). TODO: Add more tests. TODO: Figure out how to test in CI. 
--- .gitignore | 2 +- optd-mvp/src/entities/fingerprint.rs | 33 ++++ optd-mvp/src/entities/logical_expression.rs | 9 +- optd-mvp/src/entities/mod.rs | 1 + optd-mvp/src/entities/physical_expression.rs | 1 - optd-mvp/src/entities/prelude.rs | 3 +- optd-mvp/src/expression/logical_expression.rs | 117 +++++++++++++ optd-mvp/src/expression/mod.rs | 62 +++++++ .../src/expression/physical_expression.rs | 115 +++++++++++++ optd-mvp/src/lib.rs | 14 ++ optd-mvp/src/memo/interface.rs | 58 +++++-- .../implementation.rs} | 162 ++++++++++-------- optd-mvp/src/memo/persistent/mod.rs | 66 +++++++ optd-mvp/src/memo/persistent/tests.rs | 36 ++++ .../memo/m20241127_000001_fingerprint.rs | 49 ++++++ .../m20241127_000001_logical_expression.rs | 14 +- .../m20241127_000001_physical_expression.rs | 9 +- optd-mvp/src/migrator/memo/mod.rs | 2 + optd-mvp/src/migrator/mod.rs | 1 + 19 files changed, 655 insertions(+), 99 deletions(-) create mode 100644 optd-mvp/src/entities/fingerprint.rs create mode 100644 optd-mvp/src/expression/logical_expression.rs create mode 100644 optd-mvp/src/expression/mod.rs create mode 100644 optd-mvp/src/expression/physical_expression.rs rename optd-mvp/src/memo/{persistent.rs => persistent/implementation.rs} (68%) create mode 100644 optd-mvp/src/memo/persistent/mod.rs create mode 100644 optd-mvp/src/memo/persistent/tests.rs create mode 100644 optd-mvp/src/migrator/memo/m20241127_000001_fingerprint.rs diff --git a/.gitignore b/.gitignore index 861fce8..86281ff 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,4 @@ target/ # We will check in all code-generated entity files, as newer versions of `sea-orm-cli` might # conflict with previous versions. -# **/entities \ No newline at end of file +# **/entities diff --git a/optd-mvp/src/entities/fingerprint.rs b/optd-mvp/src/entities/fingerprint.rs new file mode 100644 index 0000000..2ab6a7f --- /dev/null +++ b/optd-mvp/src/entities/fingerprint.rs @@ -0,0 +1,33 @@ +//! 
`SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 + +use sea_orm::entity::prelude::*; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] +#[sea_orm(table_name = "fingerprint")] +pub struct Model { + #[sea_orm(primary_key)] + pub id: i32, + pub logical_expression_id: i32, + pub kind: i16, + pub hash: i64, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation { + #[sea_orm( + belongs_to = "super::logical_expression::Entity", + from = "Column::LogicalExpressionId", + to = "super::logical_expression::Column::Id", + on_update = "Cascade", + on_delete = "Cascade" + )] + LogicalExpression, +} + +impl Related for Entity { + fn to() -> RelationDef { + Relation::LogicalExpression.def() + } +} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/optd-mvp/src/entities/logical_expression.rs b/optd-mvp/src/entities/logical_expression.rs index 1e85d1d..4c257f3 100644 --- a/optd-mvp/src/entities/logical_expression.rs +++ b/optd-mvp/src/entities/logical_expression.rs @@ -8,7 +8,6 @@ pub struct Model { #[sea_orm(primary_key)] pub id: i32, pub group_id: i32, - pub fingerprint: i64, pub kind: i16, pub data: Json, } @@ -23,10 +22,18 @@ pub enum Relation { on_delete = "Cascade" )] CascadesGroup, + #[sea_orm(has_many = "super::fingerprint::Entity")] + Fingerprint, #[sea_orm(has_many = "super::logical_children::Entity")] LogicalChildren, } +impl Related for Entity { + fn to() -> RelationDef { + Relation::Fingerprint.def() + } +} + impl Related for Entity { fn to() -> RelationDef { Relation::LogicalChildren.def() diff --git a/optd-mvp/src/entities/mod.rs b/optd-mvp/src/entities/mod.rs index 701abe4..77d6b2c 100644 --- a/optd-mvp/src/entities/mod.rs +++ b/optd-mvp/src/entities/mod.rs @@ -3,6 +3,7 @@ pub mod prelude; pub mod cascades_group; +pub mod fingerprint; pub mod logical_children; pub mod logical_expression; pub mod physical_children; diff --git a/optd-mvp/src/entities/physical_expression.rs b/optd-mvp/src/entities/physical_expression.rs 
index 2d9a2ae..482227a 100644 --- a/optd-mvp/src/entities/physical_expression.rs +++ b/optd-mvp/src/entities/physical_expression.rs @@ -8,7 +8,6 @@ pub struct Model { #[sea_orm(primary_key)] pub id: i32, pub group_id: i32, - pub fingerprint: i64, pub kind: i16, pub data: Json, } diff --git a/optd-mvp/src/entities/prelude.rs b/optd-mvp/src/entities/prelude.rs index 0b8c910..5619363 100644 --- a/optd-mvp/src/entities/prelude.rs +++ b/optd-mvp/src/entities/prelude.rs @@ -1,8 +1,7 @@ //! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 -#![allow(unused_imports)] - pub use super::cascades_group::Entity as CascadesGroup; +pub use super::fingerprint::Entity as Fingerprint; pub use super::logical_children::Entity as LogicalChildren; pub use super::logical_expression::Entity as LogicalExpression; pub use super::physical_children::Entity as PhysicalChildren; diff --git a/optd-mvp/src/expression/logical_expression.rs b/optd-mvp/src/expression/logical_expression.rs new file mode 100644 index 0000000..c87b055 --- /dev/null +++ b/optd-mvp/src/expression/logical_expression.rs @@ -0,0 +1,117 @@ +//! Definition of logical expressions / relations in the Cascades query optimization framework. +//! +//! FIXME: All fields are placeholders, and group IDs are just represented as i32 for now. +//! FIXME: Representation needs to know how to "rewrite" child group IDs to whatever a fingerprint +//! will need. +//! +//! TODO figure out if each relation should be in a different submodule. +//! TODO This entire file is a WIP. 
+ +use crate::entities::*; +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug)] +pub enum LogicalExpression { + Scan(Scan), + Filter(Filter), + Join(Join), +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct Scan { + table_schema: String, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct Filter { + child: i32, + expression: String, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct Join { + left: i32, + right: i32, + expression: String, +} + +/// TODO Use a macro instead. +impl From for LogicalExpression { + fn from(value: logical_expression::Model) -> Self { + match value.kind { + 0 => Self::Scan( + serde_json::from_value(value.data) + .expect("unable to deserialize data into a logical `Scan`"), + ), + 1 => Self::Filter( + serde_json::from_value(value.data) + .expect("Unable to deserialize data into a logical `Filter`"), + ), + 2 => Self::Join( + serde_json::from_value(value.data) + .expect("Unable to deserialize data into a logical `Join`"), + ), + _ => panic!(), + } + } +} + +/// TODO Use a macro instead. 
+impl From for logical_expression::Model { + fn from(value: LogicalExpression) -> logical_expression::Model { + fn create_logical_expression( + kind: i16, + data: serde_json::Value, + ) -> logical_expression::Model { + logical_expression::Model { + id: -1, + group_id: -1, + kind, + data, + } + } + + match value { + LogicalExpression::Scan(scan) => create_logical_expression( + 0, + serde_json::to_value(scan).expect("unable to serialize logical `Scan`"), + ), + LogicalExpression::Filter(filter) => create_logical_expression( + 1, + serde_json::to_value(filter).expect("unable to serialize logical `Filter`"), + ), + LogicalExpression::Join(join) => create_logical_expression( + 2, + serde_json::to_value(join).expect("unable to serialize logical `Join`"), + ), + } + } +} + +#[cfg(test)] +pub use build::*; + +#[cfg(test)] +mod build { + use super::*; + use crate::expression::Expression; + + pub fn scan(table_schema: String) -> Expression { + Expression::Logical(LogicalExpression::Scan(Scan { table_schema })) + } + + pub fn filter(child_group: i32, expression: String) -> Expression { + Expression::Logical(LogicalExpression::Filter(Filter { + child: child_group, + expression, + })) + } + + pub fn join(left_group: i32, right_group: i32, expression: String) -> Expression { + Expression::Logical(LogicalExpression::Join(Join { + left: left_group, + right: right_group, + expression, + })) + } +} diff --git a/optd-mvp/src/expression/mod.rs b/optd-mvp/src/expression/mod.rs new file mode 100644 index 0000000..459e13b --- /dev/null +++ b/optd-mvp/src/expression/mod.rs @@ -0,0 +1,62 @@ +//! In-memory representation of Cascades logical and physical expression / operators / relations. +//! +//! TODO more docs. + +mod logical_expression; +pub use logical_expression::*; + +mod physical_expression; +pub use physical_expression::*; + +/// The representation of a Cascades expression. +/// +/// TODO more docs. 
+#[derive(Clone, Debug)] +pub enum Expression { + Logical(LogicalExpression), + Physical(PhysicalExpression), +} + +/// Converts the database / JSON representation of a logical expression into an in-memory one. +impl From for Expression { + fn from(value: crate::entities::logical_expression::Model) -> Self { + Self::Logical(value.into()) + } +} + +/// Converts the in-memory representation of a logical expression into the database / JSON version. +/// +/// # Panics +/// +/// This will panic if the [`Expression`] is [`Expression::Physical`]. +impl From for crate::entities::logical_expression::Model { + fn from(value: Expression) -> Self { + let Expression::Logical(expr) = value else { + panic!("Attempted to convert an in-memory physical expression into a logical database / JSON expression"); + }; + + expr.into() + } +} + +/// Converts the database / JSON representation of a physical expression into an in-memory one. +impl From for Expression { + fn from(value: crate::entities::physical_expression::Model) -> Self { + Self::Physical(value.into()) + } +} + +/// Converts the in-memory representation of a physical expression into the database / JSON version. +/// +/// # Panics +/// +/// This will panic if the [`Expression`] is [`Expression::Physical`]. +impl From for crate::entities::physical_expression::Model { + fn from(value: Expression) -> Self { + let Expression::Physical(expr) = value else { + panic!("Attempted to convert an in-memory logical expression into a physical database / JSON expression"); + }; + + expr.into() + } +} diff --git a/optd-mvp/src/expression/physical_expression.rs b/optd-mvp/src/expression/physical_expression.rs new file mode 100644 index 0000000..6552a96 --- /dev/null +++ b/optd-mvp/src/expression/physical_expression.rs @@ -0,0 +1,115 @@ +//! Definition of physical expressions / operators in the Cascades query optimization framework. +//! +//! FIXME: All fields are placeholders, and group IDs are just represented as i32 for now. +//! +//! 
TODO figure out if each operator should be in a different submodule. +//! TODO This entire file is a WIP. + +use crate::entities::*; +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug)] +pub enum PhysicalExpression { + TableScan(TableScan), + Filter(PhysicalFilter), + HashJoin(HashJoin), +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct TableScan { + table_schema: String, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct PhysicalFilter { + child: i32, + expression: String, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct HashJoin { + left: i32, + right: i32, + expression: String, +} + +/// TODO Use a macro instead. +impl From for PhysicalExpression { + fn from(value: physical_expression::Model) -> Self { + match value.kind { + 0 => Self::TableScan( + serde_json::from_value(value.data) + .expect("unable to deserialize data into a physical `TableScan`"), + ), + 1 => Self::Filter( + serde_json::from_value(value.data) + .expect("Unable to deserialize data into a physical `Filter`"), + ), + 2 => Self::HashJoin( + serde_json::from_value(value.data) + .expect("Unable to deserialize data into a physical `HashJoin`"), + ), + _ => panic!(), + } + } +} + +/// TODO Use a macro instead. 
+impl From for physical_expression::Model { + fn from(value: PhysicalExpression) -> physical_expression::Model { + fn create_physical_expression( + kind: i16, + data: serde_json::Value, + ) -> physical_expression::Model { + physical_expression::Model { + id: -1, + group_id: -1, + kind, + data, + } + } + + match value { + PhysicalExpression::TableScan(scan) => create_physical_expression( + 0, + serde_json::to_value(scan).expect("unable to serialize physical `TableScan`"), + ), + PhysicalExpression::Filter(filter) => create_physical_expression( + 1, + serde_json::to_value(filter).expect("unable to serialize physical `Filter`"), + ), + PhysicalExpression::HashJoin(join) => create_physical_expression( + 2, + serde_json::to_value(join).expect("unable to serialize physical `HashJoin`"), + ), + } + } +} + +#[cfg(test)] +pub use build::*; + +#[cfg(test)] +mod build { + use super::*; + use crate::expression::Expression; + + pub fn table_scan(table_schema: String) -> Expression { + Expression::Physical(PhysicalExpression::TableScan(TableScan { table_schema })) + } + + pub fn filter(child_group: i32, expression: String) -> Expression { + Expression::Physical(PhysicalExpression::Filter(PhysicalFilter { + child: child_group, + expression, + })) + } + + pub fn hash_join(left_group: i32, right_group: i32, expression: String) -> Expression { + Expression::Physical(PhysicalExpression::HashJoin(HashJoin { + left: left_group, + right: right_group, + expression, + })) + } +} diff --git a/optd-mvp/src/lib.rs b/optd-mvp/src/lib.rs index c5185cd..98c5f11 100644 --- a/optd-mvp/src/lib.rs +++ b/optd-mvp/src/lib.rs @@ -10,6 +10,8 @@ mod entities; mod memo; use memo::MemoError; +mod expression; + /// The filename of the SQLite database for migration. pub const DATABASE_FILENAME: &str = "sqlite.db"; /// The URL of the SQLite database for migration. 
@@ -35,3 +37,15 @@ pub type OptimizerResult = Result; pub async fn migrate(db: &DatabaseConnection) -> Result<(), DbErr> { Migrator::refresh(db).await } + +/// Helper function for hashing expression data. +/// +/// TODO remove this. +fn hash_expression(kind: i16, data: &serde_json::Value) -> i64 { + use std::hash::{DefaultHasher, Hash, Hasher}; + + let mut hasher = DefaultHasher::new(); + kind.hash(&mut hasher); + data.hash(&mut hasher); + hasher.finish() as i64 +} diff --git a/optd-mvp/src/memo/interface.rs b/optd-mvp/src/memo/interface.rs index a88740e..cb6c76d 100644 --- a/optd-mvp/src/memo/interface.rs +++ b/optd-mvp/src/memo/interface.rs @@ -1,8 +1,11 @@ +//! This module defines the [`Memo`] trait, which defines shared behavior of all memo table that can +//! be used for query optimization in the Cascades framework. + use crate::OptimizerResult; use thiserror::Error; -#[derive(Error, Debug)] /// The different kinds of errors that might occur while running operations on a memo table. +#[derive(Error, Debug)] pub enum MemoError { #[error("unknown group ID {0}")] UnknownGroup(i32), @@ -20,6 +23,8 @@ pub enum MemoError { /// See this [blog post]( /// https://blog.rust-lang.org/2023/12/21/async-fn-rpit-in-traits.html#async-fn-in-public-traits) /// for more information. +/// +/// TODO remove dead code. #[allow(dead_code)] #[trait_variant::make(Send)] pub trait Memo { @@ -75,6 +80,18 @@ pub trait Memo { group_id: Self::GroupId, ) -> OptimizerResult>; + /// Checks if a given logical expression is a duplicate / already exists in the memo table. + /// + /// In order to prevent a large amount of duplicate work, the memo table must support duplicate + /// expression detection. + /// + /// Returns `Some(expression_id)` if the memo table detects that the expression already exists, + /// and `None` otherwise. 
+ async fn is_duplicate_logical_expression( + &self, + logical_expression: &Self::LogicalExpression, + ) -> OptimizerResult>; + /// Updates / replaces a group's best physical plan (winner). Optionally returns the previous /// winner's physical expression ID. /// @@ -85,41 +102,49 @@ pub trait Memo { physical_expression_id: Self::PhysicalExpressionId, ) -> OptimizerResult>; - /// Adds a logical expression to an existing group via its [`Self::GroupId`]. This function - /// assumes that insertion of this expression would not create any duplicates. + /// Adds a physical expression to an existing group via its [`Self::GroupId`]. /// /// The caller is required to pass in a slice of `GroupId` that represent the child groups of /// the input expression. /// - /// The caller is also required to set the `group_id` field of the input `logical_expression` + /// The caller is also required to set the `group_id` field of the input `physical_expression` /// to be equal to `group_id`, otherwise this function will return a /// [`MemoError::InvalidExpression`] error. /// /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. - async fn add_logical_expression_to_group( + /// + /// On successful insertion, returns the ID of the physical expression. + async fn add_physical_expression_to_group( &self, group_id: Self::GroupId, - logical_expression: Self::LogicalExpression, + physical_expression: Self::PhysicalExpression, children: &[Self::GroupId], - ) -> OptimizerResult<()>; + ) -> OptimizerResult; - /// Adds a physical expression to an existing group via its [`Self::GroupId`]. This function - /// assumes that insertion of this expression would not create any duplicates. + /// Adds a logical expression to an existing group via its [`Self::GroupId`]. /// /// The caller is required to pass in a slice of `GroupId` that represent the child groups of /// the input expression. 
/// - /// The caller is also required to set the `group_id` field of the input `physical_expression` + /// The caller is also required to set the `group_id` field of the input `logical_expression` /// to be equal to `group_id`, otherwise this function will return a /// [`MemoError::InvalidExpression`] error. /// /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. - async fn add_physical_expression_to_group( + /// + /// If the memo table detects that the input logical expression is a duplicate expression, it + /// will **not** insert the expression into the memo table. Instead, it will return an + /// `Ok(Err(expression_id))`, which is a unique identifier of the expression that the input is a + /// duplicate of. The caller can use this ID to retrieve the group the original belongs to. + /// + /// If the memo table detects that the input is unique, it will insert the expression into the + /// input group and return an `Ok(Ok(expression_id))`. + async fn add_logical_expression_to_group( &self, group_id: Self::GroupId, - physical_expression: Self::PhysicalExpression, + logical_expression: Self::LogicalExpression, children: &[Self::GroupId], - ) -> OptimizerResult<()>; + ) -> OptimizerResult>; /// Adds a new logical expression into the memo table, creating a new group if the expression /// does not already exist. 
@@ -142,5 +167,10 @@ pub trait Memo { &self, expression: Self::LogicalExpression, children: &[Self::LogicalExpressionId], - ) -> OptimizerResult<(Self::GroupId, Self::LogicalExpressionId)>; + ) -> OptimizerResult< + Result< + (Self::GroupId, Self::LogicalExpressionId), + (Self::GroupId, Self::LogicalExpressionId), + >, + >; } diff --git a/optd-mvp/src/memo/persistent.rs b/optd-mvp/src/memo/persistent/implementation.rs similarity index 68% rename from optd-mvp/src/memo/persistent.rs rename to optd-mvp/src/memo/persistent/implementation.rs index 445ee6c..4c06c4e 100644 --- a/optd-mvp/src/memo/persistent.rs +++ b/optd-mvp/src/memo/persistent/implementation.rs @@ -1,28 +1,11 @@ +//! This module contains the implementation of the [`Memo`] trait for [`PersistentMemo`]. + +use super::*; use crate::{ - entities::{prelude::*, *}, + hash_expression, memo::{Memo, MemoError}, - OptimizerResult, DATABASE_URL, + OptimizerResult, }; -use sea_orm::*; - -/// A persistent memo table, backed by a database on disk. -/// -/// TODO more docs. -pub struct PersistentMemo { - /// This `PersistentMemo` is reliant on the SeaORM [`DatabaseConnection`] that stores all of the - /// objects needed for query optimization. - db: DatabaseConnection, -} - -impl PersistentMemo { - /// TODO remove dead code and write docs. - #[allow(dead_code)] - pub async fn new() -> Self { - Self { - db: Database::connect(DATABASE_URL).await.unwrap(), - } - } -} impl Memo for PersistentMemo { type Group = cascades_group::Model; @@ -91,6 +74,41 @@ impl Memo for PersistentMemo { .collect()) } + /// FIXME Check that all of the children are root groups? + async fn is_duplicate_logical_expression( + &self, + logical_expression: &Self::LogicalExpression, + ) -> OptimizerResult> { + // Lookup all expressions that have the same fingerprint and kind. There may be false + // positives, but we will check for those next. 
+ let kind = logical_expression.kind; + let fingerprint = hash_expression(kind, &logical_expression.data); + + let potential_matches = Fingerprint::find() + .filter(fingerprint::Column::Hash.eq(fingerprint)) + .filter(fingerprint::Column::Kind.eq(kind)) + .all(&self.db) + .await?; + + if potential_matches.is_empty() { + return Ok(None); + } + + let mut match_id = None; + for potential_match in potential_matches { + let expr_id = potential_match.logical_expression_id; + let expr = self.get_logical_expression(expr_id).await?; + + if expr.data == logical_expression.data { + // There should be at most one duplicate expression. + match_id = Some(expr_id); + break; + } + } + + Ok(match_id) + } + /// FIXME: In the future, this should first check that we aren't overwriting a winner that was /// updated from another thread. async fn update_group_winner( @@ -110,13 +128,13 @@ impl Memo for PersistentMemo { Ok(old) } - async fn add_logical_expression_to_group( + async fn add_physical_expression_to_group( &self, group_id: Self::GroupId, - logical_expression: Self::LogicalExpression, + physical_expression: Self::PhysicalExpression, children: &[Self::GroupId], - ) -> OptimizerResult<()> { - if logical_expression.group_id != group_id { + ) -> OptimizerResult { + if physical_expression.group_id != group_id { Err(MemoError::InvalidExpression)? } @@ -125,9 +143,9 @@ impl Memo for PersistentMemo { // Insert the child groups of the expression into the junction / children table. if !children.is_empty() { - LogicalChildren::insert_many(children.iter().copied().map(|group_id| { - logical_children::ActiveModel { - logical_expression_id: Set(logical_expression.id), + PhysicalChildren::insert_many(children.iter().copied().map(|group_id| { + physical_children::ActiveModel { + physical_expression_id: Set(physical_expression.id), group_id: Set(group_id), } })) @@ -136,32 +154,41 @@ impl Memo for PersistentMemo { } // Insert the expression. 
- let _ = logical_expression + let res = physical_expression .into_active_model() .insert(&self.db) .await?; - Ok(()) + Ok(res.id) } - async fn add_physical_expression_to_group( + /// FIXME Check that all of the children are reduced groups? + async fn add_logical_expression_to_group( &self, group_id: Self::GroupId, - physical_expression: Self::PhysicalExpression, + logical_expression: Self::LogicalExpression, children: &[Self::GroupId], - ) -> OptimizerResult<()> { - if physical_expression.group_id != group_id { + ) -> OptimizerResult> { + if logical_expression.group_id != group_id { Err(MemoError::InvalidExpression)? } + // Check if the expression already exists in the memo table. + if let Some(existing_id) = self + .is_duplicate_logical_expression(&logical_expression) + .await? + { + return Ok(Err(existing_id)); + } + // Check if the group actually exists. let _ = self.get_group(group_id).await?; // Insert the child groups of the expression into the junction / children table. if !children.is_empty() { - PhysicalChildren::insert_many(children.iter().copied().map(|group_id| { - physical_children::ActiveModel { - physical_expression_id: Set(physical_expression.id), + LogicalChildren::insert_many(children.iter().copied().map(|group_id| { + logical_children::ActiveModel { + logical_expression_id: Set(logical_expression.id), group_id: Set(group_id), } })) @@ -170,45 +197,32 @@ impl Memo for PersistentMemo { } // Insert the expression. - let _ = physical_expression + let res = logical_expression .into_active_model() .insert(&self.db) .await?; - Ok(()) + Ok(Ok(res.id)) } + /// FIXME Check that all of the children are reduced groups? async fn add_logical_expression( &self, logical_expression: Self::LogicalExpression, children: &[Self::GroupId], - ) -> OptimizerResult<(Self::GroupId, Self::LogicalExpressionId)> { - // Lookup all expressions that have the same fingerprint. There may be false positives, but - // we will check for those later. 
- let fingerprint = logical_expression.fingerprint; - let potential_matches = LogicalExpression::find() - .filter(logical_expression::Column::Fingerprint.eq(fingerprint)) - .all(&self.db) - .await?; - - // Of the expressions that have the same fingerprint, check if there already exists an - // expression that is exactly identical to the input expression. - let mut matches: Vec<_> = potential_matches - .into_iter() - .filter(|expr| expr == &logical_expression) - .collect(); - assert!( - matches.len() <= 1, - "there cannot be more than 1 exact logical expression match" - ); - - // The expression already exists, so return its data. - if !matches.is_empty() { - let existing_expression = matches - .pop() - .expect("we just checked that an element exists"); - - return Ok((existing_expression.group_id, existing_expression.id)); + ) -> OptimizerResult< + Result< + (Self::GroupId, Self::LogicalExpressionId), + (Self::GroupId, Self::LogicalExpressionId), + >, + > { + // Check if the expression already exists in the memo table. + if let Some(existing_id) = self + .is_duplicate_logical_expression(&logical_expression) + .await? + { + let expr = self.get_logical_expression(existing_id).await?; + return Ok(Err((expr.group_id, expr.id))); } // The expression does not exist yet, so we need to create a new group and new expression. @@ -239,6 +253,18 @@ impl Memo for PersistentMemo { .await?; } - Ok((new_expr.group_id, new_expr.id)) + // Insert the fingerprint of the logical expression. 
+ let hash = hash_expression(new_expr.kind, &new_expr.data); + let fingerprint = fingerprint::ActiveModel { + id: NotSet, + logical_expression_id: Set(new_expr.id), + kind: Set(new_expr.kind), + hash: Set(hash), + }; + let _ = fingerprint::Entity::insert(fingerprint) + .exec(&self.db) + .await?; + + Ok(Ok((new_expr.group_id, new_expr.id))) } } diff --git a/optd-mvp/src/memo/persistent/mod.rs b/optd-mvp/src/memo/persistent/mod.rs new file mode 100644 index 0000000..ae2577a --- /dev/null +++ b/optd-mvp/src/memo/persistent/mod.rs @@ -0,0 +1,66 @@ +//! This module contains the definition and implementation of the [`PersistentMemo`] type, which +//! implements the `Memo` trait and supports memo table operations necessary for query optimization. + +use crate::{ + entities::{prelude::*, *}, + DATABASE_URL, +}; +use sea_orm::*; + +#[cfg(test)] +mod tests; + +/// A persistent memo table, backed by a database on disk. +/// +/// TODO more docs. +pub struct PersistentMemo { + /// This `PersistentMemo` is reliant on the SeaORM [`DatabaseConnection`] that stores all of the + /// objects needed for query optimization. + db: DatabaseConnection, +} + +impl PersistentMemo { + /// Creates a new `PersistentMemo` struct by connecting to a database defined at + /// [`DATABASE_URL`]. + /// + /// TODO remove dead code and write docs. + #[allow(dead_code)] + pub async fn new() -> Self { + Self { + db: Database::connect(DATABASE_URL).await.unwrap(), + } + } + + /// Since there is no asynchronous drop yet in Rust, we must do this manually. + /// + /// TODO remove dead code and write docs. 
+ #[allow(dead_code)] + pub async fn cleanup(&self) { + cascades_group::Entity::delete_many() + .exec(&self.db) + .await + .unwrap(); + fingerprint::Entity::delete_many() + .exec(&self.db) + .await + .unwrap(); + logical_expression::Entity::delete_many() + .exec(&self.db) + .await + .unwrap(); + logical_children::Entity::delete_many() + .exec(&self.db) + .await + .unwrap(); + physical_expression::Entity::delete_many() + .exec(&self.db) + .await + .unwrap(); + physical_children::Entity::delete_many() + .exec(&self.db) + .await + .unwrap(); + } +} + +mod implementation; diff --git a/optd-mvp/src/memo/persistent/tests.rs b/optd-mvp/src/memo/persistent/tests.rs new file mode 100644 index 0000000..7158b30 --- /dev/null +++ b/optd-mvp/src/memo/persistent/tests.rs @@ -0,0 +1,36 @@ +use super::*; +use crate::{expression::*, memo::Memo}; + +/// Tests is exact expression matches are detected and handled by the memo table. +#[ignore] +#[tokio::test] +async fn test_simple_duplicates() { + let memo = PersistentMemo::new().await; + memo.cleanup().await; + + let scan = scan("(a int, b int)".to_string()); + let scan1 = scan.clone(); + let scan2 = scan.clone(); + + let res0 = memo + .add_logical_expression(scan.into(), &[]) + .await + .unwrap() + .ok(); + let res1 = memo + .add_logical_expression(scan1.into(), &[]) + .await + .unwrap() + .err(); + let res2 = memo + .add_logical_expression(scan2.into(), &[]) + .await + .unwrap() + .err(); + + assert_eq!(res0, res1); + assert_eq!(res0, res2); + assert_eq!(res1, res2); + + memo.cleanup().await; +} diff --git a/optd-mvp/src/migrator/memo/m20241127_000001_fingerprint.rs b/optd-mvp/src/migrator/memo/m20241127_000001_fingerprint.rs new file mode 100644 index 0000000..4a828b8 --- /dev/null +++ b/optd-mvp/src/migrator/memo/m20241127_000001_fingerprint.rs @@ -0,0 +1,49 @@ +//! An entity representing a logical expression fingerprint. +//! +//! TODO write docs. 
+ +use crate::migrator::memo::logical_expression::LogicalExpression; +use sea_orm_migration::{prelude::*, schema::*}; + +#[derive(DeriveIden)] +pub enum Fingerprint { + Table, + Id, + LogicalExpressionId, + Kind, + Hash, +} + +#[derive(DeriveMigrationName)] +pub struct Migration; + +#[async_trait::async_trait] +impl MigrationTrait for Migration { + async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { + manager + .create_table( + Table::create() + .table(Fingerprint::Table) + .if_not_exists() + .col(pk_auto(Fingerprint::Id)) + .col(unsigned(Fingerprint::LogicalExpressionId)) + .foreign_key( + ForeignKey::create() + .from(Fingerprint::Table, Fingerprint::LogicalExpressionId) + .to(LogicalExpression::Table, LogicalExpression::Id) + .on_delete(ForeignKeyAction::Cascade) + .on_update(ForeignKeyAction::Cascade), + ) + .col(small_unsigned(Fingerprint::Kind)) + .col(big_unsigned(Fingerprint::Hash)) + .to_owned(), + ) + .await + } + + async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { + manager + .drop_table(Table::drop().table(Fingerprint::Table).to_owned()) + .await + } +} diff --git a/optd-mvp/src/migrator/memo/m20241127_000001_logical_expression.rs b/optd-mvp/src/migrator/memo/m20241127_000001_logical_expression.rs index 3682032..57356cf 100644 --- a/optd-mvp/src/migrator/memo/m20241127_000001_logical_expression.rs +++ b/optd-mvp/src/migrator/memo/m20241127_000001_logical_expression.rs @@ -19,21 +19,23 @@ //! Each `logical_expression` has a unique primary key ID, but it holds little importance other than //! helping distinguish between two different expressions. //! -//! The more interesting column is the `fingerprint` column, in which we store a hashed fingerprint -//! value that can be used to efficiently check equality between two potentially equivalent logical -//! expressions (hash-consing). See ???FIXME??? for more information on expression fingerprints. -//! //! 
Finally, since there are many different types of operators, we store a variant tag and a data //! column as JSON to represent the semi-structured data fields of logical operators. //! //! # Entity Relationships //! -//! The only relationship that `logical_expression` has is to [`cascades_group`]. It has **both** a +//! The main relationship that `logical_expression` has is to [`cascades_group`]. It has **both** a //! one-to-many **and** a many-to-many relationship with [`cascades_group`], and you can see more //! details about this in the module-level documentation for [`cascades_group`]. //! +//! The other relationship that `logical_expression` has is to [`fingerprint`]. This table stores +//! 1 or more fingerprints for every (unique) logical expression. The reason we have multiple +//! fingerprints is that an expression can belong to multiple groups during the exploration phase +//! before the merging of groups. +//! //! [`cascades_group`]: super::cascades_group //! [`physical_expression`]: super::physical_expression +//! [`fingerprint`]: super::fingerprint use crate::migrator::memo::cascades_group::CascadesGroup; use sea_orm_migration::{prelude::*, schema::*}; @@ -43,7 +45,6 @@ pub enum LogicalExpression { Table, Id, GroupId, - Fingerprint, Kind, Data, } @@ -68,7 +69,6 @@ impl MigrationTrait for Migration { .on_delete(ForeignKeyAction::Cascade) .on_update(ForeignKeyAction::Cascade), ) - .col(big_unsigned(LogicalExpression::Fingerprint)) .col(small_integer(LogicalExpression::Kind)) .col(json(LogicalExpression::Data)) .to_owned(), diff --git a/optd-mvp/src/migrator/memo/m20241127_000001_physical_expression.rs b/optd-mvp/src/migrator/memo/m20241127_000001_physical_expression.rs index 7653112..1e66195 100644 --- a/optd-mvp/src/migrator/memo/m20241127_000001_physical_expression.rs +++ b/optd-mvp/src/migrator/memo/m20241127_000001_physical_expression.rs @@ -20,9 +20,10 @@ //! 
Each `physical_expression` has a unique primary key ID, and other tables will store a foreign //! key reference to a specific `physical_expression`s. //! -//! The more interesting column is the `fingerprint` column, in which we store a hashed fingerprint -//! value that can be used to efficiently check equality between two potentially equivalent physical -//! expressions (hash-consing). See ???FIXME??? for more information on expression fingerprints. +//! Note that `physical_expression` does **not** store a fingerprint. Remember that we want to +//! detect duplicates in the logical exploration phase. If there are no duplicate logical +//! expressions in the memo table, then there cannot be any duplicate physical expressions, which +//! are derived from said deduplicated logical expressions. //! //! Finally, since there are many different types of operators, we store a variant tag and a data //! column as JSON to represent the semi-structured data fields of logical operators. @@ -44,7 +45,6 @@ pub enum PhysicalExpression { Table, Id, GroupId, - Fingerprint, Kind, Data, } @@ -69,7 +69,6 @@ impl MigrationTrait for Migration { .on_delete(ForeignKeyAction::Cascade) .on_update(ForeignKeyAction::Cascade), ) - .col(big_unsigned(PhysicalExpression::Fingerprint)) .col(small_integer(PhysicalExpression::Kind)) .col(json(PhysicalExpression::Data)) .to_owned(), diff --git a/optd-mvp/src/migrator/memo/mod.rs b/optd-mvp/src/migrator/memo/mod.rs index 8ed9390..7a60c9b 100644 --- a/optd-mvp/src/migrator/memo/mod.rs +++ b/optd-mvp/src/migrator/memo/mod.rs @@ -2,12 +2,14 @@ //! optimization framework. 
pub(crate) mod m20241127_000001_cascades_group; +pub(crate) mod m20241127_000001_fingerprint; pub(crate) mod m20241127_000001_logical_children; pub(crate) mod m20241127_000001_logical_expression; pub(crate) mod m20241127_000001_physical_children; pub(crate) mod m20241127_000001_physical_expression; pub(crate) use m20241127_000001_cascades_group as cascades_group; +pub(crate) use m20241127_000001_fingerprint as fingerprint; pub(crate) use m20241127_000001_logical_children as logical_children; pub(crate) use m20241127_000001_logical_expression as logical_expression; pub(crate) use m20241127_000001_physical_children as physical_children; diff --git a/optd-mvp/src/migrator/mod.rs b/optd-mvp/src/migrator/mod.rs index 179c406..0945423 100644 --- a/optd-mvp/src/migrator/mod.rs +++ b/optd-mvp/src/migrator/mod.rs @@ -9,6 +9,7 @@ impl MigratorTrait for Migrator { fn migrations() -> Vec> { vec![ Box::new(memo::cascades_group::Migration), + Box::new(memo::fingerprint::Migration), Box::new(memo::logical_expression::Migration), Box::new(memo::logical_children::Migration), Box::new(memo::physical_expression::Migration), From 0a0af6d547e6ed9e348992a2cadbd21a9eb96bf8 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Sat, 30 Nov 2024 11:52:46 -0500 Subject: [PATCH 05/13] huge refactor of persistent memo implementation This commit completely refactors the memo table, removing the `Memo` trait and instead placing all methods directly on the `PersistentMemo` structure itself. This also cleans up some code in other places. 
--- Cargo.lock | 10 + optd-mvp/Cargo.toml | 2 + optd-mvp/src/entities/logical_children.rs | 2 +- optd-mvp/src/entities/prelude.rs | 2 + optd-mvp/src/expression/logical_expression.rs | 107 +++- .../src/expression/physical_expression.rs | 45 +- optd-mvp/src/lib.rs | 12 - optd-mvp/src/memo/interface.rs | 176 ------- optd-mvp/src/memo/mod.rs | 32 +- .../src/memo/persistent/implementation.rs | 496 ++++++++++++------ optd-mvp/src/memo/persistent/mod.rs | 50 +- optd-mvp/src/memo/persistent/tests.rs | 146 +++++- .../memo/m20241127_000001_logical_children.rs | 2 +- 13 files changed, 603 insertions(+), 479 deletions(-) delete mode 100644 optd-mvp/src/memo/interface.rs diff --git a/Cargo.lock b/Cargo.lock index 3059383..8acb13b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -710,6 +710,15 @@ dependencies = [ "slab", ] +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = "generic-array" version = "0.14.4" @@ -1146,6 +1155,7 @@ version = "0.1.0" dependencies = [ "async-stream", "async-trait", + "fxhash", "sea-orm", "sea-orm-migration", "serde", diff --git a/optd-mvp/Cargo.toml b/optd-mvp/Cargo.toml index 3b72407..f4a3a62 100644 --- a/optd-mvp/Cargo.toml +++ b/optd-mvp/Cargo.toml @@ -20,8 +20,10 @@ serde_json = "1.0.118" # Support `Hash` on `serde_json::Value` in "1.0.118". tokio = { version = "1.0", features = ["macros", "rt-multi-thread"] } trait-variant = "0.1.2" # Support `make(Send)` syntax in "0.1.2". thiserror = "2.0" +fxhash = "0.2" # Pin more recent versions for `-Zminimal-versions`. async-trait = "0.1.43" # Remove lifetime parameter from "0.1.42". async-stream = "0.3.1" # Fix unsatisfied trait bound from "0.3.0". strum = "0.26.0" # Fix `std::marker::Sized` from "0.25.0". 
+ diff --git a/optd-mvp/src/entities/logical_children.rs b/optd-mvp/src/entities/logical_children.rs index 120641f..067eaa7 100644 --- a/optd-mvp/src/entities/logical_children.rs +++ b/optd-mvp/src/entities/logical_children.rs @@ -23,7 +23,7 @@ pub enum Relation { CascadesGroup, #[sea_orm( belongs_to = "super::logical_expression::Entity", - from = "Column::GroupId", + from = "Column::LogicalExpressionId", to = "super::logical_expression::Column::Id", on_update = "Cascade", on_delete = "Cascade" diff --git a/optd-mvp/src/entities/prelude.rs b/optd-mvp/src/entities/prelude.rs index 5619363..bf6879b 100644 --- a/optd-mvp/src/entities/prelude.rs +++ b/optd-mvp/src/entities/prelude.rs @@ -1,5 +1,7 @@ //! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 +#![allow(unused_imports)] + pub use super::cascades_group::Entity as CascadesGroup; pub use super::fingerprint::Entity as Fingerprint; pub use super::logical_children::Entity as LogicalChildren; diff --git a/optd-mvp/src/expression/logical_expression.rs b/optd-mvp/src/expression/logical_expression.rs index c87b055..7c3362d 100644 --- a/optd-mvp/src/expression/logical_expression.rs +++ b/optd-mvp/src/expression/logical_expression.rs @@ -1,37 +1,91 @@ //! Definition of logical expressions / relations in the Cascades query optimization framework. //! -//! FIXME: All fields are placeholders, and group IDs are just represented as i32 for now. -//! FIXME: Representation needs to know how to "rewrite" child group IDs to whatever a fingerprint -//! will need. +//! FIXME: All fields are placeholders. //! -//! TODO figure out if each relation should be in a different submodule. +//! TODO Remove dead code. +//! TODO Figure out if each relation should be in a different submodule. //! TODO This entire file is a WIP. 
-use crate::entities::*; +#![allow(dead_code)] + +use crate::{entities::*, memo::GroupId}; +use fxhash::hash; use serde::{Deserialize, Serialize}; -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub enum LogicalExpression { Scan(Scan), Filter(Filter), Join(Join), } -#[derive(Serialize, Deserialize, Clone, Debug)] +/// FIXME: Figure out how to make everything unsigned instead of signed. +impl LogicalExpression { + pub fn kind(&self) -> i16 { + match self { + LogicalExpression::Scan(_) => 0, + LogicalExpression::Filter(_) => 1, + LogicalExpression::Join(_) => 2, + } + } + + /// Definitions of custom fingerprinting strategies for each kind of logical expression. + pub fn fingerprint(&self) -> i64 { + self.fingerprint_with_rewrite(&[]) + } + + /// Calculates the fingerprint of a given expression, but replaces all of the children group IDs + /// with a new group ID if it is listed in the input `rewrites` list. + /// + /// TODO Allow each expression to implement a trait that does this. + pub fn fingerprint_with_rewrite(&self, rewrites: &[(GroupId, GroupId)]) -> i64 { + // Closure that rewrites a group ID if needed. + let rewrite = |x: GroupId| { + if rewrites.is_empty() { + return x; + } + + if let Some(i) = rewrites.iter().position(|(curr, _new)| &x == curr) { + assert_eq!(rewrites[i].0, x); + rewrites[i].1 + } else { + x + } + }; + + let kind = self.kind() as u16 as usize; + let hash = match self { + LogicalExpression::Scan(scan) => hash(scan.table_schema.as_str()), + LogicalExpression::Filter(filter) => { + hash(&rewrite(filter.child).0) ^ hash(filter.expression.as_str()) + } + LogicalExpression::Join(join) => { + hash(&rewrite(join.left).0) + ^ hash(&rewrite(join.right).0) + ^ hash(join.expression.as_str()) + } + }; + + // Mask out the bottom 16 bits of `hash` and replace them with `kind`. 
+ ((hash & !0xFFFF) | kind) as i64 + } +} + +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] pub struct Scan { table_schema: String, } -#[derive(Serialize, Deserialize, Clone, Debug)] +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] pub struct Filter { - child: i32, + child: GroupId, expression: String, } -#[derive(Serialize, Deserialize, Clone, Debug)] +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] pub struct Join { - left: i32, - right: i32, + left: GroupId, + right: GroupId, expression: String, } @@ -71,17 +125,18 @@ impl From for logical_expression::Model { } } + let kind = value.kind(); match value { LogicalExpression::Scan(scan) => create_logical_expression( - 0, + kind, serde_json::to_value(scan).expect("unable to serialize logical `Scan`"), ), LogicalExpression::Filter(filter) => create_logical_expression( - 1, + kind, serde_json::to_value(filter).expect("unable to serialize logical `Filter`"), ), LogicalExpression::Join(join) => create_logical_expression( - 2, + kind, serde_json::to_value(join).expect("unable to serialize logical `Join`"), ), } @@ -94,24 +149,28 @@ pub use build::*; #[cfg(test)] mod build { use super::*; - use crate::expression::Expression; + use crate::expression::LogicalExpression; - pub fn scan(table_schema: String) -> Expression { - Expression::Logical(LogicalExpression::Scan(Scan { table_schema })) + pub fn scan(table_schema: String) -> LogicalExpression { + LogicalExpression::Scan(Scan { table_schema }) } - pub fn filter(child_group: i32, expression: String) -> Expression { - Expression::Logical(LogicalExpression::Filter(Filter { + pub fn filter(child_group: GroupId, expression: String) -> LogicalExpression { + LogicalExpression::Filter(Filter { child: child_group, expression, - })) + }) } - pub fn join(left_group: i32, right_group: i32, expression: String) -> Expression { - Expression::Logical(LogicalExpression::Join(Join { + pub fn join( + left_group: GroupId, + right_group: GroupId, + 
expression: String, + ) -> LogicalExpression { + LogicalExpression::Join(Join { left: left_group, right: right_group, expression, - })) + }) } } diff --git a/optd-mvp/src/expression/physical_expression.rs b/optd-mvp/src/expression/physical_expression.rs index 6552a96..5719752 100644 --- a/optd-mvp/src/expression/physical_expression.rs +++ b/optd-mvp/src/expression/physical_expression.rs @@ -1,35 +1,38 @@ //! Definition of physical expressions / operators in the Cascades query optimization framework. //! -//! FIXME: All fields are placeholders, and group IDs are just represented as i32 for now. +//! FIXME: All fields are placeholders. //! -//! TODO figure out if each operator should be in a different submodule. +//! TODO Remove dead code. +//! TODO Figure out if each operator should be in a different submodule. //! TODO This entire file is a WIP. -use crate::entities::*; +#![allow(dead_code)] + +use crate::{entities::*, memo::GroupId}; use serde::{Deserialize, Serialize}; -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub enum PhysicalExpression { TableScan(TableScan), Filter(PhysicalFilter), HashJoin(HashJoin), } -#[derive(Serialize, Deserialize, Clone, Debug)] +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] pub struct TableScan { table_schema: String, } -#[derive(Serialize, Deserialize, Clone, Debug)] +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] pub struct PhysicalFilter { - child: i32, + child: GroupId, expression: String, } -#[derive(Serialize, Deserialize, Clone, Debug)] +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] pub struct HashJoin { - left: i32, - right: i32, + left: GroupId, + right: GroupId, expression: String, } @@ -92,24 +95,28 @@ pub use build::*; #[cfg(test)] mod build { use super::*; - use crate::expression::Expression; + use crate::expression::PhysicalExpression; - pub fn table_scan(table_schema: String) -> Expression { - 
Expression::Physical(PhysicalExpression::TableScan(TableScan { table_schema })) + pub fn table_scan(table_schema: String) -> PhysicalExpression { + PhysicalExpression::TableScan(TableScan { table_schema }) } - pub fn filter(child_group: i32, expression: String) -> Expression { - Expression::Physical(PhysicalExpression::Filter(PhysicalFilter { + pub fn filter(child_group: GroupId, expression: String) -> PhysicalExpression { + PhysicalExpression::Filter(PhysicalFilter { child: child_group, expression, - })) + }) } - pub fn hash_join(left_group: i32, right_group: i32, expression: String) -> Expression { - Expression::Physical(PhysicalExpression::HashJoin(HashJoin { + pub fn hash_join( + left_group: GroupId, + right_group: GroupId, + expression: String, + ) -> PhysicalExpression { + PhysicalExpression::HashJoin(HashJoin { left: left_group, right: right_group, expression, - })) + }) } } diff --git a/optd-mvp/src/lib.rs b/optd-mvp/src/lib.rs index 98c5f11..506eee4 100644 --- a/optd-mvp/src/lib.rs +++ b/optd-mvp/src/lib.rs @@ -37,15 +37,3 @@ pub type OptimizerResult = Result; pub async fn migrate(db: &DatabaseConnection) -> Result<(), DbErr> { Migrator::refresh(db).await } - -/// Helper function for hashing expression data. -/// -/// TODO remove this. -fn hash_expression(kind: i16, data: &serde_json::Value) -> i64 { - use std::hash::{DefaultHasher, Hash, Hasher}; - - let mut hasher = DefaultHasher::new(); - kind.hash(&mut hasher); - data.hash(&mut hasher); - hasher.finish() as i64 -} diff --git a/optd-mvp/src/memo/interface.rs b/optd-mvp/src/memo/interface.rs deleted file mode 100644 index cb6c76d..0000000 --- a/optd-mvp/src/memo/interface.rs +++ /dev/null @@ -1,176 +0,0 @@ -//! This module defines the [`Memo`] trait, which defines shared behavior of all memo table that can -//! be used for query optimization in the Cascades framework. 
- -use crate::OptimizerResult; -use thiserror::Error; - -/// The different kinds of errors that might occur while running operations on a memo table. -#[derive(Error, Debug)] -pub enum MemoError { - #[error("unknown group ID {0}")] - UnknownGroup(i32), - #[error("unknown logical expression ID {0}")] - UnknownLogicalExpression(i32), - #[error("unknown physical expression ID {0}")] - UnknownPhysicalExpression(i32), - #[error("invalid expression encountered")] - InvalidExpression, -} - -/// A trait representing an implementation of a memoization table. -/// -/// Note that we use [`trait_variant`] here in order to add bounds on every method. -/// See this [blog post]( -/// https://blog.rust-lang.org/2023/12/21/async-fn-rpit-in-traits.html#async-fn-in-public-traits) -/// for more information. -/// -/// TODO remove dead code. -#[allow(dead_code)] -#[trait_variant::make(Send)] -pub trait Memo { - /// A type representing a group in the Cascades framework. - type Group; - /// A type representing a unique identifier for a group. - type GroupId; - /// A type representing a logical expression. - type LogicalExpression; - /// A type representing a unique identifier for a logical expression. - type LogicalExpressionId; - /// A type representing a physical expression. - type PhysicalExpression; - /// A type representing a unique identifier for a physical expression. - type PhysicalExpressionId; - - /// Retrieves a [`Self::Group`] given a [`Self::GroupId`]. - /// - /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. - async fn get_group(&self, group_id: Self::GroupId) -> OptimizerResult; - - /// Retrieves a [`Self::LogicalExpression`] given a [`Self::LogicalExpressionId`]. - /// - /// If the logical expression does not exist, returns a [`MemoError::UnknownLogicalExpression`] - /// error. 
- async fn get_logical_expression( - &self, - logical_expression_id: Self::LogicalExpressionId, - ) -> OptimizerResult; - - /// Retrieves a [`Self::PhysicalExpression`] given a [`Self::PhysicalExpressionId`]. - /// - /// If the physical expression does not exist, returns a - /// [`MemoError::UnknownPhysicalExpression`] error. - async fn get_physical_expression( - &self, - physical_expression_id: Self::PhysicalExpressionId, - ) -> OptimizerResult; - - /// Retrieves all of the logical expression "children" IDs of a group. - /// - /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. - async fn get_logical_children( - &self, - group_id: Self::GroupId, - ) -> OptimizerResult>; - - /// Retrieves all of the physical expression "children" IDs of a group. - /// - /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. - async fn get_physical_children( - &self, - group_id: Self::GroupId, - ) -> OptimizerResult>; - - /// Checks if a given logical expression is a duplicate / already exists in the memo table. - /// - /// In order to prevent a large amount of duplicate work, the memo table must support duplicate - /// expression detection. - /// - /// Returns `Some(expression_id)` if the memo table detects that the expression already exists, - /// and `None` otherwise. - async fn is_duplicate_logical_expression( - &self, - logical_expression: &Self::LogicalExpression, - ) -> OptimizerResult>; - - /// Updates / replaces a group's best physical plan (winner). Optionally returns the previous - /// winner's physical expression ID. - /// - /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. - async fn update_group_winner( - &self, - group_id: Self::GroupId, - physical_expression_id: Self::PhysicalExpressionId, - ) -> OptimizerResult>; - - /// Adds a physical expression to an existing group via its [`Self::GroupId`]. 
- /// - /// The caller is required to pass in a slice of `GroupId` that represent the child groups of - /// the input expression. - /// - /// The caller is also required to set the `group_id` field of the input `physical_expression` - /// to be equal to `group_id`, otherwise this function will return a - /// [`MemoError::InvalidExpression`] error. - /// - /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. - /// - /// On successful insertion, returns the ID of the physical expression. - async fn add_physical_expression_to_group( - &self, - group_id: Self::GroupId, - physical_expression: Self::PhysicalExpression, - children: &[Self::GroupId], - ) -> OptimizerResult; - - /// Adds a logical expression to an existing group via its [`Self::GroupId`]. - /// - /// The caller is required to pass in a slice of `GroupId` that represent the child groups of - /// the input expression. - /// - /// The caller is also required to set the `group_id` field of the input `logical_expression` - /// to be equal to `group_id`, otherwise this function will return a - /// [`MemoError::InvalidExpression`] error. - /// - /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. - /// - /// If the memo table detects that the input logical expression is a duplicate expression, it - /// will **not** insert the expression into the memo table. Instead, it will return an - /// `Ok(Err(expression_id))`, which is a unique identifier of the expression that the input is a - /// duplicate of. The caller can use this ID to retrieve the group the original belongs to. - /// - /// If the memo table detects that the input is unique, it will insert the expression into the - /// input group and return an `Ok(Ok(expression_id))`. 
- async fn add_logical_expression_to_group( - &self, - group_id: Self::GroupId, - logical_expression: Self::LogicalExpression, - children: &[Self::GroupId], - ) -> OptimizerResult>; - - /// Adds a new logical expression into the memo table, creating a new group if the expression - /// does not already exist. - /// - /// The caller is required to pass in a slice of `GroupId` that represent the child groups of - /// the input expression. - /// - /// The [`Self::LogicalExpression`] type should have some sort of mechanism for checking if - /// the expression has been seen before, and if it has already been created, then the parent - /// group ID should also be retrievable. - /// - /// If the expression already exists, then this function will return the [`Self::GroupId`] of - /// the parent group and the corresponding (already existing) [`Self::LogicalExpressionId`]. It - /// will also completely ignore the group ID field of the input expression as well as ignore the - /// input slice of child groups. - /// - /// If the expression does not exist, this function will create a new group and a new - /// expression, returning brand new IDs for both. - async fn add_logical_expression( - &self, - expression: Self::LogicalExpression, - children: &[Self::LogicalExpressionId], - ) -> OptimizerResult< - Result< - (Self::GroupId, Self::LogicalExpressionId), - (Self::GroupId, Self::LogicalExpressionId), - >, - >; -} diff --git a/optd-mvp/src/memo/mod.rs b/optd-mvp/src/memo/mod.rs index 5253352..fbf23a2 100644 --- a/optd-mvp/src/memo/mod.rs +++ b/optd-mvp/src/memo/mod.rs @@ -3,7 +3,33 @@ //! //! TODO more docs. -mod persistent; +use serde::{Deserialize, Serialize}; +use thiserror::Error; + +/// A new type of an integer identifying a unique group. +#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq)] +#[serde(transparent)] +pub struct GroupId(pub i32); + +/// A new type of an integer identifying a unique logical expression. 
+#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct LogicalExpressionId(pub i32); -mod interface; -pub use interface::{Memo, MemoError}; +/// A new type of an integer identifying a unique physical expression. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct PhysicalExpressionId(pub i32); + +/// The different kinds of errors that might occur while running operations on a memo table. +#[derive(Error, Debug)] +pub enum MemoError { + #[error("unknown group ID {0:?}")] + UnknownGroup(GroupId), + #[error("unknown logical expression ID {0:?}")] + UnknownLogicalExpression(LogicalExpressionId), + #[error("unknown physical expression ID {0:?}")] + UnknownPhysicalExpression(PhysicalExpressionId), + #[error("invalid expression encountered")] + InvalidExpression, +} + +mod persistent; diff --git a/optd-mvp/src/memo/persistent/implementation.rs b/optd-mvp/src/memo/persistent/implementation.rs index 4c06c4e..4fc7048 100644 --- a/optd-mvp/src/memo/persistent/implementation.rs +++ b/optd-mvp/src/memo/persistent/implementation.rs @@ -1,228 +1,370 @@ -//! This module contains the implementation of the [`Memo`] trait for [`PersistentMemo`]. +//! This module contains the implementation of [`PersistentMemo`]. +//! +//! TODO For parallelism, almost all of these methods need to be under transactions. +//! TODO Write more docs. +//! TODO Remove dead code. 
-use super::*; +#![allow(dead_code)] + +use super::PersistentMemo; use crate::{ - hash_expression, - memo::{Memo, MemoError}, - OptimizerResult, + entities::*, + expression::{LogicalExpression, PhysicalExpression}, + memo::{GroupId, LogicalExpressionId, MemoError, PhysicalExpressionId}, + OptimizerResult, DATABASE_URL, +}; +use sea_orm::{ + entity::prelude::*, + entity::{IntoActiveModel, NotSet, Set}, + Database, }; -impl Memo for PersistentMemo { - type Group = cascades_group::Model; - type GroupId = i32; - type LogicalExpression = logical_expression::Model; - type LogicalExpressionId = i32; - type PhysicalExpression = physical_expression::Model; - type PhysicalExpressionId = i32; +impl PersistentMemo { + /// Creates a new `PersistentMemo` struct by connecting to a database defined at + /// [`DATABASE_URL`]. + pub async fn new() -> Self { + Self { + db: Database::connect(DATABASE_URL).await.unwrap(), + } + } + + /// Deletes all objects in the backing database. + /// + /// Since there is no asynchronous drop yet in Rust, in order to drop all objects in the + /// database, the user must call this manually. + pub async fn cleanup(&self) { + macro_rules! delete_all { + ($($module: ident),+ $(,)?) => { + $( + $module::Entity::delete_many() + .exec(&self.db) + .await + .unwrap(); + )+ + }; + } + + delete_all! { + cascades_group, + fingerprint, + logical_expression, + logical_children, + physical_expression, + physical_children + }; + } - async fn get_group(&self, group_id: Self::GroupId) -> OptimizerResult { - Ok(CascadesGroup::find_by_id(group_id) + /// Retrieves a [`cascades_group::Model`] given its ID. + /// + /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. + /// + /// FIXME: use an in-memory representation of a group instead. + pub async fn get_group(&self, group_id: GroupId) -> OptimizerResult { + Ok(cascades_group::Entity::find_by_id(group_id.0) .one(&self.db) .await? .ok_or(MemoError::UnknownGroup(group_id))?) 
} - async fn get_logical_expression( + /// Retrieves a [`physical_expression::Model`] given a [`PhysicalExpressionId`]. + /// + /// If the physical expression does not exist, returns a + /// [`MemoError::UnknownPhysicalExpression`] error. + pub async fn get_physical_expression( &self, - logical_expression_id: Self::LogicalExpressionId, - ) -> OptimizerResult { - Ok(LogicalExpression::find_by_id(logical_expression_id) + physical_expression_id: PhysicalExpressionId, + ) -> OptimizerResult<(GroupId, PhysicalExpression)> { + // Lookup the entity in the database via the unique expression ID. + let model = physical_expression::Entity::find_by_id(physical_expression_id.0) .one(&self.db) .await? - .ok_or(MemoError::UnknownLogicalExpression(logical_expression_id))?) + .ok_or(MemoError::UnknownPhysicalExpression(physical_expression_id))?; + + let group_id = GroupId(model.group_id); + let expr = model.into(); + + Ok((group_id, expr)) } - async fn get_physical_expression( + /// Retrieves a [`logical_expression::Model`] given its [`LogicalExpressionId`]. + /// + /// If the logical expression does not exist, returns a [`MemoError::UnknownLogicalExpression`] + /// error. + pub async fn get_logical_expression( &self, - physical_expression_id: Self::PhysicalExpressionId, - ) -> OptimizerResult { - Ok(PhysicalExpression::find_by_id(physical_expression_id) + logical_expression_id: LogicalExpressionId, + ) -> OptimizerResult<(GroupId, LogicalExpression)> { + // Lookup the entity in the database via the unique expression ID. + let model = logical_expression::Entity::find_by_id(logical_expression_id.0) .one(&self.db) .await? - .ok_or(MemoError::UnknownPhysicalExpression(physical_expression_id))?) + .ok_or(MemoError::UnknownLogicalExpression(logical_expression_id))?; + + let group_id = GroupId(model.group_id); + let expr = model.into(); + + Ok((group_id, expr)) } - async fn get_logical_children( + /// Retrieves all of the logical expression "children" IDs of a group. 
+ /// + /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. + /// + /// FIXME: `find_related` does not work for some reason, have to use manual `filter`. + pub async fn get_logical_children( &self, - group_id: Self::GroupId, - ) -> OptimizerResult> { - // First retrieve the group record, and then find all related logical expressions. - Ok(self - .get_group(group_id) - .await? - .find_related(LogicalChildren) + group_id: GroupId, + ) -> OptimizerResult> { + // Search for expressions that have the given parent group ID. + let children = logical_expression::Entity::find() + .filter(logical_expression::Column::GroupId.eq(group_id.0)) .all(&self.db) .await? .into_iter() - .map(|m| m.logical_expression_id) - .collect()) + .map(|m| LogicalExpressionId(m.id)) + .collect(); + + Ok(children) } - async fn get_physical_children( + /// Retrieves all of the physical expression "children" IDs of a group. + /// + /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. + pub async fn get_physical_children( &self, - group_id: Self::GroupId, - ) -> OptimizerResult> { - // First retrieve the group record, and then find all related physical expressions. - Ok(self - .get_group(group_id) - .await? - .find_related(PhysicalChildren) + group_id: GroupId, + ) -> OptimizerResult> { + // Search for expressions that have the given parent group ID. + let children = physical_expression::Entity::find() + .filter(physical_expression::Column::GroupId.eq(group_id.0)) .all(&self.db) .await? .into_iter() - .map(|m| m.physical_expression_id) - .collect()) - } - - /// FIXME Check that all of the children are root groups? - async fn is_duplicate_logical_expression( - &self, - logical_expression: &Self::LogicalExpression, - ) -> OptimizerResult> { - // Lookup all expressions that have the same fingerprint and kind. There may be false - // positives, but we will check for those next. 
- let kind = logical_expression.kind; - let fingerprint = hash_expression(kind, &logical_expression.data); - - let potential_matches = Fingerprint::find() - .filter(fingerprint::Column::Hash.eq(fingerprint)) - .filter(fingerprint::Column::Kind.eq(kind)) - .all(&self.db) - .await?; - - if potential_matches.is_empty() { - return Ok(None); - } - - let mut match_id = None; - for potential_match in potential_matches { - let expr_id = potential_match.logical_expression_id; - let expr = self.get_logical_expression(expr_id).await?; - - if expr.data == logical_expression.data { - // There should be at most one duplicate expression. - match_id = Some(expr_id); - break; - } - } + .map(|m| PhysicalExpressionId(m.id)) + .collect(); - Ok(match_id) + Ok(children) } + /// Updates / replaces a group's best physical plan (winner). Optionally returns the previous + /// winner's physical expression ID. + /// + /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. + /// /// FIXME: In the future, this should first check that we aren't overwriting a winner that was - /// updated from another thread. - async fn update_group_winner( + /// updated from another thread by comparing against the cost of the plan. + pub async fn update_group_winner( &self, - group_id: Self::GroupId, - physical_expression_id: Self::PhysicalExpressionId, - ) -> OptimizerResult> { - // First retrieve the group record, and then use an `ActiveModel` to update it. + group_id: GroupId, + physical_expression_id: PhysicalExpressionId, + ) -> OptimizerResult> { + // First retrieve the group record. let mut group = self.get_group(group_id).await?.into_active_model(); - let old_id = group.winner; - group.winner = Set(Some(physical_expression_id)); + // Update the group to point to the new winner. + let old_id = group.winner; + group.winner = Set(Some(physical_expression_id.0)); group.update(&self.db).await?; - // The old value must be set (`None` still means it has been set). 
- let old = old_id.unwrap(); + // Note that the `unwrap` here is unwrapping the `ActiveValue`, not the `Option`. + let old = old_id.unwrap().map(PhysicalExpressionId); Ok(old) } - async fn add_physical_expression_to_group( + /// Adds a logical expression to an existing group via its ID. + /// + /// The caller is required to pass in a slice of [`GroupId`] that represent the child groups of + /// the input expression. + /// + /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. + /// + /// If the memo table detects that the input logical expression is a duplicate expression, this + /// function will **not** insert the expression into the memo table. Instead, it will return an + /// `Ok(Err(expression_id))`, which is a unique identifier of the expression that the input is a + /// duplicate of. The caller can use this ID to retrieve the group the original belongs to. + /// + /// If the memo table detects that the input is unique, it will insert the expression into the + /// input group and return an `Ok(Ok(expression_id))`. + /// + /// FIXME Check that all of the children are reduced groups? + pub async fn add_logical_expression_to_group( &self, - group_id: Self::GroupId, - physical_expression: Self::PhysicalExpression, - children: &[Self::GroupId], - ) -> OptimizerResult { - if physical_expression.group_id != group_id { - Err(MemoError::InvalidExpression)? + group_id: GroupId, + logical_expression: LogicalExpression, + children: &[GroupId], + ) -> OptimizerResult> { + // Check if the expression already exists anywhere in the memo table. + if let Some(existing_id) = self + .is_duplicate_logical_expression(&logical_expression) + .await? + { + return Ok(Err(existing_id)); } // Check if the group actually exists. let _ = self.get_group(group_id).await?; + // Insert the expression. 
+ let model: logical_expression::Model = logical_expression.into(); + let mut active_model = model.into_active_model(); + active_model.group_id = Set(group_id.0); + active_model.id = NotSet; + let new_model = active_model.insert(&self.db).await?; + + let expr_id = new_model.id; + // Insert the child groups of the expression into the junction / children table. - if !children.is_empty() { - PhysicalChildren::insert_many(children.iter().copied().map(|group_id| { - physical_children::ActiveModel { - physical_expression_id: Set(physical_expression.id), - group_id: Set(group_id), - } - })) - .exec(&self.db) - .await?; - } + logical_children::Entity::insert_many(children.iter().copied().map(|child_id| { + logical_children::ActiveModel { + logical_expression_id: Set(expr_id), + group_id: Set(child_id.0), + } + })) + .on_empty_do_nothing() + .exec(&self.db) + .await?; - // Insert the expression. - let res = physical_expression - .into_active_model() - .insert(&self.db) + // Finally, insert the fingerprint of the logical expression as well. + let new_expr: LogicalExpression = new_model.into(); + let kind = new_expr.kind(); + let hash = new_expr.fingerprint(); + + let fingerprint = fingerprint::ActiveModel { + id: NotSet, + logical_expression_id: Set(expr_id), + kind: Set(kind), + hash: Set(hash), + }; + let _ = fingerprint::Entity::insert(fingerprint) + .exec(&self.db) .await?; - Ok(res.id) + Ok(Ok(LogicalExpressionId(expr_id))) } - /// FIXME Check that all of the children are reduced groups? - async fn add_logical_expression_to_group( + /// Adds a physical expression to an existing group via its ID. + /// + /// The caller is required to pass in a slice of [`GroupId`] that represent the child groups of + /// the input expression. + /// + /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. + /// + /// On successful insertion, returns the ID of the physical expression. 
+ pub async fn add_physical_expression_to_group( &self, - group_id: Self::GroupId, - logical_expression: Self::LogicalExpression, - children: &[Self::GroupId], - ) -> OptimizerResult> { - if logical_expression.group_id != group_id { - Err(MemoError::InvalidExpression)? - } - - // Check if the expression already exists in the memo table. - if let Some(existing_id) = self - .is_duplicate_logical_expression(&logical_expression) - .await? - { - return Ok(Err(existing_id)); - } - + group_id: GroupId, + physical_expression: PhysicalExpression, + children: &[GroupId], + ) -> OptimizerResult { // Check if the group actually exists. let _ = self.get_group(group_id).await?; + // Insert the expression. + let model: physical_expression::Model = physical_expression.into(); + let mut active_model = model.into_active_model(); + active_model.group_id = Set(group_id.0); + active_model.id = NotSet; + let new_model = active_model.insert(&self.db).await?; + // Insert the child groups of the expression into the junction / children table. - if !children.is_empty() { - LogicalChildren::insert_many(children.iter().copied().map(|group_id| { - logical_children::ActiveModel { - logical_expression_id: Set(logical_expression.id), - group_id: Set(group_id), - } - })) - .exec(&self.db) + physical_children::Entity::insert_many(children.iter().copied().map(|child_id| { + physical_children::ActiveModel { + physical_expression_id: Set(new_model.id), + group_id: Set(child_id.0), + } + })) + .on_empty_do_nothing() + .exec(&self.db) + .await?; + + Ok(PhysicalExpressionId(new_model.id)) + } + + /// Checks if the given logical expression is a duplicate / already exists in the memo table. + /// + /// In order to prevent a large amount of duplicate work, the memo table must support duplicate + /// expression detection. + /// + /// Returns `Some(expression_id)` if the memo table detects that the expression already exists, + /// and `None` otherwise. 
+ /// + /// This function assumes that the child groups of the expression are currently roots of their + /// group sets. For example, if G1 and G2 should be merged, and G1 is the root, then the input + /// expression should _not_ have G2 as a child, and should be replaced with G1. + /// + /// TODO Check that all of the children are root groups? How to do this? + pub async fn is_duplicate_logical_expression( + &self, + logical_expression: &LogicalExpression, + ) -> OptimizerResult> { + let model: logical_expression::Model = logical_expression.clone().into(); + + // Lookup all expressions that have the same fingerprint and kind. There may be false + // positives, but we will check for those next. + let kind = model.kind; + let fingerprint = logical_expression.fingerprint(); + + // Filter first by the fingerprint, and then the kind. + // FIXME: The kind is already embedded into the fingerprint, so we may not actually need the + // second filter? + let potential_matches = fingerprint::Entity::find() + .filter(fingerprint::Column::Hash.eq(fingerprint)) + .filter(fingerprint::Column::Kind.eq(kind)) + .all(&self.db) .await?; + + if potential_matches.is_empty() { + return Ok(None); } - // Insert the expression. - let res = logical_expression - .into_active_model() - .insert(&self.db) - .await?; + // Now that we have all of the expressions that match the given fingerprint, we need to + // filter out all of the expressions that might have had the same fingerprint but are not + // actually equivalent (hash collisions). + let mut match_id = None; + for potential_match in potential_matches { + let expr_id = LogicalExpressionId(potential_match.logical_expression_id); + let (_, expr) = self.get_logical_expression(expr_id).await?; - Ok(Ok(res.id)) + // Check for an exact match. + if &expr == logical_expression { + match_id = Some(expr_id); + + // There should be at most one duplicate expression, so we can break here. 
+ break; + } + } + + Ok(match_id) } + /// Adds a new group into the memo table via a logical expression, creating a new group if the + /// logical expression does not already exist. + /// + /// The caller is required to pass in a slice of [`GroupId`] that represent the child groups of + /// the input expression. + /// + /// If the expression already exists, then this function will return the [`GroupId`] of the + /// parent group and the corresponding (already existing) [`LogicalExpressionId`]. It will also + /// completely ignore the group ID field of the input expression as well as ignore the input + /// slice of child groups. + /// + /// If the expression does not exist, this function will create a new group and a new + /// expression, returning brand new IDs for both. + /// /// FIXME Check that all of the children are reduced groups? - async fn add_logical_expression( + pub async fn add_group( &self, - logical_expression: Self::LogicalExpression, - children: &[Self::GroupId], - ) -> OptimizerResult< - Result< - (Self::GroupId, Self::LogicalExpressionId), - (Self::GroupId, Self::LogicalExpressionId), - >, - > { + logical_expression: LogicalExpression, + children: &[GroupId], + ) -> OptimizerResult> + { // Check if the expression already exists in the memo table. if let Some(existing_id) = self .is_duplicate_logical_expression(&logical_expression) .await? { - let expr = self.get_logical_expression(existing_id).await?; - return Ok(Err((expr.group_id, expr.id))); + let (group_id, _expr) = self.get_logical_expression(existing_id).await?; + return Ok(Err((group_id, existing_id))); } // The expression does not exist yet, so we need to create a new group and new expression. @@ -232,39 +374,45 @@ impl Memo for PersistentMemo { ..Default::default() }; - // Create a new group. + // Create the new group. let res = cascades_group::Entity::insert(group).exec(&self.db).await?; - // Insert the input expression with the correct `group_id`. 
- let mut new_expr = logical_expression.into_active_model(); - new_expr.group_id = Set(res.last_insert_id); - new_expr.id = NotSet; - let new_expr = new_expr.insert(&self.db).await?; + // Insert the input expression into the newly created group. + let model: logical_expression::Model = logical_expression.clone().into(); + let mut active_model = model.into_active_model(); + active_model.group_id = Set(res.last_insert_id); + active_model.id = NotSet; + let new_model = active_model.insert(&self.db).await?; + + let group_id = new_model.group_id; + let expr_id = new_model.id; // Insert the child groups of the expression into the junction / children table. - if !children.is_empty() { - LogicalChildren::insert_many(children.iter().copied().map(|group_id| { - logical_children::ActiveModel { - logical_expression_id: Set(new_expr.id), - group_id: Set(group_id), - } - })) - .exec(&self.db) - .await?; - } + logical_children::Entity::insert_many(children.iter().copied().map(|child_id| { + logical_children::ActiveModel { + logical_expression_id: Set(new_model.id), + group_id: Set(child_id.0), + } + })) + .on_empty_do_nothing() + .exec(&self.db) + .await?; + + // Finally, insert the fingerprint of the logical expression as well. + let new_expr: LogicalExpression = new_model.into(); + let kind = new_expr.kind(); + let hash = new_expr.fingerprint(); - // Insert the fingerprint of the logical expression. 
- let hash = hash_expression(new_expr.kind, &new_expr.data); let fingerprint = fingerprint::ActiveModel { id: NotSet, - logical_expression_id: Set(new_expr.id), - kind: Set(new_expr.kind), + logical_expression_id: Set(expr_id), + kind: Set(kind), hash: Set(hash), }; let _ = fingerprint::Entity::insert(fingerprint) .exec(&self.db) .await?; - Ok(Ok((new_expr.group_id, new_expr.id))) + Ok(Ok((GroupId(group_id), LogicalExpressionId(expr_id)))) } } diff --git a/optd-mvp/src/memo/persistent/mod.rs b/optd-mvp/src/memo/persistent/mod.rs index ae2577a..ed64fc5 100644 --- a/optd-mvp/src/memo/persistent/mod.rs +++ b/optd-mvp/src/memo/persistent/mod.rs @@ -1,11 +1,7 @@ //! This module contains the definition and implementation of the [`PersistentMemo`] type, which //! implements the `Memo` trait and supports memo table operations necessary for query optimization. -use crate::{ - entities::{prelude::*, *}, - DATABASE_URL, -}; -use sea_orm::*; +use sea_orm::DatabaseConnection; #[cfg(test)] mod tests; @@ -19,48 +15,4 @@ pub struct PersistentMemo { db: DatabaseConnection, } -impl PersistentMemo { - /// Creates a new `PersistentMemo` struct by connecting to a database defined at - /// [`DATABASE_URL`]. - /// - /// TODO remove dead code and write docs. - #[allow(dead_code)] - pub async fn new() -> Self { - Self { - db: Database::connect(DATABASE_URL).await.unwrap(), - } - } - - /// Since there is no asynchronous drop yet in Rust, we must do this manually. - /// - /// TODO remove dead code and write docs. 
- #[allow(dead_code)] - pub async fn cleanup(&self) { - cascades_group::Entity::delete_many() - .exec(&self.db) - .await - .unwrap(); - fingerprint::Entity::delete_many() - .exec(&self.db) - .await - .unwrap(); - logical_expression::Entity::delete_many() - .exec(&self.db) - .await - .unwrap(); - logical_children::Entity::delete_many() - .exec(&self.db) - .await - .unwrap(); - physical_expression::Entity::delete_many() - .exec(&self.db) - .await - .unwrap(); - physical_children::Entity::delete_many() - .exec(&self.db) - .await - .unwrap(); - } -} - mod implementation; diff --git a/optd-mvp/src/memo/persistent/tests.rs b/optd-mvp/src/memo/persistent/tests.rs index 7158b30..f3afea6 100644 --- a/optd-mvp/src/memo/persistent/tests.rs +++ b/optd-mvp/src/memo/persistent/tests.rs @@ -1,36 +1,142 @@ -use super::*; -use crate::{expression::*, memo::Memo}; +use crate::{expression::*, memo::persistent::PersistentMemo}; -/// Tests is exact expression matches are detected and handled by the memo table. +/// Tests that exact expression matches are detected and handled by the memo table. #[ignore] #[tokio::test] -async fn test_simple_duplicates() { +async fn test_simple_logical_duplicates() { let memo = PersistentMemo::new().await; memo.cleanup().await; - let scan = scan("(a int, b int)".to_string()); - let scan1 = scan.clone(); - let scan2 = scan.clone(); + let scan = scan("t1".to_string()); + let scan1a = scan.clone(); + let scan1b = scan.clone(); + let scan2a = scan.clone(); + let scan2b = scan.clone(); - let res0 = memo - .add_logical_expression(scan.into(), &[]) + // Insert a new group and its corresponding expression. + let (group_id, logical_expression_id) = memo.add_group(scan, &[]).await.unwrap().ok().unwrap(); + + // Test `add_logical_expression`. + { + // Attempting to create a new group with a duplicate expression should fail every time. 
+ let (group_id_1a, logical_expression_id_1a) = + memo.add_group(scan1a, &[]).await.unwrap().err().unwrap(); + assert_eq!(group_id, group_id_1a); + assert_eq!(logical_expression_id, logical_expression_id_1a); + + // Try again just in case... + let (group_id_1b, logical_expression_id_1b) = + memo.add_group(scan1b, &[]).await.unwrap().err().unwrap(); + assert_eq!(group_id, group_id_1b); + assert_eq!(logical_expression_id, logical_expression_id_1b); + } + + // Test `add_logical_expression_to_group`. + { + // Attempting to add a duplicate expression into the same group should also fail every time. + let logical_expression_id_2a = memo + .add_logical_expression_to_group(group_id, scan2a, &[]) + .await + .unwrap() + .err() + .unwrap(); + assert_eq!(logical_expression_id, logical_expression_id_2a); + + let logical_expression_id_2b = memo + .add_logical_expression_to_group(group_id, scan2b, &[]) + .await + .unwrap() + .err() + .unwrap(); + assert_eq!(logical_expression_id, logical_expression_id_2b); + } + + memo.cleanup().await; +} + +/// Tests that physical expression are _not_ subject to duplicate detection and elimination. +/// +/// !!! Important !!! Note that this behavior should not actually be seen during query optimization, +/// since if logical expression have been deduplicated, there should not be any duplicate physical +/// expressions as they are derivative of the deduplicated logical expressions. +#[ignore] +#[tokio::test] +async fn test_simple_add_physical_expression() { + let memo = PersistentMemo::new().await; + memo.cleanup().await; + + // Insert a new group and its corresponding expression. + let scan = scan("t1".to_string()); + let (group_id, _) = memo.add_group(scan, &[]).await.unwrap().ok().unwrap(); + + // Insert two identical physical expressions into the _same_ group. 
+ let table_scan_1 = table_scan("t1".to_string()); + let table_scan_2 = table_scan_1.clone(); + + let physical_expression_id_1 = memo + .add_physical_expression_to_group(group_id, table_scan_1, &[]) .await - .unwrap() - .ok(); - let res1 = memo - .add_logical_expression(scan1.into(), &[]) + .unwrap(); + + let physical_expression_id_2 = memo + .add_physical_expression_to_group(group_id, table_scan_2, &[]) + .await + .unwrap(); + + // Since physical expressions do not need duplicate detection, + assert_ne!(physical_expression_id_1, physical_expression_id_2); + + memo.cleanup().await; +} + +/// Tests if the memo tables able to correctly retrieve a group's expressions. +#[ignore] +#[tokio::test] +async fn test_simple_tree() { + let memo = PersistentMemo::new().await; + memo.cleanup().await; + + // Create two scan groups. + let scan1: LogicalExpression = scan("t1".to_string()); + let scan2 = scan("t2".to_string()); + let (scan_id_1, scan_expr_id_1) = memo.add_group(scan1, &[]).await.unwrap().ok().unwrap(); + let (scan_id_2, scan_expr_id_2) = memo.add_group(scan2, &[]).await.unwrap().ok().unwrap(); + + assert_eq!( + memo.get_logical_children(scan_id_1).await.unwrap(), + &[scan_expr_id_1] + ); + assert_eq!( + memo.get_logical_children(scan_id_2).await.unwrap(), + &[scan_expr_id_2] + ); + + // Create two join expression that should be in the same group. + // TODO: Eventually, the predicates will be in their own table, and the predicate representation + // will be a foreign key. For now, we represent them as strings. + let join1 = join(scan_id_1, scan_id_2, "t1.a = t2.b".to_string()); + let join2 = join(scan_id_2, scan_id_1, "t1.a = t2.b".to_string()); + + // Create the group, adding the first expression. + let (join_id, join_expr_id_1) = memo + .add_group(join1, &[scan_id_1, scan_id_2]) .await .unwrap() - .err(); - let res2 = memo - .add_logical_expression(scan2.into(), &[]) + .ok() + .unwrap(); + // Add the second expression. 
+ let join_expr_id_2 = memo + .add_logical_expression_to_group(join_id, join2, &[scan_id_2, scan_id_1]) .await .unwrap() - .err(); + .ok() + .unwrap(); - assert_eq!(res0, res1); - assert_eq!(res0, res2); - assert_eq!(res1, res2); + assert_ne!(join_expr_id_1, join_expr_id_2); + assert_eq!( + memo.get_logical_children(join_id).await.unwrap(), + &[join_expr_id_1, join_expr_id_2] + ); memo.cleanup().await; } diff --git a/optd-mvp/src/migrator/memo/m20241127_000001_logical_children.rs b/optd-mvp/src/migrator/memo/m20241127_000001_logical_children.rs index d0835f4..037a637 100644 --- a/optd-mvp/src/migrator/memo/m20241127_000001_logical_children.rs +++ b/optd-mvp/src/migrator/memo/m20241127_000001_logical_children.rs @@ -40,7 +40,7 @@ impl MigrationTrait for Migration { ) .foreign_key( ForeignKey::create() - .from(LogicalChildren::Table, LogicalChildren::GroupId) + .from(LogicalChildren::Table, LogicalChildren::LogicalExpressionId) .to(LogicalExpression::Table, LogicalExpression::Id) .on_delete(ForeignKeyAction::Cascade) .on_update(ForeignKeyAction::Cascade), From ce1b93d073abab0a247f28151f0dabe28b35c065 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Sat, 30 Nov 2024 16:24:39 -0500 Subject: [PATCH 06/13] add union find and group merging support --- optd-mvp/DESIGN.md | 67 +++++++++ optd-mvp/{README.md => entities.md} | 0 optd-mvp/src/entities/cascades_group.rs | 2 +- optd-mvp/src/memo/mod.rs | 8 + .../src/memo/persistent/implementation.rs | 142 +++++++++++++++--- optd-mvp/src/memo/persistent/tests.rs | 74 ++++++++- .../memo/m20241127_000001_cascades_group.rs | 6 +- .../memo/m20241127_000001_fingerprint.rs | 6 +- 8 files changed, 279 insertions(+), 26 deletions(-) create mode 100644 optd-mvp/DESIGN.md rename optd-mvp/{README.md => entities.md} (100%) diff --git a/optd-mvp/DESIGN.md b/optd-mvp/DESIGN.md new file mode 100644 index 0000000..e50a56c --- /dev/null +++ b/optd-mvp/DESIGN.md @@ -0,0 +1,67 @@ +# Duplicate Elimination Memo Table + +Note that most of the details 
are in `src/memo/persistent/implementation.rs`. + +For this document, we are assuming that the memo table is backed by a database / ORM. A lot of these +problems would likely not be an issue if everything was in memory. + +## Group Merging + +During logical exploration, there will be rules that create cycles between groups. The easy solution +for this is to immediately merge two groups together when the engine determines that adding an +expression would result in a duplicate expression from another group. + +However, if we want to support parallel exploration, this could be prone to high contention. By +definition, merging group G1 into group G2 would mean that _every expression_ that has a child of +group G1 would need to be rewritten to point to group G2 instead. + +This is unacceptable in a parallel setting, as that would mean every single task that gets affected +would need to either wait for the rewrites to happen before resuming work, or need to abort their +work because data has changed underneath them. + +So immediate / eager group merging is not a great idea for parallel exploration. However, if we do +not ever merge two groups that are identical, we are subject to doing duplicate work for every +duplicate expression in the memo table during physical optimization. + +Instead of merging groups together immediately, we can instead maintain an auxiliary data structure +that records the groups that _eventually_ need to get merged, and "lazily" merge those groups +together once every group has finished exploration. + +## Union-Find Group Sets + +We use the well-known Union-Find algorithm and corresponding data structure as the auxiliary data +structure that tracks the to-be-merged groups. + +Union-Find supports `Union` and `Find` operations, where `Union` merges sets and `Find` searches for +a "canonical" or "root" element that is shared between all elements in a given set.
+ +For more information about Union-Find, see these +[15-451 lecture notes](https://www.cs.cmu.edu/~15451-f24/lectures/lecture08-union-find.pdf). + +Here, we make the elements the groups themselves (really the Group IDs), which allows us to merge +group sets together and also determine a "root group" that all groups in a set can agree on. + +When every group in a group set has finished exploration, we can safely begin to merge them +together by moving all expressions from every group in the group set into a single large group. +Other than making sure that any reference to an old group in the group set points to this new large +group, exploration of all groups is done and physical optimization can start. + +RFC: Do we need to support incremental search? + +Note that since we are now waiting for exploration of all groups to finish, this algorithm is much +closer to the Volcano framework than the Cascades' incremental search. However, since we eventually +will want to store trails / breadcrumbs of decisions made to skip work in the future, and since we +essentially have unlimited space due to the memo table being backed by a DBMS, this is not as much +of a problem. + +## Duplicate Detection + +TODO explain the fingerprinting algorithm and how it relates to group merging + +When taking the fingerprint of an expression, the child groups of an expression need to be root groups. If they are not, we need to try again. +Assuming that all children are root groups, the fingerprint we make for any expression that fulfills that is valid and can be looked up for duplicates. +In order to maintain that correctness, on a merge of two sets, the smaller one requires that a new fingerprint be generated for every expression that has a group in that smaller set. +For example, let's say we need to merge { 1, 2 } (root group 1) with { 3, 4, 5, 6, 7, 8 } (root group 3).
We need to find every single expression that has a child group of 1 or 2 and we need to generate a new fingerprint for each where the child groups have been "rewritten" to 3 + +TODO this is incredibly expensive, but is potentially easily parallelizable? + diff --git a/optd-mvp/README.md b/optd-mvp/entities.md similarity index 100% rename from optd-mvp/README.md rename to optd-mvp/entities.md diff --git a/optd-mvp/src/entities/cascades_group.rs b/optd-mvp/src/entities/cascades_group.rs index 9c2ba83..62e1835 100644 --- a/optd-mvp/src/entities/cascades_group.rs +++ b/optd-mvp/src/entities/cascades_group.rs @@ -7,9 +7,9 @@ use sea_orm::entity::prelude::*; pub struct Model { #[sea_orm(primary_key)] pub id: i32, + pub status: i8, pub winner: Option, pub cost: Option, - pub is_optimized: bool, pub parent_id: Option, } diff --git a/optd-mvp/src/memo/mod.rs b/optd-mvp/src/memo/mod.rs index fbf23a2..83a821f 100644 --- a/optd-mvp/src/memo/mod.rs +++ b/optd-mvp/src/memo/mod.rs @@ -19,6 +19,14 @@ pub struct LogicalExpressionId(pub i32); #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct PhysicalExpressionId(pub i32); +/// A status enum representing the different states a group can be during query optimization. +#[repr(u8)] +pub enum GroupStatus { + InProgress = 0, + Explored = 1, + Optimized = 2, +} + /// The different kinds of errors that might occur while running operations on a memo table. 
#[derive(Error, Debug)] pub enum MemoError { diff --git a/optd-mvp/src/memo/persistent/implementation.rs b/optd-mvp/src/memo/persistent/implementation.rs index 4fc7048..002893a 100644 --- a/optd-mvp/src/memo/persistent/implementation.rs +++ b/optd-mvp/src/memo/persistent/implementation.rs @@ -10,7 +10,7 @@ use super::PersistentMemo; use crate::{ entities::*, expression::{LogicalExpression, PhysicalExpression}, - memo::{GroupId, LogicalExpressionId, MemoError, PhysicalExpressionId}, + memo::{GroupId, GroupStatus, LogicalExpressionId, MemoError, PhysicalExpressionId}, OptimizerResult, DATABASE_URL, }; use sea_orm::{ @@ -66,6 +66,40 @@ impl PersistentMemo { .ok_or(MemoError::UnknownGroup(group_id))?) } + /// Retrieves the root / canonical group ID of the given group ID. + /// + /// The groups form a union find / disjoint set parent pointer forest, where group merging + /// causes two trees to merge. + /// + /// This function uses the path compression optimization, which amortizes the cost to a single + /// lookup (theoretically in constant time, but we must be wary of the I/O roundtrip). + pub async fn get_root_group(&self, group_id: GroupId) -> OptimizerResult { + let mut curr_group = self.get_group(group_id).await?; + + // Traverse up the path and find the root group, keeping track of groups we have visited. + let mut path = vec![]; + loop { + let Some(parent_id) = curr_group.parent_id else { + break; + }; + + let next_group = self.get_group(GroupId(parent_id)).await?; + path.push(curr_group); + curr_group = next_group; + } + + let root_id = GroupId(curr_group.id); + + // Path Compression Optimization: + // For every group along the path that we walked, set their parent id pointer to the root. + // This allows for an amortized O(1) cost for `get_root_group`. + for group in path { + self.update_group_parent(GroupId(group.id), root_id).await?; + } + + Ok(root_id) + } + /// Retrieves a [`physical_expression::Model`] given a [`PhysicalExpressionId`]. 
/// /// If the physical expression does not exist, returns a @@ -146,6 +180,32 @@ impl PersistentMemo { Ok(children) } + /// Updates / replaces a group's status. Returns the previous group status. + /// + /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. + pub async fn update_group_status( + &self, + group_id: GroupId, + status: GroupStatus, + ) -> OptimizerResult { + // First retrieve the group record. + let mut group = self.get_group(group_id).await?.into_active_model(); + + // Update the group's status. + let old_status = group.status; + group.status = Set(status as u8 as i8); + group.update(&self.db).await?; + + let old_status = match old_status.unwrap() { + 0 => GroupStatus::InProgress, + 1 => GroupStatus::Explored, + 2 => GroupStatus::Optimized, + _ => panic!("encountered an invalid group status"), + }; + + Ok(old_status) + } + /// Updates / replaces a group's best physical plan (winner). Optionally returns the previous /// winner's physical expression ID. /// @@ -167,8 +227,32 @@ impl PersistentMemo { group.update(&self.db).await?; // Note that the `unwrap` here is unwrapping the `ActiveValue`, not the `Option`. - let old = old_id.unwrap().map(PhysicalExpressionId); - Ok(old) + let old_id = old_id.unwrap().map(PhysicalExpressionId); + Ok(old_id) + } + + /// Updates / replaces a group's parent group. Optionally returns the previous parent. + /// + /// If either of the groups do not exist, returns a [`MemoError::UnknownGroup`] error. + pub async fn update_group_parent( + &self, + group_id: GroupId, + parent_id: GroupId, + ) -> OptimizerResult> { + // First retrieve the group record. + let mut group = self.get_group(group_id).await?.into_active_model(); + + // Check that the parent group exists. + let _ = self.get_group(parent_id).await?; + + // Update the group to point to the new parent. 
+ let old_parent = group.parent_id; + group.parent_id = Set(Some(parent_id.0)); + group.update(&self.db).await?; + + // Note that the `unwrap` here is unwrapping the `ActiveValue`, not the `Option`. + let old_parent = old_parent.unwrap().map(GroupId); + Ok(old_parent) } /// Adds a logical expression to an existing group via its ID. @@ -192,10 +276,10 @@ impl PersistentMemo { group_id: GroupId, logical_expression: LogicalExpression, children: &[GroupId], - ) -> OptimizerResult> { + ) -> OptimizerResult> { // Check if the expression already exists anywhere in the memo table. if let Some(existing_id) = self - .is_duplicate_logical_expression(&logical_expression) + .is_duplicate_logical_expression(&logical_expression, children) .await? { return Ok(Err(existing_id)); @@ -227,7 +311,15 @@ impl PersistentMemo { // Finally, insert the fingerprint of the logical expression as well. let new_expr: LogicalExpression = new_model.into(); let kind = new_expr.kind(); - let hash = new_expr.fingerprint(); + + // In order to calculate a correct fingerprint, we will want to use the IDs of the root + // groups of the children instead of the child ID themselves. + let mut rewrites = vec![]; + for &child_id in children { + let root_id = self.get_root_group(child_id).await?; + rewrites.push((child_id, root_id)); + } + let hash = new_expr.fingerprint_with_rewrite(&rewrites); let fingerprint = fingerprint::ActiveModel { id: NotSet, @@ -285,8 +377,8 @@ impl PersistentMemo { /// In order to prevent a large amount of duplicate work, the memo table must support duplicate /// expression detection. /// - /// Returns `Some(expression_id)` if the memo table detects that the expression already exists, - /// and `None` otherwise. + /// Returns `Some((group_id, expression_id))` if the memo table detects that the expression + /// already exists, and `None` otherwise. /// /// This function assumes that the child groups of the expression are currently roots of their /// group sets. 
For example, if G1 and G2 should be merged, and G1 is the root, then the input @@ -296,13 +388,22 @@ impl PersistentMemo { pub async fn is_duplicate_logical_expression( &self, logical_expression: &LogicalExpression, - ) -> OptimizerResult> { + children: &[GroupId], + ) -> OptimizerResult> { let model: logical_expression::Model = logical_expression.clone().into(); // Lookup all expressions that have the same fingerprint and kind. There may be false // positives, but we will check for those next. let kind = model.kind; - let fingerprint = logical_expression.fingerprint(); + + // In order to calculate a correct fingerprint, we will want to use the IDs of the root + // groups of the children instead of the child ID themselves. + let mut rewrites = vec![]; + for &child_id in children { + let root_id = self.get_root_group(child_id).await?; + rewrites.push((child_id, root_id)); + } + let fingerprint = logical_expression.fingerprint_with_rewrite(&rewrites); // Filter first by the fingerprint, and then the kind. // FIXME: The kind is already embedded into the fingerprint, so we may not actually need the @@ -323,11 +424,11 @@ impl PersistentMemo { let mut match_id = None; for potential_match in potential_matches { let expr_id = LogicalExpressionId(potential_match.logical_expression_id); - let (_, expr) = self.get_logical_expression(expr_id).await?; + let (group_id, expr) = self.get_logical_expression(expr_id).await?; // Check for an exact match. if &expr == logical_expression { - match_id = Some(expr_id); + match_id = Some((group_id, expr_id)); // There should be at most one duplicate expression, so we can break here. break; @@ -359,18 +460,17 @@ impl PersistentMemo { ) -> OptimizerResult> { // Check if the expression already exists in the memo table. - if let Some(existing_id) = self - .is_duplicate_logical_expression(&logical_expression) + if let Some((group_id, existing_id)) = self + .is_duplicate_logical_expression(&logical_expression, children) .await? 
{ - let (group_id, _expr) = self.get_logical_expression(existing_id).await?; return Ok(Err((group_id, existing_id))); } // The expression does not exist yet, so we need to create a new group and new expression. let group = cascades_group::ActiveModel { winner: Set(None), - is_optimized: Set(false), + status: Set(0), // `GroupStatus::InProgress` status. ..Default::default() }; @@ -401,7 +501,15 @@ impl PersistentMemo { // Finally, insert the fingerprint of the logical expression as well. let new_expr: LogicalExpression = new_model.into(); let kind = new_expr.kind(); - let hash = new_expr.fingerprint(); + + // In order to calculate a correct fingerprint, we will want to use the IDs of the root + // groups of the children instead of the child ID themselves. + let mut rewrites = vec![]; + for &child_id in children { + let root_id = self.get_root_group(child_id).await?; + rewrites.push((child_id, root_id)); + } + let hash = new_expr.fingerprint_with_rewrite(&rewrites); let fingerprint = fingerprint::ActiveModel { id: NotSet, diff --git a/optd-mvp/src/memo/persistent/tests.rs b/optd-mvp/src/memo/persistent/tests.rs index f3afea6..3dcddd6 100644 --- a/optd-mvp/src/memo/persistent/tests.rs +++ b/optd-mvp/src/memo/persistent/tests.rs @@ -34,20 +34,22 @@ async fn test_simple_logical_duplicates() { // Test `add_logical_expression_to_group`. { // Attempting to add a duplicate expression into the same group should also fail every time. 
- let logical_expression_id_2a = memo + let (group_id_2a, logical_expression_id_2a) = memo .add_logical_expression_to_group(group_id, scan2a, &[]) .await .unwrap() .err() .unwrap(); + assert_eq!(group_id, group_id_2a); assert_eq!(logical_expression_id, logical_expression_id_2a); - let logical_expression_id_2b = memo + let (group_id_2b, logical_expression_id_2b) = memo .add_logical_expression_to_group(group_id, scan2b, &[]) .await .unwrap() .err() .unwrap(); + assert_eq!(group_id, group_id_2b); assert_eq!(logical_expression_id, logical_expression_id_2b); } @@ -140,3 +142,71 @@ async fn test_simple_tree() { memo.cleanup().await; } + +/// Tests basic group merging. See comments in the test itself for more information. +#[ignore] +#[tokio::test] +async fn test_simple_group_link() { + let memo = PersistentMemo::new().await; + memo.cleanup().await; + + // Create two scan groups. + let scan1 = scan("t1".to_string()); + let scan2 = scan("t2".to_string()); + let (scan_id_1, _) = memo.add_group(scan1, &[]).await.unwrap().ok().unwrap(); + let (scan_id_2, _) = memo.add_group(scan2, &[]).await.unwrap().ok().unwrap(); + + // Create two join expression that should be in the same group. + // Even though these are obviously the same expression (to humans), the fingerprints will be + // different, and so they will be put into different groups. + let join1 = join(scan_id_1, scan_id_2, "t1.a = t2.b".to_string()); + let join2 = join(scan_id_2, scan_id_1, "t2.b = t1.a".to_string()); + let join_unknown = join2.clone(); + + let (join_group_1, _) = memo + .add_group(join1, &[scan_id_1, scan_id_2]) + .await + .unwrap() + .ok() + .unwrap(); + let (join_group_2, join_expr_2) = memo + .add_group(join2, &[scan_id_2, scan_id_1]) + .await + .unwrap() + .ok() + .unwrap(); + assert_ne!(join_group_1, join_group_2); + + // Assume that some rule was applied to `join1`, and it outputs something like `join_unknown`. + // The memo table will tell us that `join_unknown == join2`. 
+ // Take note here that `join_unknown` is a clone of `join2`, not `join1`. + let (existing_group, not_actually_new_expr_id) = memo + .add_logical_expression_to_group(join_group_1, join_unknown, &[scan_id_2, scan_id_1]) + .await + .unwrap() + .err() + .unwrap(); + assert_eq!(existing_group, join_group_2); + assert_eq!(not_actually_new_expr_id, join_expr_2); + + // The above tells the application that the expression already exists in the memo, specifically + // under `existing_group`. Thus, we should link these two groups together. + // Here, we arbitrarily choose to link group 1 into group 2. + memo.update_group_parent(join_group_1, join_group_2) + .await + .unwrap(); + + let test_root_1 = memo.get_root_group(join_group_1).await.unwrap(); + let test_root_2 = memo.get_root_group(join_group_2).await.unwrap(); + assert_eq!(test_root_1, test_root_2); + + // TODO(Connor) + // + // We now need to find all logical expressions that had group 1 (or whatever the root group of + // the set that group 1 belongs to is, in this case it is just group 1) as a child, and add a + // new fingerprint for each one that uses group 2 as a child instead. + // + // In order to do this, we need to iterate through every group in group 1's set. 
+ + memo.cleanup().await; +} diff --git a/optd-mvp/src/migrator/memo/m20241127_000001_cascades_group.rs b/optd-mvp/src/migrator/memo/m20241127_000001_cascades_group.rs index 3a0e7d0..abaa829 100644 --- a/optd-mvp/src/migrator/memo/m20241127_000001_cascades_group.rs +++ b/optd-mvp/src/migrator/memo/m20241127_000001_cascades_group.rs @@ -74,9 +74,9 @@ use sea_orm_migration::{prelude::*, schema::*}; pub enum CascadesGroup { Table, Id, + Status, Winner, Cost, - IsOptimized, ParentId, } @@ -92,8 +92,9 @@ impl MigrationTrait for Migration { .table(CascadesGroup::Table) .if_not_exists() .col(pk_auto(CascadesGroup::Id)) + .col(tiny_integer(CascadesGroup::Status)) .col(integer_null(CascadesGroup::Winner)) - .col(big_unsigned_null(CascadesGroup::Cost)) + .col(big_integer_null(CascadesGroup::Cost)) .foreign_key( ForeignKey::create() .from(CascadesGroup::Table, CascadesGroup::Winner) @@ -101,7 +102,6 @@ impl MigrationTrait for Migration { .on_delete(ForeignKeyAction::SetNull) .on_update(ForeignKeyAction::Cascade), ) - .col(boolean(CascadesGroup::IsOptimized)) .col(integer_null(CascadesGroup::ParentId)) .foreign_key( ForeignKey::create() diff --git a/optd-mvp/src/migrator/memo/m20241127_000001_fingerprint.rs b/optd-mvp/src/migrator/memo/m20241127_000001_fingerprint.rs index 4a828b8..e153b9e 100644 --- a/optd-mvp/src/migrator/memo/m20241127_000001_fingerprint.rs +++ b/optd-mvp/src/migrator/memo/m20241127_000001_fingerprint.rs @@ -26,7 +26,7 @@ impl MigrationTrait for Migration { .table(Fingerprint::Table) .if_not_exists() .col(pk_auto(Fingerprint::Id)) - .col(unsigned(Fingerprint::LogicalExpressionId)) + .col(integer(Fingerprint::LogicalExpressionId)) .foreign_key( ForeignKey::create() .from(Fingerprint::Table, Fingerprint::LogicalExpressionId) @@ -34,8 +34,8 @@ impl MigrationTrait for Migration { .on_delete(ForeignKeyAction::Cascade) .on_update(ForeignKeyAction::Cascade), ) - .col(small_unsigned(Fingerprint::Kind)) - .col(big_unsigned(Fingerprint::Hash)) + 
.col(small_integer(Fingerprint::Kind)) + .col(big_integer(Fingerprint::Hash)) .to_owned(), ) .await From 194ae5e210c12359c7648917306ea11be9f9d8a6 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Sun, 1 Dec 2024 13:20:02 -0500 Subject: [PATCH 07/13] rename cascades_group to group --- optd-mvp/DESIGN.md | 2 + .../entities/{cascades_group.rs => group.rs} | 21 +++--- optd-mvp/src/entities/logical_children.rs | 10 +-- optd-mvp/src/entities/logical_expression.rs | 14 ++-- optd-mvp/src/entities/mod.rs | 2 +- optd-mvp/src/entities/physical_children.rs | 10 +-- optd-mvp/src/entities/physical_expression.rs | 10 +-- optd-mvp/src/entities/prelude.rs | 2 +- optd-mvp/src/expression/logical_expression.rs | 2 +- optd-mvp/src/expression/mod.rs | 4 +- .../src/expression/physical_expression.rs | 2 +- optd-mvp/src/memo/mod.rs | 3 +- .../src/memo/persistent/implementation.rs | 42 ++++++------ ...des_group.rs => m20241127_000001_group.rs} | 67 ++++++++++++------- .../memo/m20241127_000001_logical_children.rs | 12 ++-- .../m20241127_000001_logical_expression.rs | 16 ++--- .../m20241127_000001_physical_children.rs | 14 ++-- .../m20241127_000001_physical_expression.rs | 16 ++--- optd-mvp/src/migrator/memo/mod.rs | 8 +-- optd-mvp/src/migrator/mod.rs | 2 +- 20 files changed, 137 insertions(+), 122 deletions(-) rename optd-mvp/src/entities/{cascades_group.rs => group.rs} (82%) rename optd-mvp/src/migrator/memo/{m20241127_000001_cascades_group.rs => m20241127_000001_group.rs} (61%) diff --git a/optd-mvp/DESIGN.md b/optd-mvp/DESIGN.md index e50a56c..190eee3 100644 --- a/optd-mvp/DESIGN.md +++ b/optd-mvp/DESIGN.md @@ -58,6 +58,8 @@ of a problem. TODO explain the fingerprinting algorithm and how it relates to group merging +Union find data structure with a circular linked list for linear iteration + When taking the fingerprint of an expression, the child groups of an expression need to be root groups. If they are not, we need to try again. 
Assuming that all children are root groups, the fingerprint we make for any expression that fulfills that is valid and can be looked up for duplicates. In order to maintain that correctness, on a merge of two sets, the smaller one requires that a new fingerprint be generated for every expression that has a group in that smaller set. diff --git a/optd-mvp/src/entities/cascades_group.rs b/optd-mvp/src/entities/group.rs similarity index 82% rename from optd-mvp/src/entities/cascades_group.rs rename to optd-mvp/src/entities/group.rs index 62e1835..333ab05 100644 --- a/optd-mvp/src/entities/cascades_group.rs +++ b/optd-mvp/src/entities/group.rs @@ -3,7 +3,7 @@ use sea_orm::entity::prelude::*; #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] -#[sea_orm(table_name = "cascades_group")] +#[sea_orm(table_name = "group")] pub struct Model { #[sea_orm(primary_key)] pub id: i32, @@ -11,10 +11,19 @@ pub struct Model { pub winner: Option, pub cost: Option, pub parent_id: Option, + pub next_id: Option, } #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] pub enum Relation { + #[sea_orm( + belongs_to = "Entity", + from = "Column::NextId", + to = "Column::Id", + on_update = "Cascade", + on_delete = "SetNull" + )] + SelfRef2, #[sea_orm( belongs_to = "Entity", from = "Column::ParentId", @@ -22,7 +31,7 @@ pub enum Relation { on_update = "Cascade", on_delete = "SetNull" )] - SelfRef, + SelfRef1, #[sea_orm(has_many = "super::logical_children::Entity")] LogicalChildren, #[sea_orm(has_many = "super::logical_expression::Entity")] @@ -56,7 +65,7 @@ impl Related for Entity { super::logical_children::Relation::LogicalExpression.def() } fn via() -> Option { - Some(super::logical_children::Relation::CascadesGroup.def().rev()) + Some(super::logical_children::Relation::Group.def().rev()) } } @@ -65,11 +74,7 @@ impl Related for Entity { super::physical_children::Relation::PhysicalExpression.def() } fn via() -> Option { - Some( - super::physical_children::Relation::CascadesGroup - 
.def() - .rev(), - ) + Some(super::physical_children::Relation::Group.def().rev()) } } diff --git a/optd-mvp/src/entities/logical_children.rs b/optd-mvp/src/entities/logical_children.rs index 067eaa7..a0ac39c 100644 --- a/optd-mvp/src/entities/logical_children.rs +++ b/optd-mvp/src/entities/logical_children.rs @@ -14,13 +14,13 @@ pub struct Model { #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] pub enum Relation { #[sea_orm( - belongs_to = "super::cascades_group::Entity", + belongs_to = "super::group::Entity", from = "Column::GroupId", - to = "super::cascades_group::Column::Id", + to = "super::group::Column::Id", on_update = "Cascade", on_delete = "Cascade" )] - CascadesGroup, + Group, #[sea_orm( belongs_to = "super::logical_expression::Entity", from = "Column::LogicalExpressionId", @@ -31,9 +31,9 @@ pub enum Relation { LogicalExpression, } -impl Related for Entity { +impl Related for Entity { fn to() -> RelationDef { - Relation::CascadesGroup.def() + Relation::Group.def() } } diff --git a/optd-mvp/src/entities/logical_expression.rs b/optd-mvp/src/entities/logical_expression.rs index 4c257f3..82d938f 100644 --- a/optd-mvp/src/entities/logical_expression.rs +++ b/optd-mvp/src/entities/logical_expression.rs @@ -14,16 +14,16 @@ pub struct Model { #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] pub enum Relation { + #[sea_orm(has_many = "super::fingerprint::Entity")] + Fingerprint, #[sea_orm( - belongs_to = "super::cascades_group::Entity", + belongs_to = "super::group::Entity", from = "Column::GroupId", - to = "super::cascades_group::Column::Id", + to = "super::group::Column::Id", on_update = "Cascade", on_delete = "Cascade" )] - CascadesGroup, - #[sea_orm(has_many = "super::fingerprint::Entity")] - Fingerprint, + Group, #[sea_orm(has_many = "super::logical_children::Entity")] LogicalChildren, } @@ -40,9 +40,9 @@ impl Related for Entity { } } -impl Related for Entity { +impl Related for Entity { fn to() -> RelationDef { - 
super::logical_children::Relation::CascadesGroup.def() + super::logical_children::Relation::Group.def() } fn via() -> Option { Some( diff --git a/optd-mvp/src/entities/mod.rs b/optd-mvp/src/entities/mod.rs index 77d6b2c..3abd379 100644 --- a/optd-mvp/src/entities/mod.rs +++ b/optd-mvp/src/entities/mod.rs @@ -2,8 +2,8 @@ pub mod prelude; -pub mod cascades_group; pub mod fingerprint; +pub mod group; pub mod logical_children; pub mod logical_expression; pub mod physical_children; diff --git a/optd-mvp/src/entities/physical_children.rs b/optd-mvp/src/entities/physical_children.rs index d8f9db0..e58e9ca 100644 --- a/optd-mvp/src/entities/physical_children.rs +++ b/optd-mvp/src/entities/physical_children.rs @@ -14,13 +14,13 @@ pub struct Model { #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] pub enum Relation { #[sea_orm( - belongs_to = "super::cascades_group::Entity", + belongs_to = "super::group::Entity", from = "Column::GroupId", - to = "super::cascades_group::Column::Id", + to = "super::group::Column::Id", on_update = "Cascade", on_delete = "Cascade" )] - CascadesGroup, + Group, #[sea_orm( belongs_to = "super::physical_expression::Entity", from = "Column::PhysicalExpressionId", @@ -31,9 +31,9 @@ pub enum Relation { PhysicalExpression, } -impl Related for Entity { +impl Related for Entity { fn to() -> RelationDef { - Relation::CascadesGroup.def() + Relation::Group.def() } } diff --git a/optd-mvp/src/entities/physical_expression.rs b/optd-mvp/src/entities/physical_expression.rs index 482227a..4fba71e 100644 --- a/optd-mvp/src/entities/physical_expression.rs +++ b/optd-mvp/src/entities/physical_expression.rs @@ -15,13 +15,13 @@ pub struct Model { #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] pub enum Relation { #[sea_orm( - belongs_to = "super::cascades_group::Entity", + belongs_to = "super::group::Entity", from = "Column::GroupId", - to = "super::cascades_group::Column::Id", + to = "super::group::Column::Id", on_update = "Cascade", on_delete = 
"Cascade" )] - CascadesGroup, + Group, #[sea_orm(has_many = "super::physical_children::Entity")] PhysicalChildren, } @@ -32,9 +32,9 @@ impl Related for Entity { } } -impl Related for Entity { +impl Related for Entity { fn to() -> RelationDef { - super::physical_children::Relation::CascadesGroup.def() + super::physical_children::Relation::Group.def() } fn via() -> Option { Some( diff --git a/optd-mvp/src/entities/prelude.rs b/optd-mvp/src/entities/prelude.rs index bf6879b..8e8deaa 100644 --- a/optd-mvp/src/entities/prelude.rs +++ b/optd-mvp/src/entities/prelude.rs @@ -2,8 +2,8 @@ #![allow(unused_imports)] -pub use super::cascades_group::Entity as CascadesGroup; pub use super::fingerprint::Entity as Fingerprint; +pub use super::group::Entity as Group; pub use super::logical_children::Entity as LogicalChildren; pub use super::logical_expression::Entity as LogicalExpression; pub use super::physical_children::Entity as PhysicalChildren; diff --git a/optd-mvp/src/expression/logical_expression.rs b/optd-mvp/src/expression/logical_expression.rs index 7c3362d..c7918de 100644 --- a/optd-mvp/src/expression/logical_expression.rs +++ b/optd-mvp/src/expression/logical_expression.rs @@ -1,4 +1,4 @@ -//! Definition of logical expressions / relations in the Cascades query optimization framework. +//! Definition of logical expressions / relations in our query optimization framework. //! //! FIXME: All fields are placeholders. //! diff --git a/optd-mvp/src/expression/mod.rs b/optd-mvp/src/expression/mod.rs index 459e13b..3b6d7cf 100644 --- a/optd-mvp/src/expression/mod.rs +++ b/optd-mvp/src/expression/mod.rs @@ -1,4 +1,4 @@ -//! In-memory representation of Cascades logical and physical expression / operators / relations. +//! In-memory representation of logical and physical expression / operators / relations. //! //! TODO more docs. @@ -8,7 +8,7 @@ pub use logical_expression::*; mod physical_expression; pub use physical_expression::*; -/// The representation of a Cascades expression. 
+/// The representation of an expression. /// /// TODO more docs. #[derive(Clone, Debug)] diff --git a/optd-mvp/src/expression/physical_expression.rs b/optd-mvp/src/expression/physical_expression.rs index 5719752..aaaa9e7 100644 --- a/optd-mvp/src/expression/physical_expression.rs +++ b/optd-mvp/src/expression/physical_expression.rs @@ -1,4 +1,4 @@ -//! Definition of physical expressions / operators in the Cascades query optimization framework. +//! Definition of physical expressions / operators in our query optimization framework. //! //! FIXME: All fields are placeholders. //! diff --git a/optd-mvp/src/memo/mod.rs b/optd-mvp/src/memo/mod.rs index 83a821f..08b74db 100644 --- a/optd-mvp/src/memo/mod.rs +++ b/optd-mvp/src/memo/mod.rs @@ -1,5 +1,4 @@ -//! This module contains items related to the memo table, which is key to the Cascades query -//! optimization framework. +//! This module contains items related to the memo table. //! //! TODO more docs. diff --git a/optd-mvp/src/memo/persistent/implementation.rs b/optd-mvp/src/memo/persistent/implementation.rs index 002893a..d7e7c25 100644 --- a/optd-mvp/src/memo/persistent/implementation.rs +++ b/optd-mvp/src/memo/persistent/implementation.rs @@ -45,7 +45,7 @@ impl PersistentMemo { } delete_all! { - cascades_group, + group, fingerprint, logical_expression, logical_children, @@ -54,13 +54,13 @@ impl PersistentMemo { }; } - /// Retrieves a [`cascades_group::Model`] given its ID. + /// Retrieves a [`group::Model`] given its ID. /// /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. /// /// FIXME: use an in-memory representation of a group instead. - pub async fn get_group(&self, group_id: GroupId) -> OptimizerResult { - Ok(cascades_group::Entity::find_by_id(group_id.0) + pub async fn get_group(&self, group_id: GroupId) -> OptimizerResult { + Ok(group::Entity::find_by_id(group_id.0) .one(&self.db) .await? .ok_or(MemoError::UnknownGroup(group_id))?) 
@@ -78,11 +78,7 @@ impl PersistentMemo { // Traverse up the path and find the root group, keeping track of groups we have visited. let mut path = vec![]; - loop { - let Some(parent_id) = curr_group.parent_id else { - break; - }; - + while let Some(parent_id) = curr_group.parent_id { let next_group = self.get_group(GroupId(parent_id)).await?; path.push(curr_group); curr_group = next_group; @@ -468,29 +464,29 @@ impl PersistentMemo { } // The expression does not exist yet, so we need to create a new group and new expression. - let group = cascades_group::ActiveModel { - winner: Set(None), + let group = group::ActiveModel { status: Set(0), // `GroupStatus::InProgress` status. ..Default::default() }; // Create the new group. - let res = cascades_group::Entity::insert(group).exec(&self.db).await?; + let group_res = group::Entity::insert(group).exec(&self.db).await?; + let group_id = group_res.last_insert_id; // Insert the input expression into the newly created group. - let model: logical_expression::Model = logical_expression.clone().into(); - let mut active_model = model.into_active_model(); - active_model.group_id = Set(res.last_insert_id); - active_model.id = NotSet; - let new_model = active_model.insert(&self.db).await?; + let expression: logical_expression::Model = logical_expression.clone().into(); + let mut active_expression = expression.into_active_model(); + active_expression.group_id = Set(group_id); + active_expression.id = NotSet; + let new_expression = active_expression.insert(&self.db).await?; - let group_id = new_model.group_id; - let expr_id = new_model.id; + let group_id = new_expression.group_id; + let expr_id = new_expression.id; // Insert the child groups of the expression into the junction / children table. 
logical_children::Entity::insert_many(children.iter().copied().map(|child_id| { logical_children::ActiveModel { - logical_expression_id: Set(new_model.id), + logical_expression_id: Set(new_expression.id), group_id: Set(child_id.0), } })) @@ -499,8 +495,8 @@ impl PersistentMemo { .await?; // Finally, insert the fingerprint of the logical expression as well. - let new_expr: LogicalExpression = new_model.into(); - let kind = new_expr.kind(); + let new_logical_expression: LogicalExpression = new_expression.into(); + let kind = new_logical_expression.kind(); // In order to calculate a correct fingerprint, we will want to use the IDs of the root // groups of the children instead of the child ID themselves. @@ -509,7 +505,7 @@ impl PersistentMemo { let root_id = self.get_root_group(child_id).await?; rewrites.push((child_id, root_id)); } - let hash = new_expr.fingerprint_with_rewrite(&rewrites); + let hash = new_logical_expression.fingerprint_with_rewrite(&rewrites); let fingerprint = fingerprint::ActiveModel { id: NotSet, diff --git a/optd-mvp/src/migrator/memo/m20241127_000001_cascades_group.rs b/optd-mvp/src/migrator/memo/m20241127_000001_group.rs similarity index 61% rename from optd-mvp/src/migrator/memo/m20241127_000001_cascades_group.rs rename to optd-mvp/src/migrator/memo/m20241127_000001_group.rs index abaa829..d5bbe0e 100644 --- a/optd-mvp/src/migrator/memo/m20241127_000001_cascades_group.rs +++ b/optd-mvp/src/migrator/memo/m20241127_000001_group.rs @@ -1,3 +1,5 @@ +//! FIXME We aren't really following the cascades framework anymore... +//! //! An entity representing a group / equivalence class in the Cascades framework. //! //! Quoted from the Microsoft article _Extensible query optimizers in practice_: @@ -17,38 +19,42 @@ //! # Columns //! //! Each group is assigned a monotonically-increasing (unique) ID. This ID will be important since -//! there are many foreign key references from other tables to `cascades_group`. +//! 
there are many foreign key references from other tables to `group`. +//! +//! We store a `status` enumeration encoded as an 8-bit integer that is used for quickly +//! determining the state of optimization for this group during the dynamic programming search. //! -//! We additionally store a `latest_winner` foreign key reference to a physical expression. See -//! the [section](#best-physical-plan-winner) below for more details. +//! We additionally store a `winner` foreign key reference to a physical expression paired with a +//! `cost` foreign key reference to a cost record (FIXME). See the +//! [section](#best-physical-plan-winner) below for more details. //! -//! Finally, we store an `is_optimized` flag that is used for quickly determining the state of -//! optimization for this group during the dynamic programming search. +//! Finally, we maintain a union-find graph structure embedded in the group records. +//! TODO write more information about this once this is implemented. //! //! # Entity Relationships //! //! ### Child Expressions (Logical and Physical) //! -//! To retrieve all of a `cascades_group`'s equivalent expressions, you must query the +//! To retrieve all of a `group`'s equivalent expressions, you must query the //! [`logical_expression`] or the [`physical_expression`] entities via their foreign keys to -//! `cascades_group`. The relationship between [`logical_expression`] and `cascades_group` is +//! `group`. The relationship between [`logical_expression`] and `group` is //! many-to-one, and the exact same many-to-one relationship is held for [`physical_expression`] to -//! `cascades_group`. +//! `group`. //! //! ### Parent Expressions (Logical and Physical) //! -//! Additionally, each logical or physical expression can have any number of `cascades_group`s as -//! children, and a group can be a child of any expression. Thus, `cascades_group` additionally has +//!
Additionally, each logical or physical expression can have any number of `group`s as +//! children, and a group can be a child of any expression. Thus, `group` additionally has //! a many-to-many relationship with [`logical_expression`] and [`physical_expression`] via the //! [`logical_children`] and [`physical_children`] entities. //! -//! To reiterate, `cascades_group` has **both** a one-to-many **and** a many-to-many relationship +//! To reiterate, `group` has **both** a one-to-many **and** a many-to-many relationship //! with both [`logical_expression`] and [`physical_expression`]. This is due to groups being both //! parents and children of expressions. //! //! ### Best Physical Plan (Winner) //! -//! The `cascades_group` entity also stores a `latest_winner` _nullable_ foreign key reference to +//! The `group` entity also stores a `winner` _nullable_ foreign key reference to //! a physical expression. This represents the most recent best query plan we have computed. The //! reason it is nullable is because we may not have come up with any best query plan yet. //! @@ -56,10 +62,10 @@ //! //! FIXME: Add a logical properties table. //! -//! Lastly, each `cascades_group` record will have a set of logical properties store in the +//! Lastly, each `group` record will have a set of logical properties stored in the //! `logical_property` entity, where there is an many-to-one relationship from -//! `logical_property` to `cascades_group`. Note that we do not store physical properties directly -//! on the `cascades_group`, but rather we store them for each [`physical_expression`] record. +//! `logical_property` to `group`. Note that we do not store physical properties directly +//! on the `group`, but rather we store them for each [`physical_expression`] record. //! //! [`logical_expression`]: super::logical_expression //!
[`physical_expression`]: super::physical_expression @@ -71,13 +77,14 @@ use crate::migrator::memo::physical_expression::PhysicalExpression; use sea_orm_migration::{prelude::*, schema::*}; #[derive(DeriveIden)] -pub enum CascadesGroup { +pub enum Group { Table, Id, Status, Winner, Cost, ParentId, + NextId, } #[derive(DeriveMigrationName)] @@ -89,24 +96,32 @@ impl MigrationTrait for Migration { manager .create_table( Table::create() - .table(CascadesGroup::Table) + .table(Group::Table) .if_not_exists() - .col(pk_auto(CascadesGroup::Id)) - .col(tiny_integer(CascadesGroup::Status)) - .col(integer_null(CascadesGroup::Winner)) - .col(big_integer_null(CascadesGroup::Cost)) + .col(pk_auto(Group::Id)) + .col(tiny_integer(Group::Status)) + .col(integer_null(Group::Winner)) + .col(big_integer_null(Group::Cost)) .foreign_key( ForeignKey::create() - .from(CascadesGroup::Table, CascadesGroup::Winner) + .from(Group::Table, Group::Winner) .to(PhysicalExpression::Table, PhysicalExpression::Id) .on_delete(ForeignKeyAction::SetNull) .on_update(ForeignKeyAction::Cascade), ) - .col(integer_null(CascadesGroup::ParentId)) + .col(integer_null(Group::ParentId)) + .foreign_key( + ForeignKey::create() + .from(Group::Table, Group::ParentId) + .to(Group::Table, Group::Id) + .on_delete(ForeignKeyAction::SetNull) + .on_update(ForeignKeyAction::Cascade), + ) + .col(integer_null(Group::NextId)) .foreign_key( ForeignKey::create() - .from(CascadesGroup::Table, CascadesGroup::ParentId) - .to(CascadesGroup::Table, CascadesGroup::Id) + .from(Group::Table, Group::NextId) + .to(Group::Table, Group::Id) .on_delete(ForeignKeyAction::SetNull) .on_update(ForeignKeyAction::Cascade), ) @@ -117,7 +132,7 @@ impl MigrationTrait for Migration { async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { manager - .drop_table(Table::drop().table(CascadesGroup::Table).to_owned()) + .drop_table(Table::drop().table(Group::Table).to_owned()) .await } } diff --git 
a/optd-mvp/src/migrator/memo/m20241127_000001_logical_children.rs b/optd-mvp/src/migrator/memo/m20241127_000001_logical_children.rs index 037a637..12b5aa6 100644 --- a/optd-mvp/src/migrator/memo/m20241127_000001_logical_children.rs +++ b/optd-mvp/src/migrator/memo/m20241127_000001_logical_children.rs @@ -1,16 +1,16 @@ -//! An entity representing the [`cascades_group`] children of every [`logical_expression`]. +//! An entity representing the [`group`] children of every [`logical_expression`]. //! //! Formally, this entity is a junction which allows us to represent a many-to-many relationship -//! between [`logical_expression`] and [`cascades_group`]. Expressions can have any number of child +//! between [`logical_expression`] and [`group`]. Expressions can have any number of child //! groups, and every group can be a child of many different expressions, hence the many-to-many //! relationship. //! -//! See [`cascades_group`] for more details. +//! See [`group`] for more details. //! -//! [`cascades_group`]: super::cascades_group +//! [`group`]: super::group //! 
[`logical_expression`]: super::logical_expression -use crate::migrator::memo::{cascades_group::CascadesGroup, logical_expression::LogicalExpression}; +use crate::migrator::memo::{group::Group, logical_expression::LogicalExpression}; use sea_orm_migration::{prelude::*, schema::*}; #[derive(DeriveIden)] @@ -48,7 +48,7 @@ impl MigrationTrait for Migration { .foreign_key( ForeignKey::create() .from(LogicalChildren::Table, LogicalChildren::GroupId) - .to(CascadesGroup::Table, CascadesGroup::Id) + .to(Group::Table, Group::Id) .on_delete(ForeignKeyAction::Cascade) .on_update(ForeignKeyAction::Cascade), ) diff --git a/optd-mvp/src/migrator/memo/m20241127_000001_logical_expression.rs b/optd-mvp/src/migrator/memo/m20241127_000001_logical_expression.rs index 57356cf..9b5eefc 100644 --- a/optd-mvp/src/migrator/memo/m20241127_000001_logical_expression.rs +++ b/optd-mvp/src/migrator/memo/m20241127_000001_logical_expression.rs @@ -1,4 +1,4 @@ -//! An entity representing a logical plan expression in the Cascades framework. +//! An entity representing a logical relational expression. //! //! Quoted from the Microsoft article _Extensible query optimizers in practice_: //! @@ -6,7 +6,7 @@ //! > relational algebraic expression. //! //! In the Cascades query optimization framework, the memo table stores equivalence classes of -//! expressions (see [`cascades_group`]). These equivalence classes, or "groups", store both +//! expressions (see [`group`]). These equivalence classes, or "groups", store both //! `logical_expression`s and [`physical_expression`]s. //! //! Optimization starts by "exploring" equivalent logical expressions within a group. For example, @@ -24,20 +24,20 @@ //! //! # Entity Relationships //! -//! The main relationship that `logical_expression` has is to [`cascades_group`]. It has **both** a -//! one-to-many **and** a many-to-many relationship with [`cascades_group`], and you can see more -//! 
details about this in the module-level documentation for [`cascades_group`]. +//! The main relationship that `logical_expression` has is to [`group`]. It has **both** a +//! one-to-many **and** a many-to-many relationship with [`group`], and you can see more +//! details about this in the module-level documentation for [`group`]. //! //! The other relationship that `logical_expression` has is to [`fingerprint`]. This table stores //! 1 or more fingerprints for every (unique) logical expression. The reason we have multiple //! fingerprints is that an expression can belong to multiple groups during the exploration phase //! before the merging of groups. //! -//! [`cascades_group`]: super::cascades_group +//! [`group`]: super::group //! [`physical_expression`]: super::physical_expression //! [`fingerprint`]: super::fingerprint -use crate::migrator::memo::cascades_group::CascadesGroup; +use crate::migrator::memo::group::Group; use sea_orm_migration::{prelude::*, schema::*}; #[derive(DeriveIden)] @@ -65,7 +65,7 @@ impl MigrationTrait for Migration { .foreign_key( ForeignKey::create() .from(LogicalExpression::Table, LogicalExpression::GroupId) - .to(CascadesGroup::Table, CascadesGroup::Id) + .to(Group::Table, Group::Id) .on_delete(ForeignKeyAction::Cascade) .on_update(ForeignKeyAction::Cascade), ) diff --git a/optd-mvp/src/migrator/memo/m20241127_000001_physical_children.rs b/optd-mvp/src/migrator/memo/m20241127_000001_physical_children.rs index 3983f0c..06ce259 100644 --- a/optd-mvp/src/migrator/memo/m20241127_000001_physical_children.rs +++ b/optd-mvp/src/migrator/memo/m20241127_000001_physical_children.rs @@ -1,18 +1,16 @@ -//! An entity representing the [`cascades_group`] children of every [`physical_expression`]. +//! An entity representing the [`group`] children of every [`physical_expression`]. //! //! Formally, this entity is a junction which allows us to represent a many-to-many relationship -//! between [`physical_expression`] and [`cascades_group`]. 
Expressions can have any number of child +//! between [`physical_expression`] and [`group`]. Expressions can have any number of child //! groups, and every group can be a child of many different expressions, hence the many-to-many //! relationship. //! -//! See [`cascades_group`] for more details. +//! See [`group`] for more details. //! -//! [`cascades_group`]: super::cascades_group +//! [`group`]: super::group //! [`physical_expression`]: super::physical_expression -use crate::migrator::memo::{ - cascades_group::CascadesGroup, physical_expression::PhysicalExpression, -}; +use crate::migrator::memo::{group::Group, physical_expression::PhysicalExpression}; use sea_orm_migration::{prelude::*, schema::*}; #[derive(DeriveIden)] @@ -53,7 +51,7 @@ impl MigrationTrait for Migration { .foreign_key( ForeignKey::create() .from(PhysicalChildren::Table, PhysicalChildren::GroupId) - .to(CascadesGroup::Table, CascadesGroup::Id) + .to(Group::Table, Group::Id) .on_delete(ForeignKeyAction::Cascade) .on_update(ForeignKeyAction::Cascade), ) diff --git a/optd-mvp/src/migrator/memo/m20241127_000001_physical_expression.rs b/optd-mvp/src/migrator/memo/m20241127_000001_physical_expression.rs index 1e66195..88cd63b 100644 --- a/optd-mvp/src/migrator/memo/m20241127_000001_physical_expression.rs +++ b/optd-mvp/src/migrator/memo/m20241127_000001_physical_expression.rs @@ -1,4 +1,4 @@ -//! An entity representing a logical plan expression in the Cascades framework. +//! An entity representing a physical plan expression. //! //! Quoted from the Microsoft article _Extensible query optimizers in practice_: //! @@ -6,7 +6,7 @@ //! > _physical plan_ or simply _plan_. //! //! In the Cascades query optimization framework, the memo table stores equivalence classes of -//! expressions (see [`cascades_group`]). These equivalence classes, or "groups", store both +//! expressions (see [`group`]). These equivalence classes, or "groups", store both //! [`logical_expression`]s and `physical_expression`s. //! 
//! Optimization starts by exploring equivalent logical expressions within a group, and then it @@ -30,14 +30,14 @@ //! //! # Entity Relationships //! -//! The only relationship that `physical_expression` has is to [`cascades_group`]. It has **both** a -//! one-to-many **and** a many-to-many relationship with [`cascades_group`], and you can see more -//! details about this in the module-level documentation for [`cascades_group`]. +//! The only relationship that `physical_expression` has is to [`group`]. It has **both** a +//! one-to-many **and** a many-to-many relationship with [`group`], and you can see more +//! details about this in the module-level documentation for [`group`]. //! -//! [`cascades_group`]: super::cascades_group +//! [`group`]: super::group //! [`logical_expression`]: super::logical_expression -use crate::migrator::memo::cascades_group::CascadesGroup; +use crate::migrator::memo::group::Group; use sea_orm_migration::{prelude::*, schema::*}; #[derive(DeriveIden)] @@ -65,7 +65,7 @@ impl MigrationTrait for Migration { .foreign_key( ForeignKey::create() .from(PhysicalExpression::Table, PhysicalExpression::GroupId) - .to(CascadesGroup::Table, CascadesGroup::Id) + .to(Group::Table, Group::Id) .on_delete(ForeignKeyAction::Cascade) .on_update(ForeignKeyAction::Cascade), ) diff --git a/optd-mvp/src/migrator/memo/mod.rs b/optd-mvp/src/migrator/memo/mod.rs index 7a60c9b..a419189 100644 --- a/optd-mvp/src/migrator/memo/mod.rs +++ b/optd-mvp/src/migrator/memo/mod.rs @@ -1,15 +1,15 @@ -//! Entities related to the memo table used for dynamic programming in the Cascades query -//! optimization framework. +//! Entities related to the memo table used for dynamic programming in our query optimization +//! framework. 
-pub(crate) mod m20241127_000001_cascades_group; pub(crate) mod m20241127_000001_fingerprint; +pub(crate) mod m20241127_000001_group; pub(crate) mod m20241127_000001_logical_children; pub(crate) mod m20241127_000001_logical_expression; pub(crate) mod m20241127_000001_physical_children; pub(crate) mod m20241127_000001_physical_expression; -pub(crate) use m20241127_000001_cascades_group as cascades_group; pub(crate) use m20241127_000001_fingerprint as fingerprint; +pub(crate) use m20241127_000001_group as group; pub(crate) use m20241127_000001_logical_children as logical_children; pub(crate) use m20241127_000001_logical_expression as logical_expression; pub(crate) use m20241127_000001_physical_children as physical_children; diff --git a/optd-mvp/src/migrator/mod.rs b/optd-mvp/src/migrator/mod.rs index 0945423..cbc39ae 100644 --- a/optd-mvp/src/migrator/mod.rs +++ b/optd-mvp/src/migrator/mod.rs @@ -8,7 +8,7 @@ pub struct Migrator; impl MigratorTrait for Migrator { fn migrations() -> Vec> { vec![ - Box::new(memo::cascades_group::Migration), + Box::new(memo::group::Migration), Box::new(memo::fingerprint::Migration), Box::new(memo::logical_expression::Migration), Box::new(memo::logical_children::Migration), From e075d8f89d6343caa4c5394dae581dd1ec20c834 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Sun, 1 Dec 2024 14:32:57 -0500 Subject: [PATCH 08/13] first draft fully merge group --- optd-mvp/DESIGN.md | 191 ++++++++++-- optd-mvp/entities.md | 2 +- optd-mvp/src/expression/logical_expression.rs | 76 +++-- .../src/expression/physical_expression.rs | 26 +- optd-mvp/src/lib.rs | 2 - .../src/memo/persistent/implementation.rs | 183 ++++++++--- optd-mvp/src/memo/persistent/tests.rs | 287 +++++++++++++++++- .../migrator/memo/m20241127_000001_group.rs | 4 +- 8 files changed, 650 insertions(+), 121 deletions(-) diff --git a/optd-mvp/DESIGN.md b/optd-mvp/DESIGN.md index 190eee3..6d1863d 100644 --- a/optd-mvp/DESIGN.md +++ b/optd-mvp/DESIGN.md @@ -1,9 +1,12 @@ # Duplicate 
Elimination Memo Table +_Connor Tsui, December 2024_ + Note that most of the details are in `src/memo/persistent/implementation.rs`. -For this document, we are assuming that the memo table is backed by a database / ORM. A lot of these -problems would likely not be an issue if everything was in memory. +For this document, we are assuming that the memo table is backed by a database / ORM. Both the +problems and the features detailed in this document are unique to this design, and likely do not +apply to an in-memory memo table. ## Group Merging @@ -12,20 +15,21 @@ for this is to immediately merge two groups together when the engine determines expression would result in a duplicate expression from another group. However, if we want to support parallel exploration, this could be prone to high contention. By -definition, merging group G1 into group G2 would mean that _every expression_ that has a child of -group G1 with would need to be rewritten to point to group G2 instead. +definition, merging group 1 into group 2 would mean that _every expression_ that has a child of +group 1 would need to be rewritten to point to group 2 instead. -This is unacceptable in a parallel setting, as that would mean every single task that gets affected -would need to either wait for the rewrites to happen before resuming work, or need to abort their -work because data has changed underneath them. +This is prohibitive in a parallel setting, as that would mean every single task that gets affected +would need to either wait for the rewrites to happen before resuming work, or potentially need to +abort their work because data has changed underneath them. -So immediate / eager group merging is not a great idea for parallel exploration. However, if we do -not ever merge two groups that are identical, we are subject to doing duplicate work for every +So immediate / eager group merging is not a great idea for parallel exploration. 
However, if we +don't merge two groups that are equivalent, we are subject to doing duplicate work for every duplicate expression in the memo table during physical optimization. Instead of merging groups together immediately, we can instead maintain an auxiliary data structure that records the groups that _eventually_ need to get merged, and "lazily" merge those groups -together once every group has finished exploration. +together once every group has finished exploration. We will refer to merging groups as the act of +recording that the groups should eventually be merged together after exploration is finished. ## Union-Find Group Sets @@ -33,20 +37,22 @@ We use the well-known Union-Find algorithm and corresponding data structure as t structure that tracks the to-be-merged groups. Union-Find supports `Union` and `Find` operations, where `Union` merges sets and `Find` searches for -a "canonical" or "root" element that is shared between all elements in a given set. +a "canonical" or "root" element that is shared between all elements in a given set. Note that we +will also support an iteration operation that iterates over all elements in a given set. We will +need this for [duplicate detection](#fingerprinting--group-merge), which is explained below. For more information about Union-Find, see these -[15-451 lecture notes](https://www.cs.cmu.edu/~15451-f24/lectures/lecture08-union-find.pdf). +[15-451 lecture notes](https://www.cs.cmu.edu/~15451-f24/lectures/lecture08-union-find.pdf). We will +use the exact same data structure, but add an additional `next` pointer for each node that embeds +a circular linked list for each set. -Here, we make the elements the groups themselves (really the Group IDs), which allows us to merge +Here, we make the elements the groups themselves (really the group IDs), which allows us to merge group sets together and also determine a "root group" that all groups in a set can agree on. 
When every group in a group set has finished exploration, we can safely begin to merge them together by moving all expressions from every group in the group set into a single large group. Other than making sure that any reference to an old group in the group set points to this new large -group, exploration of all groups are done and physical optimization can start. - -RFC: Do we need to support incremental search? +group, exploration of all groups is done and physical optimization can start. Note that since we are now waiting for exploration of all groups to finish, this algorithm is much closer to the Volcano framework than the Cascades' incremental search. However, since we eventually @@ -56,14 +62,153 @@ of a problem. ## Duplicate Detection -TODO explain the fingerprinting algorithm and how it relates to group merging +Deciding that we will merge groups lazily does not solve all of our problems. We have to know _when_ +we want to merge these groups. -Union find data structure with a circular linked list for linear iteration +A naive approach is to simply loop over every expression in the memo table and check if we are about +to insert a duplicate. This, of course, is bad for performance. -When taking the fingerprint of an expression, the child groups of an expression need to be root groups. If they are not, we need to try again. -Assuming that all children are root groups, the fingerprint we make for any expression that fulfills that is valid and can be looked up for duplicates. -In order to maintain that correctness, on a merge of two sets, the smaller one requires that a new fingerprint be generated for every expression that has a group in that smaller set. -For example, let's say we need to merge { 1, 2 } (root group 1) with { 3, 4, 5, 6, 7, 8 } (root group 3). 
We need to find every single expression that has a child group of 1 or 2 and we need to generate a new fingerprint for each where the child groups have been "rewritten" to 3 +We will use a fingerprinting / hashing method to detect when a duplicate expression might be +inserted into the memo table (returning an error instead of inserting), and we will use that to +trigger group merges. -TODO this is incredibly expensive, but is potentially easily parallelizable? +For every logical expression we insert into the memo table, we will create a fingerprint that +contains both the kind of expression / relation (Scan, Filter, Join) and a hash of all +information that makes that expression unique. For example: +- The fingerprint of a Scan should probably contain a hash of the table name and the pushdown + predicate. +- The fingerprint of a Filter should probably contain a hash of its child group ID and predicate. +- The fingerprint of a Join should probably contain a hash of the left group ID and the right group + ID, as well as the join predicate. + +Note that the above descriptions are slightly inaccurate, and we'll explain why in a later +[section](#fingerprinting--group-merge). + +Also, if we have duplicate detection for logical expression, and we do not start physical +optimization until after full plan enumeration, then we do not actually need to do duplicate +detection of physical expressions, since they are derivative of the deduplicated logical +expressions. + +### Fingerprint Matching Algorithm + +When an expression is added to the memo table, it will first calculate the fingerprint of the +expression. The memo table will compare this fingerprint with every fingerprint in the memo table to +check if we have seen this expression before (in any group). 
While this is effectively a scan +through every expression, supporting the fingerprint table with a B+tree index will speed up this +operation dramatically (since these fingerprints can be sorted by expression / relation kind). + +If there are no identical fingerprints, then there is no duplicate expression, and we can safely +add the expression into the memo table. However, if there are matching fingerprints, we need to +further check for false positives due to hash collisions. + +We do full exact match equality checks with every expression that had a fingerprint match. If there +are no exact matches, then we can safely add the expression into the memo table. However, if we find +an exact match (note that there can be at most one exact match since we have an invariant that there +cannot be duplicate expressions), then we know that the expression we are trying to add already +exists in the memo table. + +### Fingerprinting + Group Merge + +There is a slight problem with the algorithm described above. It does not account for when a child +group has merged into another group. + +For example, let's say we have groups 1, 2, and 3. We insert an expression Join(1, 2) into the +memo table with its fingerprint calculated with groups 1 and 2. It is possible that we find out that +groups 2 and 3 need to be merged. This means that Join(1, 2) and Join(1, 3) are actually identical +expressions, and the fingerprinting strategies for expressions described above do not handle this. + +We will solve this problem by allowing multiple fingerprints to reference the same logical +expression, and we will generate a new fingerprint for every expression that is affected by a group +merge / every expression whose child group now has a new root group. + +In the above scenario, we will find every expression in the memo table that has group 2 as a child. +For each expression, we will generate another fingerprint with group 2 "rewritten" as group 3 in the +hash. 
Note that we _do not_ modify the original expression, we are simply adding another fingerprint +into the memo table. + +Finally, we need to handle when multiple groups in a group set are merged into another group set. +For example, if a left group set { 1, 2, 3, 4, 5 } with root 1 needs to be merged into a right group +set { 6, 7, 8, 9, 10 } with root 6, then we need to generate a new fingerprint for every expression +in groups 1, 2, 3, 4, and 5 with group 1 "rewritten" as group 6. + +More formally, we are maintaining this invariant: +**For every expression, there exists a fingerprint that maps back to the expression that uses the** +**root groups of their children to calculate the hash.** + +For example, if we have a group set { 1, 3, 5 } with root group 1 and group set { 2, 4, 6 } with +root group 2, the fingerprint of Join(5, 4) should really be a fingerprint of Join(1, 2). + +This invariant means that when we are checking if some expression already exists, we should use the +root groups of the child groups in our expression to calculate the fingerprint, and we can guarantee +that no fingerprint matches implies no duplicates. + +A further implication of this invariant means that new fingerprints need to be generated every time +we merge groups. If we have a left group set { 1, 3, 5 } with root group 1 and right group set +{ 2, 4, 6 } with root group 2, and we merge the first group set into the second, then every +expression that has a child group of 1, 3, or 5 now has a stale fingerprint that uses root group 1 +instead of root group 2. + +Thus, when we merge the left group into the right group, we need to do the following: + +1. Gather the group set, i.e. every single group that has root group 1 (iterate) +2. Retrieve every single expression that has a child group in the group set (via junction table) +3. Generate a new fingerprint for each expression and add it into the memo table + +The speed of steps 2 and 3 above are largely dependent on the backing DBMS. 
However, we can support +step 1 directly in the union find data structure by maintaining a circular linked list for every set. +Each group now tracks both a `parent` pointer and a `next` pointer. When merging / unioning a set +into another set, we swap the `next` pointers of the two roots to maintain the circular linked list. +This allows us to do step 1 in linear time relative to the size of the group set. + +### Discovered Duplicates + +The above algorithm has one more problem: merging groups can cause the memo table to "discover" that +there are duplicate expressions in the memo table. + +Here is an example: let's say we have the following groups, each with one expression (note that the +example will work even with multiple expressions): + +1. `Scan(1)` +2. `Scan(2)` +3. `Filter(1)` +4. `Filter(2)` +5. `Filter(4)` +6. `Join(3, 4)` +7. `Join(3, 5)` +8. `Sort(6)` +9. `Sort(7)` + +Note how group 5 is just a second filter on top of group 2. Suppose that we find out that +`(Filter(4) = Filter(Filter(2))) == Filter(2)`. In that case, we need to merge groups 4 and 5. The +problem here is that groups 6 and 7 are considered separate groups, but we have now discovered that +they are actually the same. The same is true for groups 8 and 9. In this scenario, the merging of +groups has "generated" a duplicate expression. + +However, this is not as big of a problem as it might seem. The issue we want to avoid is lots of +duplicate work or even an infinite loop of rule application. Observe that if we apply a rule to both +the expression in group 6 and group 7, we will get the same exact expression. + +For example, if we apply join commutativity to the expression in group 6 (`Join(3, 4)`), we would +add `Join(4, 3)` into group 6. When we apply join commutativity to the expression in group 7 +(`Join(3, 5)`), we would get back `Join(5, 3)`. 
However, the memo table will detect this as a +duplicate because it will use the root group of 4 and 5 to generate the fingerprint and see that +`Join(4, 3)` already exists. Again, similar logic applies for groups 8 and 9. + +At a high level, almost all of our operations are lazy. Work does not need to be done unless it is +absolutely necessary for correctness. By allowing some amount of duplicates, we get some nice +properties with respect to parallelizing memo table access. + +## Efficiency and Parallelism + +Fingerprinting by itself is very efficient, as creating a fingerprint and looking up a fingerprint +can be made quite efficient with indexes. The real concern here is that merging two groups is very, +very expensive. Depending on the workload, it is possible either that the amortized cost is low or +that group merging takes a majority of the work. + +However, we must remember that we want to parallelize access to the memo table. The above algorithms +are notably **read and append only**. There is never a point where we need to update an expression +to maintain invariants. This is important, as it means that we can add and look up expressions and +groups _without having to take any locks_. If we enforce a serializable isolation level, then every +method on the memo table can be done in parallel with relatively low contention due to there being +zero write-write conflicts. diff --git a/optd-mvp/entities.md b/optd-mvp/entities.md index cfd082a..fc13a39 100644 --- a/optd-mvp/entities.md +++ b/optd-mvp/entities.md @@ -8,7 +8,7 @@ This assumes that you already have the `sqlite3` binary installed. 
First, make s $ cargo install sea-orm-cli ``` -Make sure your working directory is in the crate root: +Make sure your working directory is in the crate root (not workspace): ```sh $ cd optd-mvp diff --git a/optd-mvp/src/expression/logical_expression.rs b/optd-mvp/src/expression/logical_expression.rs index c7918de..4ddf46e 100644 --- a/optd-mvp/src/expression/logical_expression.rs +++ b/optd-mvp/src/expression/logical_expression.rs @@ -2,24 +2,20 @@ //! //! FIXME: All fields are placeholders. //! -//! TODO Remove dead code. //! TODO Figure out if each relation should be in a different submodule. //! TODO This entire file is a WIP. -#![allow(dead_code)] - use crate::{entities::*, memo::GroupId}; use fxhash::hash; use serde::{Deserialize, Serialize}; -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug)] pub enum LogicalExpression { Scan(Scan), Filter(Filter), Join(Join), } -/// FIXME: Figure out how to make everything unsigned instead of signed. impl LogicalExpression { pub fn kind(&self) -> i16 { match self { @@ -29,11 +25,6 @@ impl LogicalExpression { } } - /// Definitions of custom fingerprinting strategies for each kind of logical expression. - pub fn fingerprint(&self) -> i64 { - self.fingerprint_with_rewrite(&[]) - } - /// Calculates the fingerprint of a given expression, but replaces all of the children group IDs /// with a new group ID if it is listed in the input `rewrites` list. /// @@ -55,13 +46,14 @@ impl LogicalExpression { let kind = self.kind() as u16 as usize; let hash = match self { - LogicalExpression::Scan(scan) => hash(scan.table_schema.as_str()), + LogicalExpression::Scan(scan) => hash(scan.table.as_str()), LogicalExpression::Filter(filter) => { hash(&rewrite(filter.child).0) ^ hash(filter.expression.as_str()) } LogicalExpression::Join(join) => { - hash(&rewrite(join.left).0) - ^ hash(&rewrite(join.right).0) + // Make sure that there is a difference between `Join(A, B)` and `Join(B, A)`. 
+ hash(&(rewrite(join.left).0 + 1)) + ^ hash(&(rewrite(join.right).0 + 2)) ^ hash(join.expression.as_str()) } }; @@ -69,27 +61,69 @@ impl LogicalExpression { // Mask out the bottom 16 bits of `hash` and replace them with `kind`. ((hash & !0xFFFF) | kind) as i64 } + + /// Checks equality between two expressions, with both expression rewriting their child group + /// IDs according to the input `rewrites` list. + pub fn eq_with_rewrite(&self, other: &Self, rewrites: &[(GroupId, GroupId)]) -> bool { + // Closure that rewrites a group ID if needed. + let rewrite = |x: GroupId| { + if rewrites.is_empty() { + return x; + } + + if let Some(i) = rewrites.iter().position(|(curr, _new)| &x == curr) { + assert_eq!(rewrites[i].0, x); + rewrites[i].1 + } else { + x + } + }; + + match (self, other) { + (LogicalExpression::Scan(scan_left), LogicalExpression::Scan(scan_right)) => { + scan_left.table == scan_right.table + } + (LogicalExpression::Filter(filter_left), LogicalExpression::Filter(filter_right)) => { + rewrite(filter_left.child) == rewrite(filter_right.child) + && filter_left.expression == filter_right.expression + } + (LogicalExpression::Join(join_left), LogicalExpression::Join(join_right)) => { + rewrite(join_left.left) == rewrite(join_right.left) + && rewrite(join_left.right) == rewrite(join_right.right) + && join_left.expression == join_right.expression + } + _ => false, + } + } + + pub fn children(&self) -> Vec { + match self { + LogicalExpression::Scan(_) => vec![], + LogicalExpression::Filter(filter) => vec![filter.child], + LogicalExpression::Join(join) => vec![join.left, join.right], + } + } } -#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] +#[derive(Serialize, Deserialize, Clone, Debug)] pub struct Scan { - table_schema: String, + table: String, } -#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] +#[derive(Serialize, Deserialize, Clone, Debug)] pub struct Filter { child: GroupId, expression: String, } -#[derive(Serialize, 
Deserialize, Clone, Debug, PartialEq, Eq)] +#[derive(Serialize, Deserialize, Clone, Debug)] pub struct Join { left: GroupId, right: GroupId, expression: String, } -/// TODO Use a macro instead. +/// TODO Use a macro. impl From for LogicalExpression { fn from(value: logical_expression::Model) -> Self { match value.kind { @@ -110,7 +144,7 @@ impl From for LogicalExpression { } } -/// TODO Use a macro instead. +/// TODO Use a macro. impl From for logical_expression::Model { fn from(value: LogicalExpression) -> logical_expression::Model { fn create_logical_expression( @@ -152,7 +186,9 @@ mod build { use crate::expression::LogicalExpression; pub fn scan(table_schema: String) -> LogicalExpression { - LogicalExpression::Scan(Scan { table_schema }) + LogicalExpression::Scan(Scan { + table: table_schema, + }) } pub fn filter(child_group: GroupId, expression: String) -> LogicalExpression { diff --git a/optd-mvp/src/expression/physical_expression.rs b/optd-mvp/src/expression/physical_expression.rs index aaaa9e7..9c451b7 100644 --- a/optd-mvp/src/expression/physical_expression.rs +++ b/optd-mvp/src/expression/physical_expression.rs @@ -2,12 +2,9 @@ //! //! FIXME: All fields are placeholders. //! -//! TODO Remove dead code. //! TODO Figure out if each operator should be in a different submodule. //! TODO This entire file is a WIP. -#![allow(dead_code)] - use crate::{entities::*, memo::GroupId}; use serde::{Deserialize, Serialize}; @@ -36,7 +33,7 @@ pub struct HashJoin { expression: String, } -/// TODO Use a macro instead. +/// TODO Use a macro. impl From for PhysicalExpression { fn from(value: physical_expression::Model) -> Self { match value.kind { @@ -57,7 +54,7 @@ impl From for PhysicalExpression { } } -/// TODO Use a macro instead. +/// TODO Use a macro. 
impl From for physical_expression::Model { fn from(value: PhysicalExpression) -> physical_expression::Model { fn create_physical_expression( @@ -100,23 +97,4 @@ mod build { pub fn table_scan(table_schema: String) -> PhysicalExpression { PhysicalExpression::TableScan(TableScan { table_schema }) } - - pub fn filter(child_group: GroupId, expression: String) -> PhysicalExpression { - PhysicalExpression::Filter(PhysicalFilter { - child: child_group, - expression, - }) - } - - pub fn hash_join( - left_group: GroupId, - right_group: GroupId, - expression: String, - ) -> PhysicalExpression { - PhysicalExpression::HashJoin(HashJoin { - left: left_group, - right: right_group, - expression, - }) - } } diff --git a/optd-mvp/src/lib.rs b/optd-mvp/src/lib.rs index 506eee4..48a4c78 100644 --- a/optd-mvp/src/lib.rs +++ b/optd-mvp/src/lib.rs @@ -18,8 +18,6 @@ pub const DATABASE_FILENAME: &str = "sqlite.db"; pub const DATABASE_URL: &str = "sqlite:./sqlite.db?mode=rwc"; /// An error type wrapping all the different kinds of error the optimizer might raise. -/// -/// TODO more docs. #[derive(Error, Debug)] pub enum OptimizerError { #[error("SeaORM error")] diff --git a/optd-mvp/src/memo/persistent/implementation.rs b/optd-mvp/src/memo/persistent/implementation.rs index d7e7c25..70b10e1 100644 --- a/optd-mvp/src/memo/persistent/implementation.rs +++ b/optd-mvp/src/memo/persistent/implementation.rs @@ -18,6 +18,7 @@ use sea_orm::{ entity::{IntoActiveModel, NotSet, Set}, Database, }; +use std::collections::HashSet; impl PersistentMemo { /// Creates a new `PersistentMemo` struct by connecting to a database defined at @@ -90,12 +91,55 @@ impl PersistentMemo { // For every group along the path that we walked, set their parent id pointer to the root. // This allows for an amortized O(1) cost for `get_root_group`. 
for group in path { - self.update_group_parent(GroupId(group.id), root_id).await?; + let mut active_group = group.into_active_model(); + + // Update the group to point to the new parent. + active_group.parent_id = Set(Some(root_id.0)); + active_group.update(&self.db).await?; } Ok(root_id) } + /// Retrieves every group ID of groups that share the same root group with the input group. + /// + /// If a group does not exist in the cycle, returns a [`MemoError::UnknownGroup`] error. + /// + /// The group records form a union-find data structure that also maintains a circular linked + /// list in every set that allows us to iterate over all elements in a set in linear time. + pub async fn get_group_set(&self, group_id: GroupId) -> OptimizerResult> { + // Iterate over the circular linked list until we reach ourselves again. + let base_group = self.get_group(group_id).await?; + + // The only case when `next_id` is set to `None` is if the current group is a root, which + // means that this group is the only group in the set. + if base_group.next_id.is_none() { + assert!(base_group.parent_id.is_none()); + return Ok(vec![group_id]); + } + + // Iterate over the circular linked list until we see ourselves again, collecting nodes + // along the way. + let mut set = vec![group_id]; + let mut next_id = base_group + .next_id + .expect("next pointer cannot be null if it is in a cycle"); + loop { + let curr_group = self.get_group(GroupId(next_id)).await?; + + if curr_group.id == group_id.0 { + break; + } + + set.push(GroupId(curr_group.id)); + next_id = curr_group + .next_id + .expect("next pointer cannot be null if it is in a cycle"); + } + + Ok(set) + } + /// Retrieves a [`physical_expression::Model`] given a [`PhysicalExpressionId`]. /// /// If the physical expression does not exist, returns a @@ -227,30 +271,6 @@ impl PersistentMemo { Ok(old_id) } - /// Updates / replaces a group's parent group. Optionally returns the previous parent. 
- /// - /// If either of the groups do not exist, returns a [`MemoError::UnknownGroup`] error. - pub async fn update_group_parent( - &self, - group_id: GroupId, - parent_id: GroupId, - ) -> OptimizerResult> { - // First retrieve the group record. - let mut group = self.get_group(group_id).await?.into_active_model(); - - // Check that the parent group exists. - let _ = self.get_group(parent_id).await?; - - // Update the group to point to the new parent. - let old_parent = group.parent_id; - group.parent_id = Set(Some(parent_id.0)); - group.update(&self.db).await?; - - // Note that the `unwrap` here is unwrapping the `ActiveValue`, not the `Option`. - let old_parent = old_parent.unwrap().map(GroupId); - Ok(old_parent) - } - /// Adds a logical expression to an existing group via its ID. /// /// The caller is required to pass in a slice of [`GroupId`] that represent the child groups of @@ -265,8 +285,6 @@ impl PersistentMemo { /// /// If the memo table detects that the input is unique, it will insert the expression into the /// input group and return an `Ok(Ok(expression_id))`. - /// - /// FIXME Check that all of the children are reduced groups? pub async fn add_logical_expression_to_group( &self, group_id: GroupId, @@ -323,7 +341,7 @@ impl PersistentMemo { kind: Set(kind), hash: Set(hash), }; - let _ = fingerprint::Entity::insert(fingerprint) + fingerprint::Entity::insert(fingerprint) .exec(&self.db) .await?; @@ -379,8 +397,6 @@ impl PersistentMemo { /// This function assumes that the child groups of the expression are currently roots of their /// group sets. For example, if G1 and G2 should be merged, and G1 is the root, then the input /// expression should _not_ have G2 as a child, and should be replaced with G1. - /// - /// TODO Check that all of the children are root groups? How to do this? 
pub async fn is_duplicate_logical_expression( &self, logical_expression: &LogicalExpression, @@ -422,8 +438,16 @@ impl PersistentMemo { let expr_id = LogicalExpressionId(potential_match.logical_expression_id); let (group_id, expr) = self.get_logical_expression(expr_id).await?; - // Check for an exact match. - if &expr == logical_expression { + // We need to add the root groups of the new expression to the rewrites vector. + // TODO make this much more efficient by making rewrites a hash map, potentially im::HashMap. + let mut rewrites = rewrites.clone(); + for child_id in expr.children() { + let root_id = self.get_root_group(child_id).await?; + rewrites.push((child_id, root_id)); + } + + // Check for an exact match after rewrites. + if logical_expression.eq_with_rewrite(&expr, &rewrites) { match_id = Some((group_id, expr_id)); // There should be at most one duplicate expression, so we can break here. @@ -447,8 +471,6 @@ impl PersistentMemo { /// /// If the expression does not exist, this function will create a new group and a new /// expression, returning brand new IDs for both. - /// - /// FIXME Check that all of the children are reduced groups? pub async fn add_group( &self, logical_expression: LogicalExpression, @@ -513,10 +535,101 @@ impl PersistentMemo { kind: Set(kind), hash: Set(hash), }; - let _ = fingerprint::Entity::insert(fingerprint) + fingerprint::Entity::insert(fingerprint) .exec(&self.db) .await?; Ok(Ok((GroupId(group_id), LogicalExpressionId(expr_id)))) } + + /// Merges two groups sets together. + /// + /// If either of the input groups do not exist, returns a [`MemoError::UnknownGroup`] error. + /// + /// TODO write docs. + /// TODO highly inefficient, need to understand metrics and performance testing. 
+ /// TODO Optimization: add rank / size into data structure + pub async fn merge_groups( + &self, + left_group_id: GroupId, + right_group_id: GroupId, + ) -> OptimizerResult { + // Without a rank / size field, we have no way of determining which set is better to merge + // into the other. So we will arbitrarily choose to merge the left group into the right + // group here. If rank is added in the future, then merge the smaller set into the larger. + + let left_root_id = self.get_root_group(left_group_id).await?; + let left_root = self.get_group(left_root_id).await?; + // A `None` next pointer means it should technically be pointing to itself. + let left_next = left_root.next_id.unwrap_or(left_root_id.0); + let mut active_left_root = left_root.into_active_model(); + + let right_root_id = self.get_root_group(right_group_id).await?; + let right_root = self.get_group(right_root_id).await?; + // A `None` next pointer means it should technically be pointing to itself. + let right_next = right_root.next_id.unwrap_or(right_root_id.0); + let mut active_right_root = right_root.into_active_model(); + + // Before we actually update the group records, We first need to generate new fingerprints + // for every single expression that has a child group in the left set. + // TODO make this more efficient, this code is doing double work from `get_group_set`. + let group_set_ids = self.get_group_set(left_group_id).await?; + let mut left_group_models = Vec::with_capacity(group_set_ids.len()); + for &group_id in &group_set_ids { + left_group_models.push(self.get_group(group_id).await?); + } + + // Retrieve every single expression that has a child group in the left set. + let left_group_expressions: Vec> = left_group_models + .load_many_to_many( + logical_expression::Entity, + logical_children::Entity, + &self.db, + ) + .await?; + + // Need to replace every single occurrence of groups in the set with the new root. 
+ let rewrites: Vec<(GroupId, GroupId)> = group_set_ids + .iter() + .map(|&group_id| (group_id, right_root_id)) + .collect(); + + // For each expression, generate a new fingerprint. + let mut seen = HashSet::new(); + for model in left_group_expressions.into_iter().flatten() { + let expr_id = model.id; + + // There may be duplicates in the expressions list. + if seen.contains(&expr_id) { + continue; + } else { + seen.insert(expr_id); + } + + let logical_expression: LogicalExpression = model.into(); + let hash = logical_expression.fingerprint_with_rewrite(&rewrites); + + let fingerprint = fingerprint::ActiveModel { + id: NotSet, + logical_expression_id: Set(expr_id), + kind: Set(logical_expression.kind()), + hash: Set(hash), + }; + fingerprint::Entity::insert(fingerprint) + .exec(&self.db) + .await?; + } + + // Update the left group root to point to the right group root. + active_left_root.parent_id = Set(Some(right_root_id.0)); + + // Swap the next pointers of each root to maintain the circular linked list. + active_left_root.next_id = Set(Some(right_next)); + active_right_root.next_id = Set(Some(left_next)); + + active_left_root.update(&self.db).await?; + active_right_root.update(&self.db).await?; + + Ok(right_root_id) + } } diff --git a/optd-mvp/src/memo/persistent/tests.rs b/optd-mvp/src/memo/persistent/tests.rs index 3dcddd6..be3115c 100644 --- a/optd-mvp/src/memo/persistent/tests.rs +++ b/optd-mvp/src/memo/persistent/tests.rs @@ -114,8 +114,6 @@ async fn test_simple_tree() { ); // Create two join expression that should be in the same group. - // TODO: Eventually, the predicates will be in their own table, and the predicate representation - // will be a foreign key. For now, we represent them as strings. let join1 = join(scan_id_1, scan_id_2, "t1.a = t2.b".to_string()); let join2 = join(scan_id_2, scan_id_1, "t1.a = t2.b".to_string()); @@ -143,7 +141,7 @@ async fn test_simple_tree() { memo.cleanup().await; } -/// Tests basic group merging. 
See comments in the test itself for more information. +/// Tests a single group merge. See comments in the test itself for more information. #[ignore] #[tokio::test] async fn test_simple_group_link() { @@ -191,22 +189,283 @@ async fn test_simple_group_link() { // The above tells the application that the expression already exists in the memo, specifically // under `existing_group`. Thus, we should link these two groups together. - // Here, we arbitrarily choose to link group 1 into group 2. - memo.update_group_parent(join_group_1, join_group_2) - .await - .unwrap(); + memo.merge_groups(join_group_1, join_group_2).await.unwrap(); let test_root_1 = memo.get_root_group(join_group_1).await.unwrap(); let test_root_2 = memo.get_root_group(join_group_2).await.unwrap(); assert_eq!(test_root_1, test_root_2); - // TODO(Connor) - // - // We now need to find all logical expressions that had group 1 (or whatever the root group of - // the set that group 1 belongs to is, in this case it is just group 1) as a child, and add a - // new fingerprint for each one that uses group 2 as a child instead. - // - // In order to do this, we need to iterate through every group in group 1's set. + memo.cleanup().await; +} + +#[ignore] +#[tokio::test] +async fn test_group_merge_ladder() { + let memo = PersistentMemo::new().await; + memo.cleanup().await; + + // Build up a tree of true filters that should be collapsed into a single table scan. 
+ let scan_base = scan("t1".to_string()); + let (scan_id, _) = memo.add_group(scan_base, &[]).await.unwrap().ok().unwrap(); + + let filter0 = filter(scan_id, "true".to_string()); + let (filter_id_0, _) = memo + .add_group(filter0, &[scan_id]) + .await + .unwrap() + .ok() + .unwrap(); + + let filter1 = filter(filter_id_0, "true".to_string()); + let (filter_id_1, _) = memo + .add_group(filter1, &[scan_id]) + .await + .unwrap() + .ok() + .unwrap(); + + let filter2 = filter(filter_id_1, "true".to_string()); + let (filter_id_2, _) = memo + .add_group(filter2, &[scan_id]) + .await + .unwrap() + .ok() + .unwrap(); + + let filter3 = filter(filter_id_2, "true".to_string()); + let (filter_id_3, _) = memo + .add_group(filter3, &[scan_id]) + .await + .unwrap() + .ok() + .unwrap(); + + let mut groups = vec![scan_id, filter_id_0, filter_id_1, filter_id_2, filter_id_3]; + + let m0 = memo.merge_groups(filter_id_3, filter_id_2).await.unwrap(); + let m1 = memo.merge_groups(filter_id_2, filter_id_1).await.unwrap(); + let m2 = memo.merge_groups(filter_id_1, filter_id_0).await.unwrap(); + let root = memo.merge_groups(filter_id_0, scan_id).await.unwrap(); + groups.extend_from_slice(&[m0, m1, m2, root]); + + for group_id in groups { + assert_eq!(root, memo.get_root_group(group_id).await.unwrap()); + } + + memo.cleanup().await; +} + +/// Tests merging a bunch of groups together in order to prevent duplicates from being added. +#[ignore] +#[tokio::test] +async fn test_group_merge() { + let memo = PersistentMemo::new().await; + memo.cleanup().await; + + // Create a base group. + let scan1 = scan("t1".to_string()); + let (scan_id_1, _) = memo.add_group(scan1, &[]).await.unwrap().ok().unwrap(); + + // Create a bunch of equivalent groups. 
+ let filter0 = filter(scan_id_1, "true".to_string()); + let filter1 = filter(scan_id_1, "1 < 2".to_string()); + let filter2 = filter(scan_id_1, "2 > 1".to_string()); + let filter3 = filter(scan_id_1, "42 != 100".to_string()); + let filter4 = filter(scan_id_1, "10000 > 0".to_string()); + let filter5 = filter(scan_id_1, "1 + 2 = 3".to_string()); + let filter6 = filter(scan_id_1, "true OR false".to_string()); + let filter7 = filter(scan_id_1, "(1 + 1 > -1 AND true) OR false".to_string()); + let (filter_id_0, _) = memo + .add_group(filter0, &[scan_id_1]) + .await + .unwrap() + .ok() + .unwrap(); + let (filter_id_1, _) = memo + .add_group(filter1, &[scan_id_1]) + .await + .unwrap() + .ok() + .unwrap(); + let (filter_id_2, _) = memo + .add_group(filter2, &[scan_id_1]) + .await + .unwrap() + .ok() + .unwrap(); + let (filter_id_3, _) = memo + .add_group(filter3, &[scan_id_1]) + .await + .unwrap() + .ok() + .unwrap(); + let (filter_id_4, _) = memo + .add_group(filter4, &[scan_id_1]) + .await + .unwrap() + .ok() + .unwrap(); + let (filter_id_5, _) = memo + .add_group(filter5, &[scan_id_1]) + .await + .unwrap() + .ok() + .unwrap(); + let (filter_id_6, _) = memo + .add_group(filter6, &[scan_id_1]) + .await + .unwrap() + .ok() + .unwrap(); + let (filter_id_7, _) = memo + .add_group(filter7, &[scan_id_1]) + .await + .unwrap() + .ok() + .unwrap(); + let filters = vec![ + filter_id_0, + filter_id_1, + filter_id_2, + filter_id_3, + filter_id_4, + filter_id_5, + filter_id_6, + filter_id_7, + ]; + + // Merge them all together. 
+ let quarter_0 = memo.merge_groups(filters[0], filters[1]).await.unwrap(); + let quarter_1 = memo.merge_groups(filters[2], filters[3]).await.unwrap(); + let quarter_2 = memo.merge_groups(filters[4], filters[5]).await.unwrap(); + let quarter_3 = memo.merge_groups(filters[6], filters[7]).await.unwrap(); + let semi_0 = memo.merge_groups(quarter_0, quarter_1).await.unwrap(); + let semi_1 = memo.merge_groups(quarter_2, quarter_3).await.unwrap(); + let final_id = memo.merge_groups(semi_0, semi_1).await.unwrap(); + + // Check that the group set is properly representative. + { + let set = memo.get_group_set(final_id).await.unwrap(); + assert_eq!(set.len(), 8); + for id in set { + assert!(filters.contains(&id)); + } + } + + // Create another base group. + let scan2 = scan("t2".to_string()); + let (scan_id_2, _) = memo.add_group(scan2, &[]).await.unwrap().ok().unwrap(); + + // Add a join group. + let join0 = join(filter_id_0, scan_id_2, "t1.a = t2.a".to_string()); + let (join_group_id, join_expr_id) = memo + .add_group(join0, &[filter_id_0, scan_id_2]) + .await + .unwrap() + .ok() + .unwrap(); + + // Adding the duplicate join expressions should return a duplication error containing the IDs of + // the already existing group and expression. + for filter_id in filters { + let join_test = join(filter_id, scan_id_2, "t1.a = t2.a".to_string()); + let (join_group_id_test, join_expr_id_test) = memo + .add_group(join_test, &[filter_id, scan_id_2]) + .await + .unwrap() + .err() + .unwrap(); + assert_eq!(join_group_id, join_group_id_test); + assert_eq!(join_expr_id, join_expr_id_test); + } + + memo.cleanup().await; +} + +/// Tests the exact same scenario as in the "Discovered Duplicates" section in `DESIGN.md`. +#[ignore] +#[tokio::test] +async fn test_cascading_merge() { + let memo = PersistentMemo::new().await; + memo.cleanup().await; + + // Create the base groups. 
+ let scan1 = scan("t1".to_string()); + let (g1, _) = memo.add_group(scan1, &[]).await.unwrap().ok().unwrap(); + let scan2 = scan("t2".to_string()); + let (g2, _) = memo.add_group(scan2, &[]).await.unwrap().ok().unwrap(); + + let filter1 = filter(g1, "x > 1000".to_string()); + let (g3, _) = memo.add_group(filter1, &[g1]).await.unwrap().ok().unwrap(); + + // Create two groups that will need to be merged. + let filter2a = filter(g2, "a < 42".to_string()); + let (g4, _) = memo.add_group(filter2a, &[g2]).await.unwrap().ok().unwrap(); + let filter2b = filter(g4, "a < 42 AND 1 = 1".to_string()); + let (g5, _) = memo.add_group(filter2b, &[g4]).await.unwrap().ok().unwrap(); + + // Create groups that are dependent on the to-be-merged groups. + let join1 = join(g3, g4, "t1.x = t2.a".to_string()); + let (g6, _) = memo + .add_group(join1, &[g3, g4]) + .await + .unwrap() + .ok() + .unwrap(); + let join2 = join(g3, g5, "t1.x = t2.a".to_string()); + let (g7, _) = memo + .add_group(join2, &[g3, g5]) + .await + .unwrap() + .ok() + .unwrap(); + + // Create more groups that are dependent on the to-be-merged groups. + // TODO actually use a sort expression instead of a `filter` placeholder. + let sort1 = filter(g6, "ORDER BY a".to_string()); + let (g8, _) = memo.add_group(sort1, &[g6]).await.unwrap().ok().unwrap(); + + let sort2 = filter(g7, "ORDER BY a".to_string()); + let (g9, _) = memo.add_group(sort2, &[g7]).await.unwrap().ok().unwrap(); + + // Now that everything is set up, we can merge groups 4 and 5 to begin the cascading process. + let filter_root = memo.merge_groups(g4, g5).await.unwrap(); + assert_eq!(memo.get_root_group(g4).await.unwrap(), filter_root); + assert_eq!(memo.get_root_group(g5).await.unwrap(), filter_root); + + // After merging, the join groups (6 and 7) are technically identical, but we have not merged + // them together yet. However, applying rules will reveal that they are identical, and we will + // know that they need to get merged. 
+ let join1_commute = join(g4, g3, "t1.x = t2.a".to_string()); + let join1_commute_id = memo + .add_logical_expression_to_group(g6, join1_commute, &[g4, g3]) + .await + .unwrap() + .ok() + .unwrap(); + + // Adding this expression should now result in a duplication error and return the above ID. + let join2_commute = join(g5, g3, "t1.x = t2.a".to_string()); + let (existing_g6, existing_id) = memo + .add_logical_expression_to_group(g7, join2_commute, &[g5, g3]) + .await + .unwrap() + .err() + .unwrap(); + assert_eq!(existing_g6, g6); + assert_eq!(existing_id, join1_commute_id); + + // Since the memo table has told us these are duplicates, we can now merge groups 6 and 7. + let join_root = memo.merge_groups(g6, g7).await.unwrap(); + assert_eq!(memo.get_root_group(g6).await.unwrap(), join_root); + assert_eq!(memo.get_root_group(g7).await.unwrap(), join_root); + + // Do a similar thing for the sort groups. We'll skip the expression adding for now and just + // merge them immediately, but remember that the application should observe a duplicate + // somewhere in the memo table before deciding to merge groups. + let sort_root = memo.merge_groups(g8, g9).await.unwrap(); + assert_eq!(memo.get_root_group(g8).await.unwrap(), sort_root); + assert_eq!(memo.get_root_group(g9).await.unwrap(), sort_root); memo.cleanup().await; } diff --git a/optd-mvp/src/migrator/memo/m20241127_000001_group.rs b/optd-mvp/src/migrator/memo/m20241127_000001_group.rs index d5bbe0e..59b5a09 100644 --- a/optd-mvp/src/migrator/memo/m20241127_000001_group.rs +++ b/optd-mvp/src/migrator/memo/m20241127_000001_group.rs @@ -28,8 +28,8 @@ //! `cost` foreign key reference to a cost record (FIXME). See the //! [section](#best-physical-plan-winner) below for more details. //! -//! Finally, we maintain a union-find graph structure embedded in the group records. -//! TODO write more information about this once this is implemented. +//! 
Finally, we maintain a union-find graph structure embedded in the group records. See the +//! `DESIGN.md` document for more information. //! //! # Entity Relationships //! From e9fba273028e3274cfab2b828ac657edb094c6ca Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Fri, 6 Dec 2024 17:07:14 -0500 Subject: [PATCH 09/13] remove top-level Expression type and rename to Default<>Expression --- optd-mvp/src/expression/logical_expression.rs | 65 ++++++++++--------- optd-mvp/src/expression/mod.rs | 53 --------------- .../src/expression/physical_expression.rs | 20 +++--- .../src/memo/persistent/implementation.rs | 20 +++--- optd-mvp/src/memo/persistent/tests.rs | 2 +- 5 files changed, 57 insertions(+), 103 deletions(-) diff --git a/optd-mvp/src/expression/logical_expression.rs b/optd-mvp/src/expression/logical_expression.rs index 4ddf46e..f4797bc 100644 --- a/optd-mvp/src/expression/logical_expression.rs +++ b/optd-mvp/src/expression/logical_expression.rs @@ -10,18 +10,18 @@ use fxhash::hash; use serde::{Deserialize, Serialize}; #[derive(Clone, Debug)] -pub enum LogicalExpression { +pub enum DefaultLogicalExpression { Scan(Scan), Filter(Filter), Join(Join), } -impl LogicalExpression { +impl DefaultLogicalExpression { pub fn kind(&self) -> i16 { match self { - LogicalExpression::Scan(_) => 0, - LogicalExpression::Filter(_) => 1, - LogicalExpression::Join(_) => 2, + DefaultLogicalExpression::Scan(_) => 0, + DefaultLogicalExpression::Filter(_) => 1, + DefaultLogicalExpression::Join(_) => 2, } } @@ -46,11 +46,11 @@ impl LogicalExpression { let kind = self.kind() as u16 as usize; let hash = match self { - LogicalExpression::Scan(scan) => hash(scan.table.as_str()), - LogicalExpression::Filter(filter) => { + DefaultLogicalExpression::Scan(scan) => hash(scan.table.as_str()), + DefaultLogicalExpression::Filter(filter) => { hash(&rewrite(filter.child).0) ^ hash(filter.expression.as_str()) } - LogicalExpression::Join(join) => { + DefaultLogicalExpression::Join(join) => { // Make sure 
that there is a difference between `Join(A, B)` and `Join(B, A)`. hash(&(rewrite(join.left).0 + 1)) ^ hash(&(rewrite(join.right).0 + 2)) @@ -80,14 +80,21 @@ impl LogicalExpression { }; match (self, other) { - (LogicalExpression::Scan(scan_left), LogicalExpression::Scan(scan_right)) => { - scan_left.table == scan_right.table - } - (LogicalExpression::Filter(filter_left), LogicalExpression::Filter(filter_right)) => { + ( + DefaultLogicalExpression::Scan(scan_left), + DefaultLogicalExpression::Scan(scan_right), + ) => scan_left.table == scan_right.table, + ( + DefaultLogicalExpression::Filter(filter_left), + DefaultLogicalExpression::Filter(filter_right), + ) => { rewrite(filter_left.child) == rewrite(filter_right.child) && filter_left.expression == filter_right.expression } - (LogicalExpression::Join(join_left), LogicalExpression::Join(join_right)) => { + ( + DefaultLogicalExpression::Join(join_left), + DefaultLogicalExpression::Join(join_right), + ) => { rewrite(join_left.left) == rewrite(join_right.left) && rewrite(join_left.right) == rewrite(join_right.right) && join_left.expression == join_right.expression @@ -98,9 +105,9 @@ impl LogicalExpression { pub fn children(&self) -> Vec { match self { - LogicalExpression::Scan(_) => vec![], - LogicalExpression::Filter(filter) => vec![filter.child], - LogicalExpression::Join(join) => vec![join.left, join.right], + DefaultLogicalExpression::Scan(_) => vec![], + DefaultLogicalExpression::Filter(filter) => vec![filter.child], + DefaultLogicalExpression::Join(join) => vec![join.left, join.right], } } } @@ -124,7 +131,7 @@ pub struct Join { } /// TODO Use a macro. -impl From for LogicalExpression { +impl From for DefaultLogicalExpression { fn from(value: logical_expression::Model) -> Self { match value.kind { 0 => Self::Scan( @@ -145,8 +152,8 @@ impl From for LogicalExpression { } /// TODO Use a macro. 
-impl From for logical_expression::Model { - fn from(value: LogicalExpression) -> logical_expression::Model { +impl From for logical_expression::Model { + fn from(value: DefaultLogicalExpression) -> logical_expression::Model { fn create_logical_expression( kind: i16, data: serde_json::Value, @@ -161,15 +168,15 @@ impl From for logical_expression::Model { let kind = value.kind(); match value { - LogicalExpression::Scan(scan) => create_logical_expression( + DefaultLogicalExpression::Scan(scan) => create_logical_expression( kind, serde_json::to_value(scan).expect("unable to serialize logical `Scan`"), ), - LogicalExpression::Filter(filter) => create_logical_expression( + DefaultLogicalExpression::Filter(filter) => create_logical_expression( kind, serde_json::to_value(filter).expect("unable to serialize logical `Filter`"), ), - LogicalExpression::Join(join) => create_logical_expression( + DefaultLogicalExpression::Join(join) => create_logical_expression( kind, serde_json::to_value(join).expect("unable to serialize logical `Join`"), ), @@ -183,16 +190,16 @@ pub use build::*; #[cfg(test)] mod build { use super::*; - use crate::expression::LogicalExpression; + use crate::expression::DefaultLogicalExpression; - pub fn scan(table_schema: String) -> LogicalExpression { - LogicalExpression::Scan(Scan { + pub fn scan(table_schema: String) -> DefaultLogicalExpression { + DefaultLogicalExpression::Scan(Scan { table: table_schema, }) } - pub fn filter(child_group: GroupId, expression: String) -> LogicalExpression { - LogicalExpression::Filter(Filter { + pub fn filter(child_group: GroupId, expression: String) -> DefaultLogicalExpression { + DefaultLogicalExpression::Filter(Filter { child: child_group, expression, }) @@ -202,8 +209,8 @@ mod build { left_group: GroupId, right_group: GroupId, expression: String, - ) -> LogicalExpression { - LogicalExpression::Join(Join { + ) -> DefaultLogicalExpression { + DefaultLogicalExpression::Join(Join { left: left_group, right: right_group, 
expression, diff --git a/optd-mvp/src/expression/mod.rs b/optd-mvp/src/expression/mod.rs index 3b6d7cf..0efbb44 100644 --- a/optd-mvp/src/expression/mod.rs +++ b/optd-mvp/src/expression/mod.rs @@ -7,56 +7,3 @@ pub use logical_expression::*; mod physical_expression; pub use physical_expression::*; - -/// The representation of an expression. -/// -/// TODO more docs. -#[derive(Clone, Debug)] -pub enum Expression { - Logical(LogicalExpression), - Physical(PhysicalExpression), -} - -/// Converts the database / JSON representation of a logical expression into an in-memory one. -impl From for Expression { - fn from(value: crate::entities::logical_expression::Model) -> Self { - Self::Logical(value.into()) - } -} - -/// Converts the in-memory representation of a logical expression into the database / JSON version. -/// -/// # Panics -/// -/// This will panic if the [`Expression`] is [`Expression::Physical`]. -impl From for crate::entities::logical_expression::Model { - fn from(value: Expression) -> Self { - let Expression::Logical(expr) = value else { - panic!("Attempted to convert an in-memory physical expression into a logical database / JSON expression"); - }; - - expr.into() - } -} - -/// Converts the database / JSON representation of a physical expression into an in-memory one. -impl From for Expression { - fn from(value: crate::entities::physical_expression::Model) -> Self { - Self::Physical(value.into()) - } -} - -/// Converts the in-memory representation of a physical expression into the database / JSON version. -/// -/// # Panics -/// -/// This will panic if the [`Expression`] is [`Expression::Physical`]. 
-impl From for crate::entities::physical_expression::Model { - fn from(value: Expression) -> Self { - let Expression::Physical(expr) = value else { - panic!("Attempted to convert an in-memory logical expression into a physical database / JSON expression"); - }; - - expr.into() - } -} diff --git a/optd-mvp/src/expression/physical_expression.rs b/optd-mvp/src/expression/physical_expression.rs index 9c451b7..fb8692c 100644 --- a/optd-mvp/src/expression/physical_expression.rs +++ b/optd-mvp/src/expression/physical_expression.rs @@ -9,7 +9,7 @@ use crate::{entities::*, memo::GroupId}; use serde::{Deserialize, Serialize}; #[derive(Clone, Debug, PartialEq, Eq)] -pub enum PhysicalExpression { +pub enum DefaultPhysicalExpression { TableScan(TableScan), Filter(PhysicalFilter), HashJoin(HashJoin), @@ -34,7 +34,7 @@ pub struct HashJoin { } /// TODO Use a macro. -impl From for PhysicalExpression { +impl From for DefaultPhysicalExpression { fn from(value: physical_expression::Model) -> Self { match value.kind { 0 => Self::TableScan( @@ -55,8 +55,8 @@ impl From for PhysicalExpression { } /// TODO Use a macro. 
-impl From for physical_expression::Model { - fn from(value: PhysicalExpression) -> physical_expression::Model { +impl From for physical_expression::Model { + fn from(value: DefaultPhysicalExpression) -> physical_expression::Model { fn create_physical_expression( kind: i16, data: serde_json::Value, @@ -70,15 +70,15 @@ impl From for physical_expression::Model { } match value { - PhysicalExpression::TableScan(scan) => create_physical_expression( + DefaultPhysicalExpression::TableScan(scan) => create_physical_expression( 0, serde_json::to_value(scan).expect("unable to serialize physical `TableScan`"), ), - PhysicalExpression::Filter(filter) => create_physical_expression( + DefaultPhysicalExpression::Filter(filter) => create_physical_expression( 1, serde_json::to_value(filter).expect("unable to serialize physical `Filter`"), ), - PhysicalExpression::HashJoin(join) => create_physical_expression( + DefaultPhysicalExpression::HashJoin(join) => create_physical_expression( 2, serde_json::to_value(join).expect("unable to serialize physical `HashJoin`"), ), @@ -92,9 +92,9 @@ pub use build::*; #[cfg(test)] mod build { use super::*; - use crate::expression::PhysicalExpression; + use crate::expression::DefaultPhysicalExpression; - pub fn table_scan(table_schema: String) -> PhysicalExpression { - PhysicalExpression::TableScan(TableScan { table_schema }) + pub fn table_scan(table_schema: String) -> DefaultPhysicalExpression { + DefaultPhysicalExpression::TableScan(TableScan { table_schema }) } } diff --git a/optd-mvp/src/memo/persistent/implementation.rs b/optd-mvp/src/memo/persistent/implementation.rs index 70b10e1..8a60bc9 100644 --- a/optd-mvp/src/memo/persistent/implementation.rs +++ b/optd-mvp/src/memo/persistent/implementation.rs @@ -9,7 +9,7 @@ use super::PersistentMemo; use crate::{ entities::*, - expression::{LogicalExpression, PhysicalExpression}, + expression::{DefaultLogicalExpression, DefaultPhysicalExpression}, memo::{GroupId, GroupStatus, LogicalExpressionId, 
MemoError, PhysicalExpressionId}, OptimizerResult, DATABASE_URL, }; @@ -147,7 +147,7 @@ impl PersistentMemo { pub async fn get_physical_expression( &self, physical_expression_id: PhysicalExpressionId, - ) -> OptimizerResult<(GroupId, PhysicalExpression)> { + ) -> OptimizerResult<(GroupId, DefaultPhysicalExpression)> { // Lookup the entity in the database via the unique expression ID. let model = physical_expression::Entity::find_by_id(physical_expression_id.0) .one(&self.db) @@ -167,7 +167,7 @@ impl PersistentMemo { pub async fn get_logical_expression( &self, logical_expression_id: LogicalExpressionId, - ) -> OptimizerResult<(GroupId, LogicalExpression)> { + ) -> OptimizerResult<(GroupId, DefaultLogicalExpression)> { // Lookup the entity in the database via the unique expression ID. let model = logical_expression::Entity::find_by_id(logical_expression_id.0) .one(&self.db) @@ -288,7 +288,7 @@ impl PersistentMemo { pub async fn add_logical_expression_to_group( &self, group_id: GroupId, - logical_expression: LogicalExpression, + logical_expression: DefaultLogicalExpression, children: &[GroupId], ) -> OptimizerResult> { // Check if the expression already exists anywhere in the memo table. @@ -323,7 +323,7 @@ impl PersistentMemo { .await?; // Finally, insert the fingerprint of the logical expression as well. - let new_expr: LogicalExpression = new_model.into(); + let new_expr: DefaultLogicalExpression = new_model.into(); let kind = new_expr.kind(); // In order to calculate a correct fingerprint, we will want to use the IDs of the root @@ -359,7 +359,7 @@ impl PersistentMemo { pub async fn add_physical_expression_to_group( &self, group_id: GroupId, - physical_expression: PhysicalExpression, + physical_expression: DefaultPhysicalExpression, children: &[GroupId], ) -> OptimizerResult { // Check if the group actually exists. @@ -399,7 +399,7 @@ impl PersistentMemo { /// expression should _not_ have G2 as a child, and should be replaced with G1. 
pub async fn is_duplicate_logical_expression( &self, - logical_expression: &LogicalExpression, + logical_expression: &DefaultLogicalExpression, children: &[GroupId], ) -> OptimizerResult> { let model: logical_expression::Model = logical_expression.clone().into(); @@ -473,7 +473,7 @@ impl PersistentMemo { /// expression, returning brand new IDs for both. pub async fn add_group( &self, - logical_expression: LogicalExpression, + logical_expression: DefaultLogicalExpression, children: &[GroupId], ) -> OptimizerResult> { @@ -517,7 +517,7 @@ impl PersistentMemo { .await?; // Finally, insert the fingerprint of the logical expression as well. - let new_logical_expression: LogicalExpression = new_expression.into(); + let new_logical_expression: DefaultLogicalExpression = new_expression.into(); let kind = new_logical_expression.kind(); // In order to calculate a correct fingerprint, we will want to use the IDs of the root @@ -606,7 +606,7 @@ impl PersistentMemo { seen.insert(expr_id); } - let logical_expression: LogicalExpression = model.into(); + let logical_expression: DefaultLogicalExpression = model.into(); let hash = logical_expression.fingerprint_with_rewrite(&rewrites); let fingerprint = fingerprint::ActiveModel { diff --git a/optd-mvp/src/memo/persistent/tests.rs b/optd-mvp/src/memo/persistent/tests.rs index be3115c..7493363 100644 --- a/optd-mvp/src/memo/persistent/tests.rs +++ b/optd-mvp/src/memo/persistent/tests.rs @@ -99,7 +99,7 @@ async fn test_simple_tree() { memo.cleanup().await; // Create two scan groups. 
- let scan1: LogicalExpression = scan("t1".to_string()); + let scan1: DefaultLogicalExpression = scan("t1".to_string()); let scan2 = scan("t2".to_string()); let (scan_id_1, scan_expr_id_1) = memo.add_group(scan1, &[]).await.unwrap().ok().unwrap(); let (scan_id_2, scan_expr_id_2) = memo.add_group(scan2, &[]).await.unwrap().ok().unwrap(); From 2702eb4bb2d362711a900fffb0d6c819205eab3b Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Fri, 6 Dec 2024 17:53:27 -0500 Subject: [PATCH 10/13] make memo table generic over expression traits This commit replaces the specific expression types with traits that define the behavior the in-memory representations of both logical and physical expressions need to have. Right now, the `PhysicalExpression` trait does not do that much, but the `LogicalExpression` trait is super important to how the persistent memo table works. --- optd-mvp/src/expression/logical_expression.rs | 152 +++++++++--------- .../src/expression/physical_expression.rs | 50 ++++-- .../src/memo/persistent/implementation.rs | 43 +++-- optd-mvp/src/memo/persistent/mod.rs | 9 +- optd-mvp/src/memo/persistent/tests.rs | 13 +- 5 files changed, 157 insertions(+), 110 deletions(-) diff --git a/optd-mvp/src/expression/logical_expression.rs b/optd-mvp/src/expression/logical_expression.rs index f4797bc..486f1cb 100644 --- a/optd-mvp/src/expression/logical_expression.rs +++ b/optd-mvp/src/expression/logical_expression.rs @@ -5,9 +5,35 @@ //! TODO Figure out if each relation should be in a different submodule. //! TODO This entire file is a WIP. -use crate::{entities::*, memo::GroupId}; +use crate::{entities::logical_expression::Model, memo::GroupId}; use fxhash::hash; use serde::{Deserialize, Serialize}; +use std::fmt::Debug; + +/// An interface defining what an in-memory logical expression representation should be able to do. +pub trait LogicalExpression: From + Into + Clone + Debug { + /// Returns the kind of relation / operator node encoded as an integer.
+ fn kind(&self) -> i16; + + /// Retrieves the child groups IDs of this logical expression. + fn children(&self) -> Vec; + + /// Computes the fingerprint of this expression, which should generate an integer for equality + /// checks that has a low collision rate. + fn fingerprint(&self) -> i64; + + /// Checks if the current expression is a duplicate of the other expression. + /// + /// Note that this is similar to `Eq` and `PartialEq`, but the implementor should be aware that + /// different expressions can be duplicates of each other without having the exact same data. + fn is_duplicate(&self, other: &Self) -> bool; + + /// Rewrites the expression to use new child groups IDs, where `rewrites` is a slice of tuples + /// representing `(old_group_id, new_group_id)`. + /// + /// TODO: There's definitely a better way to represent this API + fn rewrite(&self, rewrites: &[(GroupId, GroupId)]) -> Self; +} #[derive(Clone, Debug)] pub enum DefaultLogicalExpression { @@ -16,44 +42,32 @@ pub enum DefaultLogicalExpression { Join(Join), } -impl DefaultLogicalExpression { - pub fn kind(&self) -> i16 { +impl LogicalExpression for DefaultLogicalExpression { + fn kind(&self) -> i16 { match self { - DefaultLogicalExpression::Scan(_) => 0, - DefaultLogicalExpression::Filter(_) => 1, - DefaultLogicalExpression::Join(_) => 2, + Self::Scan(_) => 0, + Self::Filter(_) => 1, + Self::Join(_) => 2, } } - /// Calculates the fingerprint of a given expression, but replaces all of the children group IDs - /// with a new group ID if it is listed in the input `rewrites` list. - /// - /// TODO Allow each expression to implement a trait that does this. - pub fn fingerprint_with_rewrite(&self, rewrites: &[(GroupId, GroupId)]) -> i64 { - // Closure that rewrites a group ID if needed. 
- let rewrite = |x: GroupId| { - if rewrites.is_empty() { - return x; - } - - if let Some(i) = rewrites.iter().position(|(curr, _new)| &x == curr) { - assert_eq!(rewrites[i].0, x); - rewrites[i].1 - } else { - x - } - }; + fn children(&self) -> Vec { + match self { + Self::Scan(_) => vec![], + Self::Filter(filter) => vec![filter.child], + Self::Join(join) => vec![join.left, join.right], + } + } + fn fingerprint(&self) -> i64 { let kind = self.kind() as u16 as usize; let hash = match self { - DefaultLogicalExpression::Scan(scan) => hash(scan.table.as_str()), - DefaultLogicalExpression::Filter(filter) => { - hash(&rewrite(filter.child).0) ^ hash(filter.expression.as_str()) - } - DefaultLogicalExpression::Join(join) => { + Self::Scan(scan) => hash(scan.table.as_str()), + Self::Filter(filter) => hash(&filter.child.0) ^ hash(filter.expression.as_str()), + Self::Join(join) => { // Make sure that there is a difference between `Join(A, B)` and `Join(B, A)`. - hash(&(rewrite(join.left).0 + 1)) - ^ hash(&(rewrite(join.right).0 + 2)) + hash(&(join.left.0 + 1)) + ^ hash(&(join.right.0 + 2)) ^ hash(join.expression.as_str()) } }; @@ -62,10 +76,23 @@ impl DefaultLogicalExpression { ((hash & !0xFFFF) | kind) as i64 } - /// Checks equality between two expressions, with both expression rewriting their child group - /// IDs according to the input `rewrites` list. - pub fn eq_with_rewrite(&self, other: &Self, rewrites: &[(GroupId, GroupId)]) -> bool { - // Closure that rewrites a group ID if needed. 
+ fn is_duplicate(&self, other: &Self) -> bool { + match (self, other) { + (Self::Scan(scan_left), Self::Scan(scan_right)) => scan_left.table == scan_right.table, + (Self::Filter(filter_left), Self::Filter(filter_right)) => { + filter_left.child == filter_right.child + && filter_left.expression == filter_right.expression + } + (Self::Join(join_left), Self::Join(join_right)) => { + join_left.left == join_right.left + && join_left.right == join_right.right + && join_left.expression == join_right.expression + } + _ => false, + } + } + + fn rewrite(&self, rewrites: &[(GroupId, GroupId)]) -> Self { let rewrite = |x: GroupId| { if rewrites.is_empty() { return x; @@ -79,35 +106,17 @@ impl DefaultLogicalExpression { } }; - match (self, other) { - ( - DefaultLogicalExpression::Scan(scan_left), - DefaultLogicalExpression::Scan(scan_right), - ) => scan_left.table == scan_right.table, - ( - DefaultLogicalExpression::Filter(filter_left), - DefaultLogicalExpression::Filter(filter_right), - ) => { - rewrite(filter_left.child) == rewrite(filter_right.child) - && filter_left.expression == filter_right.expression - } - ( - DefaultLogicalExpression::Join(join_left), - DefaultLogicalExpression::Join(join_right), - ) => { - rewrite(join_left.left) == rewrite(join_right.left) - && rewrite(join_left.right) == rewrite(join_right.right) - && join_left.expression == join_right.expression - } - _ => false, - } - } - - pub fn children(&self) -> Vec { match self { - DefaultLogicalExpression::Scan(_) => vec![], - DefaultLogicalExpression::Filter(filter) => vec![filter.child], - DefaultLogicalExpression::Join(join) => vec![join.left, join.right], + Self::Scan(_) => self.clone(), + Self::Filter(filter) => Self::Filter(Filter { + child: rewrite(filter.child), + expression: filter.expression.clone(), + }), + Self::Join(join) => Self::Join(Join { + left: rewrite(join.left), + right: rewrite(join.right), + expression: join.expression.clone(), + }), } } } @@ -130,9 +139,8 @@ pub struct Join { 
expression: String, } -/// TODO Use a macro. -impl From for DefaultLogicalExpression { - fn from(value: logical_expression::Model) -> Self { +impl From for DefaultLogicalExpression { + fn from(value: Model) -> Self { match value.kind { 0 => Self::Scan( serde_json::from_value(value.data) @@ -151,14 +159,10 @@ impl From for DefaultLogicalExpression { } } -/// TODO Use a macro. -impl From for logical_expression::Model { - fn from(value: DefaultLogicalExpression) -> logical_expression::Model { - fn create_logical_expression( - kind: i16, - data: serde_json::Value, - ) -> logical_expression::Model { - logical_expression::Model { +impl From for Model { + fn from(value: DefaultLogicalExpression) -> Model { + fn create_logical_expression(kind: i16, data: serde_json::Value) -> Model { + Model { id: -1, group_id: -1, kind, diff --git a/optd-mvp/src/expression/physical_expression.rs b/optd-mvp/src/expression/physical_expression.rs index fb8692c..d7f71de 100644 --- a/optd-mvp/src/expression/physical_expression.rs +++ b/optd-mvp/src/expression/physical_expression.rs @@ -2,11 +2,42 @@ //! //! FIXME: All fields are placeholders. //! +//! TODO Remove dead code. //! TODO Figure out if each operator should be in a different submodule. //! TODO This entire file is a WIP. -use crate::{entities::*, memo::GroupId}; +#![allow(dead_code)] + +use crate::{entities::physical_expression::Model, memo::GroupId}; use serde::{Deserialize, Serialize}; +use std::fmt::Debug; + +/// An interface defining what an in-memory physical expression representation should be able to do. +pub trait PhysicalExpression: From + Into + Clone + Debug { + /// Returns the kind of relation / operator node encoded as an integer. + fn kind(&self) -> i16; + + /// Retrieves the child group IDs of this physical expression. 
+ fn children(&self) -> Vec; +} + +impl PhysicalExpression for DefaultPhysicalExpression { + fn kind(&self) -> i16 { + match self { + Self::TableScan(_) => 0, + Self::Filter(_) => 1, + Self::HashJoin(_) => 2, + } + } + + fn children(&self) -> Vec { + match self { + Self::TableScan(_) => vec![], + Self::Filter(filter) => vec![filter.child], + Self::HashJoin(hash_join) => vec![hash_join.left, hash_join.right], + } + } +} #[derive(Clone, Debug, PartialEq, Eq)] pub enum DefaultPhysicalExpression { @@ -33,9 +64,8 @@ pub struct HashJoin { expression: String, } -/// TODO Use a macro. -impl From for DefaultPhysicalExpression { - fn from(value: physical_expression::Model) -> Self { +impl From for DefaultPhysicalExpression { + fn from(value: Model) -> Self { match value.kind { 0 => Self::TableScan( serde_json::from_value(value.data) @@ -54,14 +84,10 @@ impl From for DefaultPhysicalExpression { } } -/// TODO Use a macro. -impl From for physical_expression::Model { - fn from(value: DefaultPhysicalExpression) -> physical_expression::Model { - fn create_physical_expression( - kind: i16, - data: serde_json::Value, - ) -> physical_expression::Model { - physical_expression::Model { +impl From for Model { + fn from(value: DefaultPhysicalExpression) -> Model { + fn create_physical_expression(kind: i16, data: serde_json::Value) -> Model { + Model { id: -1, group_id: -1, kind, diff --git a/optd-mvp/src/memo/persistent/implementation.rs b/optd-mvp/src/memo/persistent/implementation.rs index 8a60bc9..8d994b3 100644 --- a/optd-mvp/src/memo/persistent/implementation.rs +++ b/optd-mvp/src/memo/persistent/implementation.rs @@ -9,7 +9,7 @@ use super::PersistentMemo; use crate::{ entities::*, - expression::{DefaultLogicalExpression, DefaultPhysicalExpression}, + expression::{LogicalExpression, PhysicalExpression}, memo::{GroupId, GroupStatus, LogicalExpressionId, MemoError, PhysicalExpressionId}, OptimizerResult, DATABASE_URL, }; @@ -18,14 +18,20 @@ use sea_orm::{ entity::{IntoActiveModel, 
NotSet, Set}, Database, }; -use std::collections::HashSet; +use std::{collections::HashSet, marker::PhantomData}; -impl PersistentMemo { +impl PersistentMemo +where + L: LogicalExpression, + P: PhysicalExpression, +{ /// Creates a new `PersistentMemo` struct by connecting to a database defined at /// [`DATABASE_URL`]. pub async fn new() -> Self { Self { db: Database::connect(DATABASE_URL).await.unwrap(), + _phantom_logical: PhantomData, + _phantom_physical: PhantomData, } } @@ -147,7 +153,7 @@ impl PersistentMemo { pub async fn get_physical_expression( &self, physical_expression_id: PhysicalExpressionId, - ) -> OptimizerResult<(GroupId, DefaultPhysicalExpression)> { + ) -> OptimizerResult<(GroupId, P)> { // Lookup the entity in the database via the unique expression ID. let model = physical_expression::Entity::find_by_id(physical_expression_id.0) .one(&self.db) @@ -167,7 +173,7 @@ impl PersistentMemo { pub async fn get_logical_expression( &self, logical_expression_id: LogicalExpressionId, - ) -> OptimizerResult<(GroupId, DefaultLogicalExpression)> { + ) -> OptimizerResult<(GroupId, L)> { // Lookup the entity in the database via the unique expression ID. let model = logical_expression::Entity::find_by_id(logical_expression_id.0) .one(&self.db) @@ -288,7 +294,7 @@ impl PersistentMemo { pub async fn add_logical_expression_to_group( &self, group_id: GroupId, - logical_expression: DefaultLogicalExpression, + logical_expression: L, children: &[GroupId], ) -> OptimizerResult> { // Check if the expression already exists anywhere in the memo table. @@ -323,7 +329,7 @@ impl PersistentMemo { .await?; // Finally, insert the fingerprint of the logical expression as well. 
- let new_expr: DefaultLogicalExpression = new_model.into(); + let new_expr: L = new_model.into(); let kind = new_expr.kind(); // In order to calculate a correct fingerprint, we will want to use the IDs of the root @@ -333,7 +339,7 @@ impl PersistentMemo { let root_id = self.get_root_group(child_id).await?; rewrites.push((child_id, root_id)); } - let hash = new_expr.fingerprint_with_rewrite(&rewrites); + let hash = new_expr.rewrite(&rewrites).fingerprint(); let fingerprint = fingerprint::ActiveModel { id: NotSet, @@ -359,7 +365,7 @@ impl PersistentMemo { pub async fn add_physical_expression_to_group( &self, group_id: GroupId, - physical_expression: DefaultPhysicalExpression, + physical_expression: P, children: &[GroupId], ) -> OptimizerResult { // Check if the group actually exists. @@ -399,7 +405,7 @@ impl PersistentMemo { /// expression should _not_ have G2 as a child, and should be replaced with G1. pub async fn is_duplicate_logical_expression( &self, - logical_expression: &DefaultLogicalExpression, + logical_expression: &L, children: &[GroupId], ) -> OptimizerResult> { let model: logical_expression::Model = logical_expression.clone().into(); @@ -415,7 +421,7 @@ impl PersistentMemo { let root_id = self.get_root_group(child_id).await?; rewrites.push((child_id, root_id)); } - let fingerprint = logical_expression.fingerprint_with_rewrite(&rewrites); + let fingerprint = logical_expression.rewrite(&rewrites).fingerprint(); // Filter first by the fingerprint, and then the kind. // FIXME: The kind is already embedded into the fingerprint, so we may not actually need the @@ -447,7 +453,10 @@ impl PersistentMemo { } // Check for an exact match after rewrites. - if logical_expression.eq_with_rewrite(&expr, &rewrites) { + if logical_expression + .rewrite(&rewrites) + .is_duplicate(&expr.rewrite(&rewrites)) + { match_id = Some((group_id, expr_id)); // There should be at most one duplicate expression, so we can break here. 
@@ -473,7 +482,7 @@ impl PersistentMemo { /// expression, returning brand new IDs for both. pub async fn add_group( &self, - logical_expression: DefaultLogicalExpression, + logical_expression: L, children: &[GroupId], ) -> OptimizerResult> { @@ -517,7 +526,7 @@ impl PersistentMemo { .await?; // Finally, insert the fingerprint of the logical expression as well. - let new_logical_expression: DefaultLogicalExpression = new_expression.into(); + let new_logical_expression: L = new_expression.into(); let kind = new_logical_expression.kind(); // In order to calculate a correct fingerprint, we will want to use the IDs of the root @@ -527,7 +536,7 @@ impl PersistentMemo { let root_id = self.get_root_group(child_id).await?; rewrites.push((child_id, root_id)); } - let hash = new_logical_expression.fingerprint_with_rewrite(&rewrites); + let hash = new_logical_expression.rewrite(&rewrites).fingerprint(); let fingerprint = fingerprint::ActiveModel { id: NotSet, @@ -606,8 +615,8 @@ impl PersistentMemo { seen.insert(expr_id); } - let logical_expression: DefaultLogicalExpression = model.into(); - let hash = logical_expression.fingerprint_with_rewrite(&rewrites); + let logical_expression: L = model.into(); + let hash = logical_expression.rewrite(&rewrites).fingerprint(); let fingerprint = fingerprint::ActiveModel { id: NotSet, diff --git a/optd-mvp/src/memo/persistent/mod.rs b/optd-mvp/src/memo/persistent/mod.rs index ed64fc5..1f5466c 100644 --- a/optd-mvp/src/memo/persistent/mod.rs +++ b/optd-mvp/src/memo/persistent/mod.rs @@ -2,6 +2,7 @@ //! implements the `Memo` trait and supports memo table operations necessary for query optimization. use sea_orm::DatabaseConnection; +use std::marker::PhantomData; #[cfg(test)] mod tests; @@ -9,10 +10,16 @@ mod tests; /// A persistent memo table, backed by a database on disk. /// /// TODO more docs. 
-pub struct PersistentMemo { +pub struct PersistentMemo { /// This `PersistentMemo` is reliant on the SeaORM [`DatabaseConnection`] that stores all of the /// objects needed for query optimization. db: DatabaseConnection, + + /// Generic marker for a generic logical expression. + _phantom_logical: PhantomData, + + /// Generic marker for a generic physical expression. + _phantom_physical: PhantomData

, } mod implementation; diff --git a/optd-mvp/src/memo/persistent/tests.rs b/optd-mvp/src/memo/persistent/tests.rs index 7493363..12838f6 100644 --- a/optd-mvp/src/memo/persistent/tests.rs +++ b/optd-mvp/src/memo/persistent/tests.rs @@ -4,7 +4,7 @@ use crate::{expression::*, memo::persistent::PersistentMemo}; #[ignore] #[tokio::test] async fn test_simple_logical_duplicates() { - let memo = PersistentMemo::new().await; + let memo = PersistentMemo::::new().await; memo.cleanup().await; let scan = scan("t1".to_string()); @@ -95,7 +95,7 @@ async fn test_simple_add_physical_expression() { #[ignore] #[tokio::test] async fn test_simple_tree() { - let memo = PersistentMemo::new().await; + let memo = PersistentMemo::::new().await; memo.cleanup().await; // Create two scan groups. @@ -145,7 +145,7 @@ async fn test_simple_tree() { #[ignore] #[tokio::test] async fn test_simple_group_link() { - let memo = PersistentMemo::new().await; + let memo = PersistentMemo::::new().await; memo.cleanup().await; // Create two scan groups. @@ -198,10 +198,11 @@ async fn test_simple_group_link() { memo.cleanup().await; } +/// Tests merging groups up a chain. #[ignore] #[tokio::test] async fn test_group_merge_ladder() { - let memo = PersistentMemo::new().await; + let memo = PersistentMemo::::new().await; memo.cleanup().await; // Build up a tree of true filters that should be collapsed into a single table scan. @@ -259,7 +260,7 @@ async fn test_group_merge_ladder() { #[ignore] #[tokio::test] async fn test_group_merge() { - let memo = PersistentMemo::new().await; + let memo = PersistentMemo::::new().await; memo.cleanup().await; // Create a base group. @@ -386,7 +387,7 @@ async fn test_group_merge() { #[ignore] #[tokio::test] async fn test_cascading_merge() { - let memo = PersistentMemo::new().await; + let memo = PersistentMemo::::new().await; memo.cleanup().await; // Create the base groups. 
From 2c015b46e1df8894a4cc6e6b459790cbae984b27 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Mon, 9 Dec 2024 09:49:04 -0500 Subject: [PATCH 11/13] add errors and panics documentation in memo module --- optd-mvp/src/lib.rs | 18 ++- optd-mvp/src/memo/mod.rs | 12 +- .../src/memo/persistent/implementation.rs | 132 ++++++++++++++---- optd-mvp/src/memo/persistent/mod.rs | 2 +- optd-mvp/src/migrator/mod.rs | 13 ++ 5 files changed, 140 insertions(+), 37 deletions(-) diff --git a/optd-mvp/src/lib.rs b/optd-mvp/src/lib.rs index 48a4c78..2e238e0 100644 --- a/optd-mvp/src/lib.rs +++ b/optd-mvp/src/lib.rs @@ -1,3 +1,8 @@ +//! This crate is an attempt to make an MVP of a duplicate-detecting memo table for query +//! optimization. +//! +//! TODO write more docs. + use sea_orm::*; use sea_orm_migration::prelude::*; use thiserror::Error; @@ -7,10 +12,8 @@ use migrator::Migrator; mod entities; -mod memo; -use memo::MemoError; - -mod expression; +pub mod expression; +pub mod memo; /// The filename of the SQLite database for migration. pub const DATABASE_FILENAME: &str = "sqlite.db"; @@ -18,12 +21,13 @@ pub const DATABASE_FILENAME: &str = "sqlite.db"; pub const DATABASE_URL: &str = "sqlite:./sqlite.db?mode=rwc"; /// An error type wrapping all the different kinds of error the optimizer might raise. +#[allow(missing_docs)] #[derive(Error, Debug)] pub enum OptimizerError { #[error("SeaORM error")] Database(#[from] sea_orm::error::DbErr), #[error("Memo table logical error")] - Memo(#[from] MemoError), + Memo(#[from] memo::MemoError), #[error("unknown error")] Unknown, } @@ -32,6 +36,10 @@ pub enum OptimizerError { pub type OptimizerResult = Result; /// Applies all migrations. +/// +/// # Errors +/// +/// Returns a [`DbErr`] if unable to apply any migrations. 
pub async fn migrate(db: &DatabaseConnection) -> Result<(), DbErr> { Migrator::refresh(db).await } diff --git a/optd-mvp/src/memo/mod.rs b/optd-mvp/src/memo/mod.rs index 08b74db..690bdee 100644 --- a/optd-mvp/src/memo/mod.rs +++ b/optd-mvp/src/memo/mod.rs @@ -2,6 +2,12 @@ //! //! TODO more docs. +#![warn(missing_docs)] +#![warn(clippy::missing_docs_in_private_items)] +#![warn(clippy::missing_errors_doc)] +#![warn(clippy::missing_panics_doc)] +#![warn(clippy::missing_safety_doc)] + use serde::{Deserialize, Serialize}; use thiserror::Error; @@ -21,12 +27,16 @@ pub struct PhysicalExpressionId(pub i32); /// A status enum representing the different states a group can be during query optimization. #[repr(u8)] pub enum GroupStatus { + /// Represents a group that is currently being logically explored. InProgress = 0, + /// Represents a logically explored group that is currently being physically optimized. Explored = 1, + /// Represents a fully optimized group. Optimized = 2, } /// The different kinds of errors that might occur while running operations on a memo table. +#[allow(missing_docs)] #[derive(Error, Debug)] pub enum MemoError { #[error("unknown group ID {0:?}")] @@ -39,4 +49,4 @@ pub enum MemoError { InvalidExpression, } -mod persistent; +pub mod persistent; diff --git a/optd-mvp/src/memo/persistent/implementation.rs b/optd-mvp/src/memo/persistent/implementation.rs index 8d994b3..9eee4d9 100644 --- a/optd-mvp/src/memo/persistent/implementation.rs +++ b/optd-mvp/src/memo/persistent/implementation.rs @@ -27,6 +27,10 @@ where { /// Creates a new `PersistentMemo` struct by connecting to a database defined at /// [`DATABASE_URL`]. + /// + /// # Panics + /// + /// Panics if unable to create a database connection to [`DATABASE_URL`]. 
pub async fn new() -> Self { Self { db: Database::connect(DATABASE_URL).await.unwrap(), @@ -39,7 +43,14 @@ where /// /// Since there is no asynchronous drop yet in Rust, in order to drop all objects in the /// database, the user must call this manually. + /// + /// # Panics + /// + /// May panic if unable to delete entities from any table. pub async fn cleanup(&self) { + /// Simple private macro to teardown all tables in the database. + /// Note that these have to be specified manually, so when adding a new table to the + /// database, we must make sure to add that table here. macro_rules! delete_all { ($($module: ident),+ $(,)?) => { $( @@ -63,9 +74,11 @@ where /// Retrieves a [`group::Model`] given its ID. /// - /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. - /// /// FIXME: use an in-memory representation of a group instead. + /// + /// # Errors + /// + /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. pub async fn get_group(&self, group_id: GroupId) -> OptimizerResult { Ok(group::Entity::find_by_id(group_id.0) .one(&self.db) @@ -80,39 +93,48 @@ where /// /// This function uses the path compression optimization, which amortizes the cost to a single /// lookup (theoretically in constant time, but we must be wary of the I/O roundtrip). + /// + /// # Errors + /// + /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. This function + /// also performs path compression pointer updates, so any of those updates can fail with a + /// [`DbErr`]. pub async fn get_root_group(&self, group_id: GroupId) -> OptimizerResult { - let mut curr_group = self.get_group(group_id).await?; - - // Traverse up the path and find the root group, keeping track of groups we have visited. 
- let mut path = vec![]; - while let Some(parent_id) = curr_group.parent_id { - let next_group = self.get_group(GroupId(parent_id)).await?; - path.push(curr_group); - curr_group = next_group; - } + let curr_group = self.get_group(group_id).await?; + + // If we have no parent, then we are at the root. + let Some(parent_id) = curr_group.parent_id else { + return Ok(GroupId(curr_group.id)); + }; - let root_id = GroupId(curr_group.id); + // Recursively find the root group ID. + let root_id = Box::pin(self.get_root_group(GroupId(parent_id))).await?; // Path Compression Optimization: // For every group along the path that we walked, set their parent id pointer to the root. // This allows for an amortized O(1) cost for `get_root_group`. - for group in path { - let mut active_group = group.into_active_model(); + let mut active_group = curr_group.into_active_model(); - // Update the group to point to the new parent. - active_group.parent_id = Set(Some(root_id.0)); - active_group.update(&self.db).await?; - } + // Update the group to point to the new parent. + active_group.parent_id = Set(Some(root_id.0)); + active_group.update(&self.db).await?; - Ok(root_id) + Ok(GroupId(root_id.0)) } /// Retrieves every group ID of groups that share the same root group with the input group. /// - /// If a group does not exist in the cycle, returns a [`MemoError::UnknownGroup`] error. - /// /// The group records form a union-find data structure that also maintains a circular linked /// list in every set that allows us to iterate over all elements in a set in linear time. + /// + /// # Errors + /// + /// If the input group does not exist, or if any pointer along the path is invalid, returns a + /// [`MemoError::UnknownGroup`] error. + /// + /// # Panics + /// + /// Panics if the embedded union-find data structure is malformed. pub async fn get_group_set(&self, group_id: GroupId) -> OptimizerResult> { // Iterate over the circular linked list until we reach ourselves again. 
let base_group = self.get_group(group_id).await?; @@ -148,6 +170,8 @@ where /// Retrieves a [`physical_expression::Model`] given a [`PhysicalExpressionId`]. /// + /// # Errors + /// /// If the physical expression does not exist, returns a /// [`MemoError::UnknownPhysicalExpression`] error. pub async fn get_physical_expression( @@ -168,6 +192,8 @@ where /// Retrieves a [`logical_expression::Model`] given its [`LogicalExpressionId`]. /// + /// # Errors + /// /// If the logical expression does not exist, returns a [`MemoError::UnknownLogicalExpression`] /// error. pub async fn get_logical_expression( @@ -188,13 +214,19 @@ where /// Retrieves all of the logical expression "children" IDs of a group. /// - /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. - /// /// FIXME: `find_related` does not work for some reason, have to use manual `filter`. + /// + /// # Errors + /// + /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. Can also return + /// a [`DbErr`] if something goes wrong with the filter scan. pub async fn get_logical_children( &self, group_id: GroupId, ) -> OptimizerResult> { + // First ensure that the group exists. + let _ = self.get_group(group_id).await?; + // Search for expressions that have the given parent group ID. let children = logical_expression::Entity::find() .filter(logical_expression::Column::GroupId.eq(group_id.0)) @@ -209,11 +241,19 @@ where /// Retrieves all of the physical expression "children" IDs of a group. /// - /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. + /// FIXME: `find_related` does not work for some reason, have to use manual `filter`. + /// + /// # Errors + /// + /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. Can also return + /// a [`DbErr`] if something goes wrong with the filter scan. 
pub async fn get_physical_children( &self, group_id: GroupId, ) -> OptimizerResult> { + // First ensure that the group exists. + let _ = self.get_group(group_id).await?; + // Search for expressions that have the given parent group ID. let children = physical_expression::Entity::find() .filter(physical_expression::Column::GroupId.eq(group_id.0)) @@ -228,7 +268,10 @@ where /// Updates / replaces a group's status. Returns the previous group status. /// - /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. + /// # Errors + /// + /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. Can also return a + /// [`DbErr`] if the update fails. pub async fn update_group_status( &self, group_id: GroupId, @@ -246,7 +289,7 @@ where 0 => GroupStatus::InProgress, 1 => GroupStatus::Explored, 2 => GroupStatus::Optimized, - _ => panic!("encountered an invalid group status"), + _ => unreachable!("encountered an invalid group status"), }; Ok(old_status) @@ -255,10 +298,13 @@ where /// Updates / replaces a group's best physical plan (winner). Optionally returns the previous /// winner's physical expression ID. /// - /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. - /// /// FIXME: In the future, this should first check that we aren't overwriting a winner that was /// updated from another thread by comparing against the cost of the plan. + /// + /// # Errors + /// + /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. Can also return a + /// [`DbErr`] if the update fails. pub async fn update_group_winner( &self, group_id: GroupId, @@ -291,6 +337,13 @@ where /// /// If the memo table detects that the input is unique, it will insert the expression into the /// input group and return an `Ok(Ok(expression_id))`. + /// + /// # Errors + /// + /// Note that the return value is a [`Result`] wrapped in an [`OptimizerResult`]. 
The outer + /// result is used for raising [`DbErr`] or other database/IO-related errors. The inner result + /// is used for notifying the caller if the expression that they attempted to insert was a + /// duplicate expression or not. pub async fn add_logical_expression_to_group( &self, group_id: GroupId, @@ -359,9 +412,13 @@ where /// The caller is required to pass in a slice of [`GroupId`] that represent the child groups of /// the input expression. /// - /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. - /// /// On successful insertion, returns the ID of the physical expression. + /// + /// # Errors + /// + /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. Can also fail if + /// insertion of the new physical expression or any of its child junction entries are not able + /// to be inserted. pub async fn add_physical_expression_to_group( &self, group_id: GroupId, @@ -403,6 +460,10 @@ where /// This function assumes that the child groups of the expression are currently roots of their /// group sets. For example, if G1 and G2 should be merged, and G1 is the root, then the input /// expression should _not_ have G2 as a child, and should be replaced with G1. + /// + /// # Errors + /// + /// Returns a [`DbErr`] when a database operation fails. pub async fn is_duplicate_logical_expression( &self, logical_expression: &L, @@ -480,6 +541,13 @@ where /// /// If the expression does not exist, this function will create a new group and a new /// expression, returning brand new IDs for both. + /// + /// # Errors + /// + /// Note that the return value is a [`Result`] wrapped in an [`OptimizerResult`]. The outer + /// result is used for raising [`DbErr`] or other database/IO-related errors. The inner result + /// is used for notifying the caller if the expression/group that they attempted to insert was a + /// duplicate expression or not. 
pub async fn add_group( &self, logical_expression: L, @@ -558,6 +626,10 @@ where /// TODO write docs. /// TODO highly inefficient, need to understand metrics and performance testing. /// TODO Optimization: add rank / size into data structure + /// + /// # Errors + /// + /// TODO pub async fn merge_groups( &self, left_group_id: GroupId, diff --git a/optd-mvp/src/memo/persistent/mod.rs b/optd-mvp/src/memo/persistent/mod.rs index 1f5466c..55fe049 100644 --- a/optd-mvp/src/memo/persistent/mod.rs +++ b/optd-mvp/src/memo/persistent/mod.rs @@ -22,4 +22,4 @@ pub struct PersistentMemo { _phantom_physical: PhantomData

, } -mod implementation; +pub mod implementation; diff --git a/optd-mvp/src/migrator/mod.rs b/optd-mvp/src/migrator/mod.rs index cbc39ae..92f100a 100644 --- a/optd-mvp/src/migrator/mod.rs +++ b/optd-mvp/src/migrator/mod.rs @@ -1,7 +1,20 @@ +//! This module defines the tables and their schemas for representing a persistent memo table. +//! +//! The most important tables represented here are the [`group`], [`logical_expression`], and +//! [`physical_expression`] tables. See the corresponding modules for more information on their +//! relations and fields. +//! +//! See the SeaORM docs for more information specific to migrations. +//! +//! [`group`]: memo::group +//! [`logical_expression`]: memo::logical_expression +//! [`physical_expression`]: memo::physical_expression + use sea_orm_migration::prelude::*; mod memo; +/// A unit struct that implements the [`MigratorTrait`] for running custom migrations. pub struct Migrator; #[async_trait::async_trait] From 4d582214fcdde3e8138a0ddd0be2ebc8f547b1c7 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Mon, 9 Dec 2024 10:16:35 -0500 Subject: [PATCH 12/13] implement transaction on all memo table operations --- .../src/memo/persistent/implementation.rs | 105 +++++++--- optd-mvp/src/memo/persistent/mod.rs | 14 +- optd-mvp/src/memo/persistent/tests.rs | 191 +++++++++--------- 3 files changed, 187 insertions(+), 123 deletions(-) diff --git a/optd-mvp/src/memo/persistent/implementation.rs b/optd-mvp/src/memo/persistent/implementation.rs index 9eee4d9..8ec6295 100644 --- a/optd-mvp/src/memo/persistent/implementation.rs +++ b/optd-mvp/src/memo/persistent/implementation.rs @@ -6,7 +6,7 @@ #![allow(dead_code)] -use super::PersistentMemo; +use super::{PersistentMemo, PersistentMemoTransaction}; use crate::{ entities::*, expression::{LogicalExpression, PhysicalExpression}, @@ -14,9 +14,8 @@ use crate::{ OptimizerResult, DATABASE_URL, }; use sea_orm::{ - entity::prelude::*, - entity::{IntoActiveModel, NotSet, Set}, - Database, + 
entity::{prelude::*, IntoActiveModel, NotSet, Set}, + Database, DatabaseTransaction, TransactionTrait, }; use std::{collections::HashSet, marker::PhantomData}; @@ -39,6 +38,15 @@ where } } + /// Starts a new database transaction. + /// + /// # Errors + /// + /// Returns a [`DbErr`] if unable to create a new transaction. + pub async fn begin(&self) -> OptimizerResult> { + Ok(PersistentMemoTransaction::new(self.db.begin().await?).await) + } + /// Deletes all objects in the backing database. /// /// Since there is no asynchronous drop yet in Rust, in order to drop all objects in the @@ -71,6 +79,39 @@ where physical_children }; } +} + +impl PersistentMemoTransaction +where + L: LogicalExpression, + P: PhysicalExpression, +{ + /// Creates a new transaction object. + pub async fn new(txn: DatabaseTransaction) -> Self { + Self { + txn, + _phantom_logical: PhantomData, + _phantom_physical: PhantomData, + } + } + + /// Commits the transaction. + /// + /// # Errors + /// + /// Returns a [`DbErr`] if unable to commit the transaction. + pub async fn commit(self) -> OptimizerResult<()> { + Ok(self.txn.commit().await?) + } + + /// Rolls back the transaction. + /// + /// # Errors + /// + /// Returns a [`DbErr`] if unable to roll back the transaction. + pub async fn rollback(self) -> OptimizerResult<()> { + Ok(self.txn.rollback().await?) + } /// Retrieves a [`group::Model`] given its ID. /// @@ -81,7 +122,7 @@ where /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. pub async fn get_group(&self, group_id: GroupId) -> OptimizerResult { Ok(group::Entity::find_by_id(group_id.0) - .one(&self.db) + .one(&self.txn) .await? .ok_or(MemoError::UnknownGroup(group_id))?) } @@ -117,7 +158,7 @@ where // Update the group to point to the new parent. 
active_group.parent_id = Set(Some(root_id.0)); - active_group.update(&self.db).await?; + active_group.update(&self.txn).await?; Ok(GroupId(root_id.0)) } @@ -180,7 +221,7 @@ where ) -> OptimizerResult<(GroupId, P)> { // Lookup the entity in the database via the unique expression ID. let model = physical_expression::Entity::find_by_id(physical_expression_id.0) - .one(&self.db) + .one(&self.txn) .await? .ok_or(MemoError::UnknownPhysicalExpression(physical_expression_id))?; @@ -202,7 +243,7 @@ where ) -> OptimizerResult<(GroupId, L)> { // Lookup the entity in the database via the unique expression ID. let model = logical_expression::Entity::find_by_id(logical_expression_id.0) - .one(&self.db) + .one(&self.txn) .await? .ok_or(MemoError::UnknownLogicalExpression(logical_expression_id))?; @@ -230,7 +271,7 @@ where // Search for expressions that have the given parent group ID. let children = logical_expression::Entity::find() .filter(logical_expression::Column::GroupId.eq(group_id.0)) - .all(&self.db) + .all(&self.txn) .await? .into_iter() .map(|m| LogicalExpressionId(m.id)) @@ -257,7 +298,7 @@ where // Search for expressions that have the given parent group ID. let children = physical_expression::Entity::find() .filter(physical_expression::Column::GroupId.eq(group_id.0)) - .all(&self.db) + .all(&self.txn) .await? .into_iter() .map(|m| PhysicalExpressionId(m.id)) @@ -273,7 +314,7 @@ where /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. Can also return a /// [`DbErr`] if the update fails. pub async fn update_group_status( - &self, + &mut self, group_id: GroupId, status: GroupStatus, ) -> OptimizerResult { @@ -283,7 +324,7 @@ where // Update the group's status. 
let old_status = group.status; group.status = Set(status as u8 as i8); - group.update(&self.db).await?; + group.update(&self.txn).await?; let old_status = match old_status.unwrap() { 0 => GroupStatus::InProgress, @@ -306,7 +347,7 @@ where /// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. Can also return a /// [`DbErr`] if the update fails. pub async fn update_group_winner( - &self, + &mut self, group_id: GroupId, physical_expression_id: PhysicalExpressionId, ) -> OptimizerResult> { @@ -316,7 +357,7 @@ where // Update the group to point to the new winner. let old_id = group.winner; group.winner = Set(Some(physical_expression_id.0)); - group.update(&self.db).await?; + group.update(&self.txn).await?; // Note that the `unwrap` here is unwrapping the `ActiveValue`, not the `Option`. let old_id = old_id.unwrap().map(PhysicalExpressionId); @@ -345,7 +386,7 @@ where /// is used for notifying the caller if the expression that they attempted to insert was a /// duplicate expression or not. pub async fn add_logical_expression_to_group( - &self, + &mut self, group_id: GroupId, logical_expression: L, children: &[GroupId], @@ -366,7 +407,7 @@ where let mut active_model = model.into_active_model(); active_model.group_id = Set(group_id.0); active_model.id = NotSet; - let new_model = active_model.insert(&self.db).await?; + let new_model = active_model.insert(&self.txn).await?; let expr_id = new_model.id; @@ -378,7 +419,7 @@ where } })) .on_empty_do_nothing() - .exec(&self.db) + .exec(&self.txn) .await?; // Finally, insert the fingerprint of the logical expression as well. @@ -401,7 +442,7 @@ where hash: Set(hash), }; fingerprint::Entity::insert(fingerprint) - .exec(&self.db) + .exec(&self.txn) .await?; Ok(Ok(LogicalExpressionId(expr_id))) @@ -420,7 +461,7 @@ where /// insertion of the new physical expression or any of its child junction entries are not able /// to be inserted. 
pub async fn add_physical_expression_to_group( - &self, + &mut self, group_id: GroupId, physical_expression: P, children: &[GroupId], @@ -433,7 +474,7 @@ where let mut active_model = model.into_active_model(); active_model.group_id = Set(group_id.0); active_model.id = NotSet; - let new_model = active_model.insert(&self.db).await?; + let new_model = active_model.insert(&self.txn).await?; // Insert the child groups of the expression into the junction / children table. physical_children::Entity::insert_many(children.iter().copied().map(|child_id| { @@ -443,7 +484,7 @@ where } })) .on_empty_do_nothing() - .exec(&self.db) + .exec(&self.txn) .await?; Ok(PhysicalExpressionId(new_model.id)) @@ -490,7 +531,7 @@ where let potential_matches = fingerprint::Entity::find() .filter(fingerprint::Column::Hash.eq(fingerprint)) .filter(fingerprint::Column::Kind.eq(kind)) - .all(&self.db) + .all(&self.txn) .await?; if potential_matches.is_empty() { @@ -549,7 +590,7 @@ where /// is used for notifying the caller if the expression/group that they attempted to insert was a /// duplicate expression or not. pub async fn add_group( - &self, + &mut self, logical_expression: L, children: &[GroupId], ) -> OptimizerResult> @@ -569,7 +610,7 @@ where }; // Create the new group. - let group_res = group::Entity::insert(group).exec(&self.db).await?; + let group_res = group::Entity::insert(group).exec(&self.txn).await?; let group_id = group_res.last_insert_id; // Insert the input expression into the newly created group. 
@@ -577,7 +618,7 @@ where let mut active_expression = expression.into_active_model(); active_expression.group_id = Set(group_id); active_expression.id = NotSet; - let new_expression = active_expression.insert(&self.db).await?; + let new_expression = active_expression.insert(&self.txn).await?; let group_id = new_expression.group_id; let expr_id = new_expression.id; @@ -590,7 +631,7 @@ where } })) .on_empty_do_nothing() - .exec(&self.db) + .exec(&self.txn) .await?; // Finally, insert the fingerprint of the logical expression as well. @@ -613,7 +654,7 @@ where hash: Set(hash), }; fingerprint::Entity::insert(fingerprint) - .exec(&self.db) + .exec(&self.txn) .await?; Ok(Ok((GroupId(group_id), LogicalExpressionId(expr_id)))) @@ -631,7 +672,7 @@ where /// /// TODO pub async fn merge_groups( - &self, + &mut self, left_group_id: GroupId, right_group_id: GroupId, ) -> OptimizerResult { @@ -665,7 +706,7 @@ where .load_many_to_many( logical_expression::Entity, logical_children::Entity, - &self.db, + &self.txn, ) .await?; @@ -697,7 +738,7 @@ where hash: Set(hash), }; fingerprint::Entity::insert(fingerprint) - .exec(&self.db) + .exec(&self.txn) .await?; } @@ -708,8 +749,8 @@ where active_left_root.next_id = Set(Some(right_next)); active_right_root.next_id = Set(Some(left_next)); - active_left_root.update(&self.db).await?; - active_right_root.update(&self.db).await?; + active_left_root.update(&self.txn).await?; + active_right_root.update(&self.txn).await?; Ok(right_root_id) } diff --git a/optd-mvp/src/memo/persistent/mod.rs b/optd-mvp/src/memo/persistent/mod.rs index 55fe049..dfddffa 100644 --- a/optd-mvp/src/memo/persistent/mod.rs +++ b/optd-mvp/src/memo/persistent/mod.rs @@ -1,7 +1,7 @@ //! This module contains the definition and implementation of the [`PersistentMemo`] type, which //! implements the `Memo` trait and supports memo table operations necessary for query optimization. 
-use sea_orm::DatabaseConnection; +use sea_orm::{DatabaseConnection, DatabaseTransaction}; use std::marker::PhantomData; #[cfg(test)] @@ -22,4 +22,16 @@ pub struct PersistentMemo { _phantom_physical: PhantomData

, } +/// A handle to an open database transaction through which memo table operations are executed. +pub struct PersistentMemoTransaction { + /// A database transaction over the [`PersistentMemo`] table. + txn: DatabaseTransaction, + + /// Generic marker for a generic logical expression. + _phantom_logical: PhantomData, + + /// Generic marker for a generic physical expression. + _phantom_physical: PhantomData

, +} + pub mod implementation; diff --git a/optd-mvp/src/memo/persistent/tests.rs b/optd-mvp/src/memo/persistent/tests.rs index 12838f6..8fe1aeb 100644 --- a/optd-mvp/src/memo/persistent/tests.rs +++ b/optd-mvp/src/memo/persistent/tests.rs @@ -1,12 +1,14 @@ use crate::{expression::*, memo::persistent::PersistentMemo}; -/// Tests that exact expression matches are detected and handled by the memo table. +/// Tests that exact expression matches are detected and handled by the txn table. #[ignore] #[tokio::test] async fn test_simple_logical_duplicates() { let memo = PersistentMemo::::new().await; memo.cleanup().await; + let mut txn = memo.begin().await.unwrap(); + let scan = scan("t1".to_string()); let scan1a = scan.clone(); let scan1b = scan.clone(); @@ -14,19 +16,19 @@ async fn test_simple_logical_duplicates() { let scan2b = scan.clone(); // Insert a new group and its corresponding expression. - let (group_id, logical_expression_id) = memo.add_group(scan, &[]).await.unwrap().ok().unwrap(); + let (group_id, logical_expression_id) = txn.add_group(scan, &[]).await.unwrap().ok().unwrap(); // Test `add_logical_expression`. { // Attempting to create a new group with a duplicate expression should fail every time. let (group_id_1a, logical_expression_id_1a) = - memo.add_group(scan1a, &[]).await.unwrap().err().unwrap(); + txn.add_group(scan1a, &[]).await.unwrap().err().unwrap(); assert_eq!(group_id, group_id_1a); assert_eq!(logical_expression_id, logical_expression_id_1a); // Try again just in case... let (group_id_1b, logical_expression_id_1b) = - memo.add_group(scan1b, &[]).await.unwrap().err().unwrap(); + txn.add_group(scan1b, &[]).await.unwrap().err().unwrap(); assert_eq!(group_id, group_id_1b); assert_eq!(logical_expression_id, logical_expression_id_1b); } @@ -34,7 +36,7 @@ async fn test_simple_logical_duplicates() { // Test `add_logical_expression_to_group`. { // Attempting to add a duplicate expression into the same group should also fail every time. 
- let (group_id_2a, logical_expression_id_2a) = memo + let (group_id_2a, logical_expression_id_2a) = txn .add_logical_expression_to_group(group_id, scan2a, &[]) .await .unwrap() @@ -43,7 +45,7 @@ async fn test_simple_logical_duplicates() { assert_eq!(group_id, group_id_2a); assert_eq!(logical_expression_id, logical_expression_id_2a); - let (group_id_2b, logical_expression_id_2b) = memo + let (group_id_2b, logical_expression_id_2b) = txn .add_logical_expression_to_group(group_id, scan2b, &[]) .await .unwrap() @@ -53,6 +55,7 @@ async fn test_simple_logical_duplicates() { assert_eq!(logical_expression_id, logical_expression_id_2b); } + txn.commit().await.unwrap(); memo.cleanup().await; } @@ -64,23 +67,25 @@ async fn test_simple_logical_duplicates() { #[ignore] #[tokio::test] async fn test_simple_add_physical_expression() { - let memo = PersistentMemo::new().await; + let memo = PersistentMemo::::new().await; memo.cleanup().await; + let mut txn = memo.begin().await.unwrap(); + // Insert a new group and its corresponding expression. let scan = scan("t1".to_string()); - let (group_id, _) = memo.add_group(scan, &[]).await.unwrap().ok().unwrap(); + let (group_id, _) = txn.add_group(scan, &[]).await.unwrap().ok().unwrap(); // Insert two identical physical expressions into the _same_ group. 
let table_scan_1 = table_scan("t1".to_string()); let table_scan_2 = table_scan_1.clone(); - let physical_expression_id_1 = memo + let physical_expression_id_1 = txn .add_physical_expression_to_group(group_id, table_scan_1, &[]) .await .unwrap(); - let physical_expression_id_2 = memo + let physical_expression_id_2 = txn .add_physical_expression_to_group(group_id, table_scan_2, &[]) .await .unwrap(); @@ -88,28 +93,31 @@ async fn test_simple_add_physical_expression() { // Since physical expressions do not need duplicate detection, assert_ne!(physical_expression_id_1, physical_expression_id_2); + txn.commit().await.unwrap(); memo.cleanup().await; } -/// Tests if the memo tables able to correctly retrieve a group's expressions. +/// Tests if the txn tables able to correctly retrieve a group's expressions. #[ignore] #[tokio::test] async fn test_simple_tree() { let memo = PersistentMemo::::new().await; memo.cleanup().await; + let mut txn = memo.begin().await.unwrap(); + // Create two scan groups. let scan1: DefaultLogicalExpression = scan("t1".to_string()); let scan2 = scan("t2".to_string()); - let (scan_id_1, scan_expr_id_1) = memo.add_group(scan1, &[]).await.unwrap().ok().unwrap(); - let (scan_id_2, scan_expr_id_2) = memo.add_group(scan2, &[]).await.unwrap().ok().unwrap(); + let (scan_id_1, scan_expr_id_1) = txn.add_group(scan1, &[]).await.unwrap().ok().unwrap(); + let (scan_id_2, scan_expr_id_2) = txn.add_group(scan2, &[]).await.unwrap().ok().unwrap(); assert_eq!( - memo.get_logical_children(scan_id_1).await.unwrap(), + txn.get_logical_children(scan_id_1).await.unwrap(), &[scan_expr_id_1] ); assert_eq!( - memo.get_logical_children(scan_id_2).await.unwrap(), + txn.get_logical_children(scan_id_2).await.unwrap(), &[scan_expr_id_2] ); @@ -118,14 +126,14 @@ async fn test_simple_tree() { let join2 = join(scan_id_2, scan_id_1, "t1.a = t2.b".to_string()); // Create the group, adding the first expression. 
- let (join_id, join_expr_id_1) = memo + let (join_id, join_expr_id_1) = txn .add_group(join1, &[scan_id_1, scan_id_2]) .await .unwrap() .ok() .unwrap(); // Add the second expression. - let join_expr_id_2 = memo + let join_expr_id_2 = txn .add_logical_expression_to_group(join_id, join2, &[scan_id_2, scan_id_1]) .await .unwrap() @@ -134,10 +142,11 @@ async fn test_simple_tree() { assert_ne!(join_expr_id_1, join_expr_id_2); assert_eq!( - memo.get_logical_children(join_id).await.unwrap(), + txn.get_logical_children(join_id).await.unwrap(), &[join_expr_id_1, join_expr_id_2] ); + txn.commit().await.unwrap(); memo.cleanup().await; } @@ -148,11 +157,13 @@ async fn test_simple_group_link() { let memo = PersistentMemo::::new().await; memo.cleanup().await; + let mut txn = memo.begin().await.unwrap(); + // Create two scan groups. let scan1 = scan("t1".to_string()); let scan2 = scan("t2".to_string()); - let (scan_id_1, _) = memo.add_group(scan1, &[]).await.unwrap().ok().unwrap(); - let (scan_id_2, _) = memo.add_group(scan2, &[]).await.unwrap().ok().unwrap(); + let (scan_id_1, _) = txn.add_group(scan1, &[]).await.unwrap().ok().unwrap(); + let (scan_id_2, _) = txn.add_group(scan2, &[]).await.unwrap().ok().unwrap(); // Create two join expression that should be in the same group. 
// Even though these are obviously the same expression (to humans), the fingerprints will be @@ -161,13 +172,13 @@ async fn test_simple_group_link() { let join2 = join(scan_id_2, scan_id_1, "t2.b = t1.a".to_string()); let join_unknown = join2.clone(); - let (join_group_1, _) = memo + let (join_group_1, _) = txn .add_group(join1, &[scan_id_1, scan_id_2]) .await .unwrap() .ok() .unwrap(); - let (join_group_2, join_expr_2) = memo + let (join_group_2, join_expr_2) = txn .add_group(join2, &[scan_id_2, scan_id_1]) .await .unwrap() @@ -176,9 +187,9 @@ async fn test_simple_group_link() { assert_ne!(join_group_1, join_group_2); // Assume that some rule was applied to `join1`, and it outputs something like `join_unknown`. - // The memo table will tell us that `join_unknown == join2`. + // The txn table will tell us that `join_unknown == join2`. // Take note here that `join_unknown` is a clone of `join2`, not `join1`. - let (existing_group, not_actually_new_expr_id) = memo + let (existing_group, not_actually_new_expr_id) = txn .add_logical_expression_to_group(join_group_1, join_unknown, &[scan_id_2, scan_id_1]) .await .unwrap() @@ -187,14 +198,15 @@ async fn test_simple_group_link() { assert_eq!(existing_group, join_group_2); assert_eq!(not_actually_new_expr_id, join_expr_2); - // The above tells the application that the expression already exists in the memo, specifically + // The above tells the application that the expression already exists in the txn, specifically // under `existing_group`. Thus, we should link these two groups together. 
- memo.merge_groups(join_group_1, join_group_2).await.unwrap(); + txn.merge_groups(join_group_1, join_group_2).await.unwrap(); - let test_root_1 = memo.get_root_group(join_group_1).await.unwrap(); - let test_root_2 = memo.get_root_group(join_group_2).await.unwrap(); + let test_root_1 = txn.get_root_group(join_group_1).await.unwrap(); + let test_root_2 = txn.get_root_group(join_group_2).await.unwrap(); assert_eq!(test_root_1, test_root_2); + txn.commit().await.unwrap(); memo.cleanup().await; } @@ -205,12 +217,14 @@ async fn test_group_merge_ladder() { let memo = PersistentMemo::::new().await; memo.cleanup().await; + let mut txn = memo.begin().await.unwrap(); + // Build up a tree of true filters that should be collapsed into a single table scan. let scan_base = scan("t1".to_string()); - let (scan_id, _) = memo.add_group(scan_base, &[]).await.unwrap().ok().unwrap(); + let (scan_id, _) = txn.add_group(scan_base, &[]).await.unwrap().ok().unwrap(); let filter0 = filter(scan_id, "true".to_string()); - let (filter_id_0, _) = memo + let (filter_id_0, _) = txn .add_group(filter0, &[scan_id]) .await .unwrap() @@ -218,7 +232,7 @@ async fn test_group_merge_ladder() { .unwrap(); let filter1 = filter(filter_id_0, "true".to_string()); - let (filter_id_1, _) = memo + let (filter_id_1, _) = txn .add_group(filter1, &[scan_id]) .await .unwrap() @@ -226,7 +240,7 @@ async fn test_group_merge_ladder() { .unwrap(); let filter2 = filter(filter_id_1, "true".to_string()); - let (filter_id_2, _) = memo + let (filter_id_2, _) = txn .add_group(filter2, &[scan_id]) .await .unwrap() @@ -234,7 +248,7 @@ async fn test_group_merge_ladder() { .unwrap(); let filter3 = filter(filter_id_2, "true".to_string()); - let (filter_id_3, _) = memo + let (filter_id_3, _) = txn .add_group(filter3, &[scan_id]) .await .unwrap() @@ -243,16 +257,17 @@ async fn test_group_merge_ladder() { let mut groups = vec![scan_id, filter_id_0, filter_id_1, filter_id_2, filter_id_3]; - let m0 = memo.merge_groups(filter_id_3, 
filter_id_2).await.unwrap(); - let m1 = memo.merge_groups(filter_id_2, filter_id_1).await.unwrap(); - let m2 = memo.merge_groups(filter_id_1, filter_id_0).await.unwrap(); - let root = memo.merge_groups(filter_id_0, scan_id).await.unwrap(); + let m0 = txn.merge_groups(filter_id_3, filter_id_2).await.unwrap(); + let m1 = txn.merge_groups(filter_id_2, filter_id_1).await.unwrap(); + let m2 = txn.merge_groups(filter_id_1, filter_id_0).await.unwrap(); + let root = txn.merge_groups(filter_id_0, scan_id).await.unwrap(); groups.extend_from_slice(&[m0, m1, m2, root]); for group_id in groups { - assert_eq!(root, memo.get_root_group(group_id).await.unwrap()); + assert_eq!(root, txn.get_root_group(group_id).await.unwrap()); } + txn.commit().await.unwrap(); memo.cleanup().await; } @@ -263,9 +278,11 @@ async fn test_group_merge() { let memo = PersistentMemo::::new().await; memo.cleanup().await; + let mut txn = memo.begin().await.unwrap(); + // Create a base group. let scan1 = scan("t1".to_string()); - let (scan_id_1, _) = memo.add_group(scan1, &[]).await.unwrap().ok().unwrap(); + let (scan_id_1, _) = txn.add_group(scan1, &[]).await.unwrap().ok().unwrap(); // Create a bunch of equivalent groups. 
let filter0 = filter(scan_id_1, "true".to_string()); @@ -276,49 +293,49 @@ async fn test_group_merge() { let filter5 = filter(scan_id_1, "1 + 2 = 3".to_string()); let filter6 = filter(scan_id_1, "true OR false".to_string()); let filter7 = filter(scan_id_1, "(1 + 1 > -1 AND true) OR false".to_string()); - let (filter_id_0, _) = memo + let (filter_id_0, _) = txn .add_group(filter0, &[scan_id_1]) .await .unwrap() .ok() .unwrap(); - let (filter_id_1, _) = memo + let (filter_id_1, _) = txn .add_group(filter1, &[scan_id_1]) .await .unwrap() .ok() .unwrap(); - let (filter_id_2, _) = memo + let (filter_id_2, _) = txn .add_group(filter2, &[scan_id_1]) .await .unwrap() .ok() .unwrap(); - let (filter_id_3, _) = memo + let (filter_id_3, _) = txn .add_group(filter3, &[scan_id_1]) .await .unwrap() .ok() .unwrap(); - let (filter_id_4, _) = memo + let (filter_id_4, _) = txn .add_group(filter4, &[scan_id_1]) .await .unwrap() .ok() .unwrap(); - let (filter_id_5, _) = memo + let (filter_id_5, _) = txn .add_group(filter5, &[scan_id_1]) .await .unwrap() .ok() .unwrap(); - let (filter_id_6, _) = memo + let (filter_id_6, _) = txn .add_group(filter6, &[scan_id_1]) .await .unwrap() .ok() .unwrap(); - let (filter_id_7, _) = memo + let (filter_id_7, _) = txn .add_group(filter7, &[scan_id_1]) .await .unwrap() @@ -336,17 +353,17 @@ async fn test_group_merge() { ]; // Merge them all together. 
- let quarter_0 = memo.merge_groups(filters[0], filters[1]).await.unwrap(); - let quarter_1 = memo.merge_groups(filters[2], filters[3]).await.unwrap(); - let quarter_2 = memo.merge_groups(filters[4], filters[5]).await.unwrap(); - let quarter_3 = memo.merge_groups(filters[6], filters[7]).await.unwrap(); - let semi_0 = memo.merge_groups(quarter_0, quarter_1).await.unwrap(); - let semi_1 = memo.merge_groups(quarter_2, quarter_3).await.unwrap(); - let final_id = memo.merge_groups(semi_0, semi_1).await.unwrap(); + let quarter_0 = txn.merge_groups(filters[0], filters[1]).await.unwrap(); + let quarter_1 = txn.merge_groups(filters[2], filters[3]).await.unwrap(); + let quarter_2 = txn.merge_groups(filters[4], filters[5]).await.unwrap(); + let quarter_3 = txn.merge_groups(filters[6], filters[7]).await.unwrap(); + let semi_0 = txn.merge_groups(quarter_0, quarter_1).await.unwrap(); + let semi_1 = txn.merge_groups(quarter_2, quarter_3).await.unwrap(); + let final_id = txn.merge_groups(semi_0, semi_1).await.unwrap(); // Check that the group set is properly representative. { - let set = memo.get_group_set(final_id).await.unwrap(); + let set = txn.get_group_set(final_id).await.unwrap(); assert_eq!(set.len(), 8); for id in set { assert!(filters.contains(&id)); @@ -355,11 +372,11 @@ async fn test_group_merge() { // Create another base group. let scan2 = scan("t2".to_string()); - let (scan_id_2, _) = memo.add_group(scan2, &[]).await.unwrap().ok().unwrap(); + let (scan_id_2, _) = txn.add_group(scan2, &[]).await.unwrap().ok().unwrap(); // Add a join group. let join0 = join(filter_id_0, scan_id_2, "t1.a = t2.a".to_string()); - let (join_group_id, join_expr_id) = memo + let (join_group_id, join_expr_id) = txn .add_group(join0, &[filter_id_0, scan_id_2]) .await .unwrap() @@ -370,7 +387,7 @@ async fn test_group_merge() { // the already existing group and expression. 
for filter_id in filters { let join_test = join(filter_id, scan_id_2, "t1.a = t2.a".to_string()); - let (join_group_id_test, join_expr_id_test) = memo + let (join_group_id_test, join_expr_id_test) = txn .add_group(join_test, &[filter_id, scan_id_2]) .await .unwrap() @@ -380,6 +397,7 @@ async fn test_group_merge() { assert_eq!(join_expr_id, join_expr_id_test); } + txn.commit().await.unwrap(); memo.cleanup().await; } @@ -390,55 +408,47 @@ async fn test_cascading_merge() { let memo = PersistentMemo::::new().await; memo.cleanup().await; + let mut txn = memo.begin().await.unwrap(); + // Create the base groups. let scan1 = scan("t1".to_string()); - let (g1, _) = memo.add_group(scan1, &[]).await.unwrap().ok().unwrap(); + let (g1, _) = txn.add_group(scan1, &[]).await.unwrap().ok().unwrap(); let scan2 = scan("t2".to_string()); - let (g2, _) = memo.add_group(scan2, &[]).await.unwrap().ok().unwrap(); + let (g2, _) = txn.add_group(scan2, &[]).await.unwrap().ok().unwrap(); let filter1 = filter(g1, "x > 1000".to_string()); - let (g3, _) = memo.add_group(filter1, &[g1]).await.unwrap().ok().unwrap(); + let (g3, _) = txn.add_group(filter1, &[g1]).await.unwrap().ok().unwrap(); // Create two groups that will need to be merged. let filter2a = filter(g2, "a < 42".to_string()); - let (g4, _) = memo.add_group(filter2a, &[g2]).await.unwrap().ok().unwrap(); + let (g4, _) = txn.add_group(filter2a, &[g2]).await.unwrap().ok().unwrap(); let filter2b = filter(g4, "a < 42 AND 1 = 1".to_string()); - let (g5, _) = memo.add_group(filter2b, &[g4]).await.unwrap().ok().unwrap(); + let (g5, _) = txn.add_group(filter2b, &[g4]).await.unwrap().ok().unwrap(); // Create groups that are dependent on the to-be-merged groups. 
let join1 = join(g3, g4, "t1.x = t2.a".to_string()); - let (g6, _) = memo - .add_group(join1, &[g3, g4]) - .await - .unwrap() - .ok() - .unwrap(); + let (g6, _) = txn.add_group(join1, &[g3, g4]).await.unwrap().ok().unwrap(); let join2 = join(g3, g5, "t1.x = t2.a".to_string()); - let (g7, _) = memo - .add_group(join2, &[g3, g5]) - .await - .unwrap() - .ok() - .unwrap(); + let (g7, _) = txn.add_group(join2, &[g3, g5]).await.unwrap().ok().unwrap(); // Create more groups that are dependent on the to-be-merged groups. // TODO actually use a sort expression instead of a `filter` placeholder. let sort1 = filter(g6, "ORDER BY a".to_string()); - let (g8, _) = memo.add_group(sort1, &[g6]).await.unwrap().ok().unwrap(); + let (g8, _) = txn.add_group(sort1, &[g6]).await.unwrap().ok().unwrap(); let sort2 = filter(g7, "ORDER BY a".to_string()); - let (g9, _) = memo.add_group(sort2, &[g7]).await.unwrap().ok().unwrap(); + let (g9, _) = txn.add_group(sort2, &[g7]).await.unwrap().ok().unwrap(); // Now that everything is set up, we can merge groups 4 and 5 to begin the cascading process. - let filter_root = memo.merge_groups(g4, g5).await.unwrap(); - assert_eq!(memo.get_root_group(g4).await.unwrap(), filter_root); - assert_eq!(memo.get_root_group(g5).await.unwrap(), filter_root); + let filter_root = txn.merge_groups(g4, g5).await.unwrap(); + assert_eq!(txn.get_root_group(g4).await.unwrap(), filter_root); + assert_eq!(txn.get_root_group(g5).await.unwrap(), filter_root); // After merging, the join groups (6 and 7) are technically identical, but we have not merged // them together yet. However, applying rules will reveal that they are identical, and we will // know that they need to get merged. 
let join1_commute = join(g4, g3, "t1.x = t2.a".to_string()); - let join1_commute_id = memo + let join1_commute_id = txn .add_logical_expression_to_group(g6, join1_commute, &[g4, g3]) .await .unwrap() @@ -447,7 +457,7 @@ async fn test_cascading_merge() { // Adding this expression should now result in a duplication error and return the above ID. let join2_commute = join(g5, g3, "t1.x = t2.a".to_string()); - let (existing_g6, existing_id) = memo + let (existing_g6, existing_id) = txn .add_logical_expression_to_group(g7, join2_commute, &[g5, g3]) .await .unwrap() @@ -456,17 +466,18 @@ async fn test_cascading_merge() { assert_eq!(existing_g6, g6); assert_eq!(existing_id, join1_commute_id); - // Since the memo table has told us these are duplicates, we can now merge groups 6 and 7. - let join_root = memo.merge_groups(g6, g7).await.unwrap(); - assert_eq!(memo.get_root_group(g6).await.unwrap(), join_root); - assert_eq!(memo.get_root_group(g7).await.unwrap(), join_root); + // Since the txn table has told us these are duplicates, we can now merge groups 6 and 7. + let join_root = txn.merge_groups(g6, g7).await.unwrap(); + assert_eq!(txn.get_root_group(g6).await.unwrap(), join_root); + assert_eq!(txn.get_root_group(g7).await.unwrap(), join_root); // Do a similar thing for the sort groups. We'll skip the expression adding for now and just // merge them immediately, but remember that the application should observe a duplicate - // somewhere in the memo table before deciding to merge groups. - let sort_root = memo.merge_groups(g8, g9).await.unwrap(); - assert_eq!(memo.get_root_group(g8).await.unwrap(), sort_root); - assert_eq!(memo.get_root_group(g9).await.unwrap(), sort_root); + // somewhere in the txn table before deciding to merge groups. 
+ let sort_root = txn.merge_groups(g8, g9).await.unwrap(); + assert_eq!(txn.get_root_group(g8).await.unwrap(), sort_root); + assert_eq!(txn.get_root_group(g9).await.unwrap(), sort_root); + txn.commit().await.unwrap(); memo.cleanup().await; } From 32b3cbe8d7529c6753dd7f74770ba1cabb6b7eaf Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Mon, 9 Dec 2024 10:25:57 -0500 Subject: [PATCH 13/13] add rank by size optimization + bump sea-orm-cli This commit adds the rank by size optimization into the embedded union-find data structure of the group sets. It also bumps the version number of `sea-orm-cli` to `1.1.2`. --- optd-mvp/src/entities/fingerprint.rs | 2 +- optd-mvp/src/entities/group.rs | 3 ++- optd-mvp/src/entities/logical_children.rs | 2 +- optd-mvp/src/entities/logical_expression.rs | 2 +- optd-mvp/src/entities/mod.rs | 2 +- optd-mvp/src/entities/physical_children.rs | 2 +- optd-mvp/src/entities/physical_expression.rs | 2 +- optd-mvp/src/entities/prelude.rs | 2 +- .../src/memo/persistent/implementation.rs | 23 ++++++++++++------- .../migrator/memo/m20241127_000001_group.rs | 2 ++ 10 files changed, 26 insertions(+), 16 deletions(-) diff --git a/optd-mvp/src/entities/fingerprint.rs b/optd-mvp/src/entities/fingerprint.rs index 2ab6a7f..608ca57 100644 --- a/optd-mvp/src/entities/fingerprint.rs +++ b/optd-mvp/src/entities/fingerprint.rs @@ -1,4 +1,4 @@ -//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 +//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.2 use sea_orm::entity::prelude::*; diff --git a/optd-mvp/src/entities/group.rs b/optd-mvp/src/entities/group.rs index 333ab05..b5b1686 100644 --- a/optd-mvp/src/entities/group.rs +++ b/optd-mvp/src/entities/group.rs @@ -1,4 +1,4 @@ -//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 +//! 
`SeaORM` Entity, @generated by sea-orm-codegen 1.1.2 use sea_orm::entity::prelude::*; @@ -10,6 +10,7 @@ pub struct Model { pub status: i8, pub winner: Option, pub cost: Option, + pub set_size: i32, pub parent_id: Option, pub next_id: Option, } diff --git a/optd-mvp/src/entities/logical_children.rs b/optd-mvp/src/entities/logical_children.rs index a0ac39c..31e86c9 100644 --- a/optd-mvp/src/entities/logical_children.rs +++ b/optd-mvp/src/entities/logical_children.rs @@ -1,4 +1,4 @@ -//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 +//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.2 use sea_orm::entity::prelude::*; diff --git a/optd-mvp/src/entities/logical_expression.rs b/optd-mvp/src/entities/logical_expression.rs index 82d938f..9311776 100644 --- a/optd-mvp/src/entities/logical_expression.rs +++ b/optd-mvp/src/entities/logical_expression.rs @@ -1,4 +1,4 @@ -//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 +//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.2 use sea_orm::entity::prelude::*; diff --git a/optd-mvp/src/entities/mod.rs b/optd-mvp/src/entities/mod.rs index 3abd379..8bdd8f7 100644 --- a/optd-mvp/src/entities/mod.rs +++ b/optd-mvp/src/entities/mod.rs @@ -1,4 +1,4 @@ -//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 +//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.2 pub mod prelude; diff --git a/optd-mvp/src/entities/physical_children.rs b/optd-mvp/src/entities/physical_children.rs index e58e9ca..94859a6 100644 --- a/optd-mvp/src/entities/physical_children.rs +++ b/optd-mvp/src/entities/physical_children.rs @@ -1,4 +1,4 @@ -//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 +//! 
`SeaORM` Entity, @generated by sea-orm-codegen 1.1.2 use sea_orm::entity::prelude::*; diff --git a/optd-mvp/src/entities/physical_expression.rs b/optd-mvp/src/entities/physical_expression.rs index 4fba71e..918ac03 100644 --- a/optd-mvp/src/entities/physical_expression.rs +++ b/optd-mvp/src/entities/physical_expression.rs @@ -1,4 +1,4 @@ -//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 +//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.2 use sea_orm::entity::prelude::*; diff --git a/optd-mvp/src/entities/prelude.rs b/optd-mvp/src/entities/prelude.rs index 8e8deaa..1e27a2c 100644 --- a/optd-mvp/src/entities/prelude.rs +++ b/optd-mvp/src/entities/prelude.rs @@ -1,4 +1,4 @@ -//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 +//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.2 #![allow(unused_imports)] diff --git a/optd-mvp/src/memo/persistent/implementation.rs b/optd-mvp/src/memo/persistent/implementation.rs index 8ec6295..9ce30ce 100644 --- a/optd-mvp/src/memo/persistent/implementation.rs +++ b/optd-mvp/src/memo/persistent/implementation.rs @@ -606,6 +606,7 @@ where // The expression does not exist yet, so we need to create a new group and new expression. let group = group::ActiveModel { status: Set(0), // `GroupStatus::InProgress` status. + set_size: Set(1), ..Default::default() }; @@ -676,18 +677,24 @@ where left_group_id: GroupId, right_group_id: GroupId, ) -> OptimizerResult { - // Without a rank / size field, we have no way of determining which set is better to merge - // into the other. So we will arbitrarily choose to merge the left group into the right - // group here. If rank is added in the future, then merge the smaller set into the larger. 
+ let mut left_root_id = self.get_root_group(left_group_id).await?; + let mut left_root = self.get_group(left_root_id).await?; + let mut left_size = left_root.set_size; + + let mut right_root_id = self.get_root_group(right_group_id).await?; + let mut right_root = self.get_group(right_root_id).await?; + let mut right_size = right_root.set_size; + + // Rank/size optimization: merge the smaller set into the larger set. + if left_size > right_size { + std::mem::swap(&mut left_root_id, &mut right_root_id); + std::mem::swap(&mut left_root, &mut right_root); + std::mem::swap(&mut left_size, &mut right_size); + } - let left_root_id = self.get_root_group(left_group_id).await?; - let left_root = self.get_group(left_root_id).await?; // A `None` next pointer means it should technically be pointing to itself. let left_next = left_root.next_id.unwrap_or(left_root_id.0); let mut active_left_root = left_root.into_active_model(); - - let right_root_id = self.get_root_group(right_group_id).await?; - let right_root = self.get_group(right_root_id).await?; // A `None` next pointer means it should technically be pointing to itself. let right_next = right_root.next_id.unwrap_or(right_root_id.0); let mut active_right_root = right_root.into_active_model(); diff --git a/optd-mvp/src/migrator/memo/m20241127_000001_group.rs b/optd-mvp/src/migrator/memo/m20241127_000001_group.rs index 59b5a09..6251b6a 100644 --- a/optd-mvp/src/migrator/memo/m20241127_000001_group.rs +++ b/optd-mvp/src/migrator/memo/m20241127_000001_group.rs @@ -83,6 +83,7 @@ pub enum Group { Status, Winner, Cost, + SetSize, ParentId, NextId, } @@ -109,6 +110,7 @@ impl MigrationTrait for Migration { .on_delete(ForeignKeyAction::SetNull) .on_update(ForeignKeyAction::Cascade), ) + .col(integer(Group::SetSize)) .col(integer_null(Group::ParentId)) .foreign_key( ForeignKey::create()