diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 5fc2e686..98e166d1 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -91,7 +91,7 @@ jobs: uses: dtolnay/install@cargo-docs-rs - name: cargo docs-rs # TODO: Once we figure out the crates, rename this. - run: cargo docs-rs -p optd-tmp + run: cargo docs-rs -p optd-storage hack: # cargo-hack checks combinations of feature flags to ensure that features are all additive # which is required for feature unification diff --git a/.gitignore b/.gitignore index d01bd1a9..bf5befe0 100644 --- a/.gitignore +++ b/.gitignore @@ -18,4 +18,11 @@ Cargo.lock # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ \ No newline at end of file +#.idea/ + +### Project Specific ### + +# The memo table database for testing purposes. +test_memo.db +# Storing environment variables. +.env diff --git a/Cargo.toml b/Cargo.toml index aa9544c8..383663f8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,3 +1,14 @@ [workspace] -members = ["optd-tmp"] +members = ["optd-storage"] resolver = "2" + +[workspace.dependencies] +anyhow = "1" +chrono = "0.4.39" +diesel = { version = "2.2", features = [ + "sqlite", + "returning_clauses_for_sqlite_3_35", + "chrono", +] } +# Using a bundled version of sqlite3-sys to avoid build issues. +libsqlite3-sys = { version = "0.30", features = ["bundled"] } diff --git a/diesel.toml b/diesel.toml new file mode 100644 index 00000000..9a25cf54 --- /dev/null +++ b/diesel.toml @@ -0,0 +1,15 @@ +# For documentation on how to configure this file, +# see https://diesel.rs/guides/configuring-diesel-cli + +[print_schema] +# The file diesel will write the generated schema to. 
+file = "optd-storage/src/storage/schema.rs" + + +# A column of type `INTEGER PRIMARY KEY` becomes an alias for the 64-bit signed integer `ROWID`. +# See https://sqlite.org/autoinc.html for more details. +sqlite_integer_primary_key_is_bigint = true + +[migrations_directory] +# The directory where the migration files are located. +dir = "optd-storage/migrations" diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md index 794d9556..37fd4c6e 100644 --- a/docs/src/SUMMARY.md +++ b/docs/src/SUMMARY.md @@ -9,6 +9,7 @@ # Contributor Guide - [Installaton]() +- [Working with diesel-rs](./contributor_guide/diesel.md) # RFCs diff --git a/docs/src/contributor_guide/diesel.md b/docs/src/contributor_guide/diesel.md new file mode 100644 index 00000000..02516ecf --- /dev/null +++ b/docs/src/contributor_guide/diesel.md @@ -0,0 +1,56 @@ +# Working with diesel-rs + +[Diesel](https://diesel.rs/) is an ORM framework we use to persist the core objects in the optd query optimizer. We chose to work with Diesel instead of other alternatives mainly for its compile-time safety guarantees, which is a good companion for our table-per-operator-kind model. + +This guide assumes that you already have the `sqlite3` binary installed. + +## Setup + +When working with Diesel for the first time, you can use the convenient setup script located at `scripts/setup.sh`. The script will install the Diesel CLI tool, generate a testing memo table database at project root, and run the Diesel setup script. + +For more details, follow the [Getting Started with Diesel](https://diesel.rs/guides/getting-started.html) guide. + +## Making changes + +To generate a new migration, use the following command: + +```shell +diesel migration generate <migration_name> +``` + +Diesel CLI will create two empty files in the `optd-storage/migrations` folder.
You will see output that looks something like this: + +```shell +Creating optd-storage/migrations/2025-01-20-153830_<migration_name>/up.sql +Creating optd-storage/migrations/2025-01-20-153830_<migration_name>/down.sql +``` + +The `up.sql` file should contain the changes you want to apply and `down.sql` should contain the command to revert the changes. + +Before optd becomes stable, it is ok to directly modify the migrations themselves. + +To apply the new migration, run: + +```shell +diesel migration run +``` + +You can also check whether `down.sql` properly reverts the change: + +```shell +diesel migration redo [-n <number>] +``` + +You can also use the following command to revert changes: + +```shell +diesel migration revert [-n <number>] +``` + +## Adding a new operator + +(TODO) + +## Adding a new property + +(TODO) diff --git a/optd-storage/Cargo.toml b/optd-storage/Cargo.toml new file mode 100644 index 00000000..4fba73c8 --- /dev/null +++ b/optd-storage/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "optd-storage" +version = "0.1.0" +edition = "2021" + +[dependencies] +diesel.workspace = true +chrono.workspace = true +anyhow.workspace = true +libsqlite3-sys.workspace = true diff --git a/optd-storage/migrations/.keep b/optd-storage/migrations/.keep new file mode 100644 index 00000000..e69de29b diff --git a/optd-storage/migrations/2025-01-19-032646_create_rel_groups/down.sql b/optd-storage/migrations/2025-01-19-032646_create_rel_groups/down.sql new file mode 100644 index 00000000..e086f58f --- /dev/null +++ b/optd-storage/migrations/2025-01-19-032646_create_rel_groups/down.sql @@ -0,0 +1 @@ +DROP TABLE rel_groups; diff --git a/optd-storage/migrations/2025-01-19-032646_create_rel_groups/up.sql b/optd-storage/migrations/2025-01-19-032646_create_rel_groups/up.sql new file mode 100644 index 00000000..babb063b --- /dev/null +++ b/optd-storage/migrations/2025-01-19-032646_create_rel_groups/up.sql @@ -0,0 +1,14 @@ +CREATE TABLE rel_groups ( + -- The group identifier + id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + --
The optimization status of the group. + -- It could be: + -- Unexplored, Exploring, Explored, Optimizing, Optimized. + -- `0` indicates `Unexplored`. + status INTEGER NOT NULL, + -- Time at which the group is created. + created_at TIMESTAMP DEFAULT (CURRENT_TIMESTAMP) NOT NULL, + -- The group identifier of the representative. + rep_id BIGINT, + FOREIGN KEY (rep_id) REFERENCES rel_groups(id) ON DELETE CASCADE ON UPDATE CASCADE +); diff --git a/optd-storage/migrations/2025-01-19-032846_create_logical_props/down.sql b/optd-storage/migrations/2025-01-19-032846_create_logical_props/down.sql new file mode 100644 index 00000000..18db8bc1 --- /dev/null +++ b/optd-storage/migrations/2025-01-19-032846_create_logical_props/down.sql @@ -0,0 +1 @@ +DROP TABLE logical_props; diff --git a/optd-storage/migrations/2025-01-19-032846_create_logical_props/up.sql b/optd-storage/migrations/2025-01-19-032846_create_logical_props/up.sql new file mode 100644 index 00000000..4c460101 --- /dev/null +++ b/optd-storage/migrations/2025-01-19-032846_create_logical_props/up.sql @@ -0,0 +1,9 @@ +CREATE TABLE logical_props ( + id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + -- The relational group that shares this logical property entry. + group_id BIGINT NOT NULL, + -- The number of rows produced by this relation. 
+ card_est BIGINT NOT NULL, + + FOREIGN KEY(group_id) REFERENCES rel_groups(id) ON DELETE CASCADE ON UPDATE CASCADE +); diff --git a/optd-storage/migrations/2025-01-19-034417_create_logical_typ_descs/down.sql b/optd-storage/migrations/2025-01-19-034417_create_logical_typ_descs/down.sql new file mode 100644 index 00000000..b03b21b9 --- /dev/null +++ b/optd-storage/migrations/2025-01-19-034417_create_logical_typ_descs/down.sql @@ -0,0 +1 @@ +DROP TABLE logical_typ_descs; diff --git a/optd-storage/migrations/2025-01-19-034417_create_logical_typ_descs/up.sql b/optd-storage/migrations/2025-01-19-034417_create_logical_typ_descs/up.sql new file mode 100644 index 00000000..f3a6285e --- /dev/null +++ b/optd-storage/migrations/2025-01-19-034417_create_logical_typ_descs/up.sql @@ -0,0 +1,4 @@ +CREATE TABLE logical_typ_descs ( + id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL +); diff --git a/optd-storage/migrations/2025-01-19-034418_create_logical_exprs/down.sql b/optd-storage/migrations/2025-01-19-034418_create_logical_exprs/down.sql new file mode 100644 index 00000000..9cb0e521 --- /dev/null +++ b/optd-storage/migrations/2025-01-19-034418_create_logical_exprs/down.sql @@ -0,0 +1 @@ +DROP TABLE logical_exprs; diff --git a/optd-storage/migrations/2025-01-19-034418_create_logical_exprs/up.sql b/optd-storage/migrations/2025-01-19-034418_create_logical_exprs/up.sql new file mode 100644 index 00000000..6737f426 --- /dev/null +++ b/optd-storage/migrations/2025-01-19-034418_create_logical_exprs/up.sql @@ -0,0 +1,13 @@ +-- The relational logical expressions table specifies which group a logical expression belongs to. +CREATE TABLE logical_exprs ( + -- The logical expression id. + id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + -- The type descriptor of the logical expression. + typ_desc BIGINT NOT NULL, + -- The group identifier of the logical expression. + group_id BIGINT NOT NULL, -- groups.id + -- Time at which the logical expression is created. 
+ created_at TIMESTAMP DEFAULT (CURRENT_TIMESTAMP) NOT NULL, + FOREIGN KEY (typ_desc) REFERENCES logical_typ_descs(id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY (group_id) REFERENCES rel_groups(id) ON DELETE CASCADE ON UPDATE CASCADE +); diff --git a/optd-storage/migrations/2025-01-19-044136_create_physical_typ_descs/down.sql b/optd-storage/migrations/2025-01-19-044136_create_physical_typ_descs/down.sql new file mode 100644 index 00000000..f874e166 --- /dev/null +++ b/optd-storage/migrations/2025-01-19-044136_create_physical_typ_descs/down.sql @@ -0,0 +1 @@ +DROP TABLE physical_typ_descs; diff --git a/optd-storage/migrations/2025-01-19-044136_create_physical_typ_descs/up.sql b/optd-storage/migrations/2025-01-19-044136_create_physical_typ_descs/up.sql new file mode 100644 index 00000000..f718ea06 --- /dev/null +++ b/optd-storage/migrations/2025-01-19-044136_create_physical_typ_descs/up.sql @@ -0,0 +1,5 @@ +CREATE TABLE physical_typ_descs ( + id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + -- Name of the physical operator. + name TEXT NOT NULL +); diff --git a/optd-storage/migrations/2025-01-19-053753_create_physical_props/down.sql b/optd-storage/migrations/2025-01-19-053753_create_physical_props/down.sql new file mode 100644 index 00000000..c280123e --- /dev/null +++ b/optd-storage/migrations/2025-01-19-053753_create_physical_props/down.sql @@ -0,0 +1 @@ +DROP TABLE physical_props; diff --git a/optd-storage/migrations/2025-01-19-053753_create_physical_props/up.sql b/optd-storage/migrations/2025-01-19-053753_create_physical_props/up.sql new file mode 100644 index 00000000..022b0656 --- /dev/null +++ b/optd-storage/migrations/2025-01-19-053753_create_physical_props/up.sql @@ -0,0 +1,5 @@ +CREATE TABLE physical_props ( + id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + -- Payload type. 
+ payload BLOB NOT NULL +); diff --git a/optd-storage/migrations/2025-01-19-054756_create_physical_exprs/down.sql b/optd-storage/migrations/2025-01-19-054756_create_physical_exprs/down.sql new file mode 100644 index 00000000..e531a3cf --- /dev/null +++ b/optd-storage/migrations/2025-01-19-054756_create_physical_exprs/down.sql @@ -0,0 +1 @@ +DROP TABLE physical_exprs; diff --git a/optd-storage/migrations/2025-01-19-054756_create_physical_exprs/up.sql b/optd-storage/migrations/2025-01-19-054756_create_physical_exprs/up.sql new file mode 100644 index 00000000..3c6ad3a3 --- /dev/null +++ b/optd-storage/migrations/2025-01-19-054756_create_physical_exprs/up.sql @@ -0,0 +1,19 @@ +-- The relational physical expressions table specifies which group a physical expression belongs to. +-- It also specifies the derived physical property and the cost associated with the physical expression. +CREATE TABLE physical_exprs ( + -- The physical expression id. + id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + -- The type descriptor of the physical expression. + typ_desc BIGINT NOT NULL, + -- The group this physical expression belongs to. + group_id BIGINT NOT NULL, + -- The physical property derived based on the properties of the children nodes. + derived_phys_prop_id BIGINT NOT NULL, + -- The cost associated with the physical expression. + cost DOUBLE NOT NULL, + -- Time at which the physical expression is created.
+ created_at TIMESTAMP DEFAULT (CURRENT_TIMESTAMP) NOT NULL, + FOREIGN KEY (typ_desc) REFERENCES physical_typ_descs(id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY (group_id) REFERENCES rel_groups(id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY (derived_phys_prop_id) REFERENCES physical_props(id) ON DELETE CASCADE ON UPDATE CASCADE +); diff --git a/optd-storage/migrations/2025-01-19-054757_create_rel_subgroup_winners/down.sql b/optd-storage/migrations/2025-01-19-054757_create_rel_subgroup_winners/down.sql new file mode 100644 index 00000000..2752bec9 --- /dev/null +++ b/optd-storage/migrations/2025-01-19-054757_create_rel_subgroup_winners/down.sql @@ -0,0 +1 @@ +DROP TABLE rel_subgroup_winners; diff --git a/optd-storage/migrations/2025-01-19-054757_create_rel_subgroup_winners/up.sql b/optd-storage/migrations/2025-01-19-054757_create_rel_subgroup_winners/up.sql new file mode 100644 index 00000000..ba39afe0 --- /dev/null +++ b/optd-storage/migrations/2025-01-19-054757_create_rel_subgroup_winners/up.sql @@ -0,0 +1,9 @@ +-- The winners table records the winner of a group with some required physical property. +CREATE TABLE rel_subgroup_winners ( + -- The subgroup id of the winner, i.e. the winner of the group with `group_id` and some required physical property. + subgroup_id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + -- The physical expression id of the winner. 
+ physical_expr_id BIGINT NOT NULL, + FOREIGN KEY (subgroup_id) REFERENCES rel_subgroups(id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY (physical_expr_id) REFERENCES physical_exprs(id) ON DELETE CASCADE ON UPDATE CASCADE +); diff --git a/optd-storage/migrations/2025-01-19-055017_create_scalar_groups/down.sql b/optd-storage/migrations/2025-01-19-055017_create_scalar_groups/down.sql new file mode 100644 index 00000000..da5650a0 --- /dev/null +++ b/optd-storage/migrations/2025-01-19-055017_create_scalar_groups/down.sql @@ -0,0 +1 @@ +DROP TABLE scalar_groups; diff --git a/optd-storage/migrations/2025-01-19-055017_create_scalar_groups/up.sql b/optd-storage/migrations/2025-01-19-055017_create_scalar_groups/up.sql new file mode 100644 index 00000000..304ada38 --- /dev/null +++ b/optd-storage/migrations/2025-01-19-055017_create_scalar_groups/up.sql @@ -0,0 +1,14 @@ +CREATE TABLE scalar_groups ( + -- The group identifier + id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + -- The optimization status of the group. + -- It could be: + -- Unexplored, Exploring, Explored, Optimizing, Optimized. + -- `0` indicates `Unexplored`. + status INTEGER NOT NULL, + -- Time at which the group is created. + created_at TIMESTAMP DEFAULT (CURRENT_TIMESTAMP) NOT NULL, + -- The group identifier of the representative.
+ rep_id BIGINT, + FOREIGN KEY (rep_id) REFERENCES scalar_groups(id) ON DELETE CASCADE ON UPDATE CASCADE +); diff --git a/optd-storage/migrations/2025-01-19-055223_create_scalar_typ_descs/down.sql b/optd-storage/migrations/2025-01-19-055223_create_scalar_typ_descs/down.sql new file mode 100644 index 00000000..8fd2c6b3 --- /dev/null +++ b/optd-storage/migrations/2025-01-19-055223_create_scalar_typ_descs/down.sql @@ -0,0 +1 @@ +DROP TABLE scalar_typ_descs; diff --git a/optd-storage/migrations/2025-01-19-055223_create_scalar_typ_descs/up.sql b/optd-storage/migrations/2025-01-19-055223_create_scalar_typ_descs/up.sql new file mode 100644 index 00000000..189a8c34 --- /dev/null +++ b/optd-storage/migrations/2025-01-19-055223_create_scalar_typ_descs/up.sql @@ -0,0 +1,5 @@ +CREATE TABLE scalar_typ_descs ( + id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + -- Name of the scalar operator. + name TEXT NOT NULL +); diff --git a/optd-storage/migrations/2025-01-19-055508_create_scalar_props/down.sql b/optd-storage/migrations/2025-01-19-055508_create_scalar_props/down.sql new file mode 100644 index 00000000..c0bdc3b4 --- /dev/null +++ b/optd-storage/migrations/2025-01-19-055508_create_scalar_props/down.sql @@ -0,0 +1 @@ +DROP TABLE scalar_props; diff --git a/optd-storage/migrations/2025-01-19-055508_create_scalar_props/up.sql b/optd-storage/migrations/2025-01-19-055508_create_scalar_props/up.sql new file mode 100644 index 00000000..921a4566 --- /dev/null +++ b/optd-storage/migrations/2025-01-19-055508_create_scalar_props/up.sql @@ -0,0 +1,7 @@ +-- The scalar properties table contains the scalar property associated with +-- some scalar expression. +-- TODO(yuchen): add scalar properties.
+CREATE TABLE scalar_props ( + id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + payload BLOB NOT NULL +); \ No newline at end of file diff --git a/optd-storage/migrations/2025-01-19-055546_create_scalar_exprs/down.sql b/optd-storage/migrations/2025-01-19-055546_create_scalar_exprs/down.sql new file mode 100644 index 00000000..5b185c0e --- /dev/null +++ b/optd-storage/migrations/2025-01-19-055546_create_scalar_exprs/down.sql @@ -0,0 +1 @@ +DROP TABLE scalar_exprs; diff --git a/optd-storage/migrations/2025-01-19-055546_create_scalar_exprs/up.sql b/optd-storage/migrations/2025-01-19-055546_create_scalar_exprs/up.sql new file mode 100644 index 00000000..19746fb8 --- /dev/null +++ b/optd-storage/migrations/2025-01-19-055546_create_scalar_exprs/up.sql @@ -0,0 +1,16 @@ +-- The scalar expressions table specifies which group a scalar expression belongs to. +-- It also specifies the derived scalar property and the cost associated with the scalar expression. +CREATE TABLE scalar_exprs ( + -- The scalar expression id. + id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + -- The type descriptor of the scalar expression. + typ_desc BIGINT NOT NULL, + -- The group identifier of the scalar expression. + group_id BIGINT NOT NULL, + -- Time at which the scalar expression is created. + created_at TIMESTAMP DEFAULT (CURRENT_TIMESTAMP) NOT NULL, + -- The cost associated with computing the scalar expression. + cost DOUBLE, -- TODO: This can be NULL, do we want a separate table?
+ FOREIGN KEY (typ_desc) REFERENCES scalar_typ_descs(id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY (group_id) REFERENCES scalar_groups(id) ON DELETE CASCADE ON UPDATE CASCADE +); diff --git a/optd-storage/migrations/2025-01-19-142409_create_scalar_group_winners/down.sql b/optd-storage/migrations/2025-01-19-142409_create_scalar_group_winners/down.sql new file mode 100644 index 00000000..a8b90e71 --- /dev/null +++ b/optd-storage/migrations/2025-01-19-142409_create_scalar_group_winners/down.sql @@ -0,0 +1 @@ +DROP TABLE scalar_group_winners; diff --git a/optd-storage/migrations/2025-01-19-142409_create_scalar_group_winners/up.sql b/optd-storage/migrations/2025-01-19-142409_create_scalar_group_winners/up.sql new file mode 100644 index 00000000..fcefbd3a --- /dev/null +++ b/optd-storage/migrations/2025-01-19-142409_create_scalar_group_winners/up.sql @@ -0,0 +1,15 @@ +-- The scalar group winners table records the winner of a scalar group. +CREATE TABLE scalar_group_winners ( + -- The scalar group we are interested in. + group_id BIGINT NOT NULL, + -- The winner of the group with `group_id`. 
+ scalar_expr_id BIGINT NOT NULL, + PRIMARY KEY (group_id), + FOREIGN KEY (group_id) REFERENCES scalar_groups(id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY (scalar_expr_id) REFERENCES scalar_exprs(id) ON DELETE CASCADE ON UPDATE CASCADE +); + +-- Could also do a query to compute the winner: +-- SELECT MIN(cost), [all other fields] +-- FROM scalar_exprs +-- WHERE group_id = ; diff --git a/optd-storage/migrations/2025-01-20-150957_create_rel_subgroups/down.sql b/optd-storage/migrations/2025-01-20-150957_create_rel_subgroups/down.sql new file mode 100644 index 00000000..0b5a3828 --- /dev/null +++ b/optd-storage/migrations/2025-01-20-150957_create_rel_subgroups/down.sql @@ -0,0 +1 @@ +DROP TABLE rel_subgroups; diff --git a/optd-storage/migrations/2025-01-20-150957_create_rel_subgroups/up.sql b/optd-storage/migrations/2025-01-20-150957_create_rel_subgroups/up.sql new file mode 100644 index 00000000..65a6f878 --- /dev/null +++ b/optd-storage/migrations/2025-01-20-150957_create_rel_subgroups/up.sql @@ -0,0 +1,10 @@ +-- The relational subgroups table specifies the subgroups of a group with some required physical property. +CREATE TABLE rel_subgroups ( + id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + -- The group the subgroup belongs to. + group_id BIGINT NOT NULL, + -- The required physical property of the subgroup. 
+ required_phys_prop_id BIGINT NOT NULL, + FOREIGN KEY (group_id) REFERENCES rel_groups(id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY (required_phys_prop_id) REFERENCES physical_props(id) ON DELETE CASCADE ON UPDATE CASCADE +); \ No newline at end of file diff --git a/optd-storage/migrations/2025-01-20-153830_create_rel_subgroup_physical_exprs/down.sql b/optd-storage/migrations/2025-01-20-153830_create_rel_subgroup_physical_exprs/down.sql new file mode 100644 index 00000000..850f7dd0 --- /dev/null +++ b/optd-storage/migrations/2025-01-20-153830_create_rel_subgroup_physical_exprs/down.sql @@ -0,0 +1 @@ +DROP TABLE rel_subgroup_physical_exprs; diff --git a/optd-storage/migrations/2025-01-20-153830_create_rel_subgroup_physical_exprs/up.sql b/optd-storage/migrations/2025-01-20-153830_create_rel_subgroup_physical_exprs/up.sql new file mode 100644 index 00000000..95ea0415 --- /dev/null +++ b/optd-storage/migrations/2025-01-20-153830_create_rel_subgroup_physical_exprs/up.sql @@ -0,0 +1,12 @@ +-- The relational subgroup expressions table specifies the physical expressions of a subgroup. +-- It is a m:n junction table since a subgroup can have multiple physical expressions, +-- and a physical expression can belong to multiple subgroups. +CREATE TABLE rel_subgroup_physical_exprs ( + -- The subgroup the physical expression belongs to. + subgroup_id BIGINT NOT NULL, + -- The physical expression id. 
+ physical_expr_id BIGINT NOT NULL, + PRIMARY KEY (subgroup_id, physical_expr_id), + FOREIGN KEY (subgroup_id) REFERENCES rel_subgroups(id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY (physical_expr_id) REFERENCES physical_exprs(id) ON DELETE CASCADE ON UPDATE CASCADE +); diff --git a/optd-tmp/src/lib.rs b/optd-storage/src/lib.rs similarity index 92% rename from optd-tmp/src/lib.rs rename to optd-storage/src/lib.rs index b93cf3ff..1ed4dcf8 100644 --- a/optd-tmp/src/lib.rs +++ b/optd-storage/src/lib.rs @@ -1,3 +1,5 @@ +pub mod storage; + pub fn add(left: u64, right: u64) -> u64 { left + right } diff --git a/optd-storage/src/storage.rs b/optd-storage/src/storage.rs new file mode 100644 index 00000000..d5cbad7e --- /dev/null +++ b/optd-storage/src/storage.rs @@ -0,0 +1,2 @@ +pub mod models; +pub mod schema; diff --git a/optd-storage/src/storage/models.rs b/optd-storage/src/storage/models.rs new file mode 100644 index 00000000..caafa69a --- /dev/null +++ b/optd-storage/src/storage/models.rs @@ -0,0 +1,386 @@ +use anyhow::bail; +use diesel::{ + backend::Backend, + deserialize::{FromSql, FromSqlRow}, + expression::AsExpression, + prelude::*, + serialize::{IsNull, ToSql}, + sql_types::{BigInt, Integer}, +}; + +/// A relational group contains one or more equivalent logical expressions +/// and zero or more physical expressions. +#[derive(Queryable, Selectable, Identifiable, AsChangeset)] +#[diesel(table_name = super::schema::rel_groups)] +#[diesel(check_for_backend(diesel::sqlite::Sqlite))] +pub struct RelGroup { + /// The relational group identifier. + pub id: RelGroupId, + /// Optimization status of the group. + pub status: RelGroupStatus, + /// Timestamp at which the group was created. + pub created_at: chrono::NaiveDateTime, + /// The group identifier of the representative. + pub rep_id: Option, +} + +/// A relational subgroup contains a subset of physical expressions in a relational group that +/// can satisfy the same required physical properties.
+#[derive(Queryable, Selectable, Identifiable, AsChangeset)] +#[diesel(table_name = super::schema::rel_subgroups)] +#[diesel(check_for_backend(diesel::sqlite::Sqlite))] +pub struct RelSubGroup { + pub id: RelSubGroupId, + /// The group the subgroup belongs to. + pub group_id: RelGroupId, + /// The required physical property of the subgroup. + pub required_phys_prop_id: PhysicalPropId, +} + +/// A subgroup winner is a physical expression that is the winner of a group with a required physical property. +#[derive(Queryable, Selectable, Identifiable, AsChangeset)] +#[diesel(table_name = super::schema::rel_subgroup_winners)] +#[diesel(belongs_to(RelSubGroup))] +#[diesel(belongs_to(PhysicalExpr))] +#[diesel(primary_key(subgroup_id))] +#[diesel(check_for_backend(diesel::sqlite::Sqlite))] +pub struct RelSubgroupWinner { + /// The subgroup id of the winner, i.e. the winner of the group with `group_id` and some required physical property. + pub subgroup_id: RelSubGroupId, + /// The physical expression id of the winner. + pub physical_expr_id: PhysicalExprId, +} + +/// A logical expression is a relational expression that consists of a tree of logical operators. +#[derive(Queryable, Selectable, Identifiable, AsChangeset)] +#[diesel(table_name = super::schema::logical_exprs)] +#[diesel(belongs_to(RelGroup))] +#[diesel(belongs_to(LogicalTypDesc))] +#[diesel(check_for_backend(diesel::sqlite::Sqlite))] +pub struct LogicalExpr { + /// The logical expression identifier. + pub id: LogicalExprId, + /// The type descriptor of the logical expression. + pub typ_desc: LogicalTypDescId, + /// The relational group that this logical expression belongs to. + pub group_id: RelGroupId, + /// The time at which this logical expression was created. + pub created_at: chrono::NaiveDateTime, +} + +/// Logical properties are shared by a relational group.
+#[derive(Queryable, Selectable, Identifiable, AsChangeset)] +#[diesel(table_name = super::schema::logical_props)] +#[diesel(belongs_to(RelGroup))] +#[diesel(check_for_backend(diesel::sqlite::Sqlite))] +pub struct LogicalProp { + /// The logical property identifier. + pub id: LogicalPropId, + /// The relational group that shares this property. + pub group_id: RelGroupId, + /// The number of rows produced by this relation. + pub card_est: i64, +} + +/// Descriptor for a logical relational operator type. +#[derive(Queryable, Selectable, Identifiable, AsChangeset)] +#[diesel(table_name = super::schema::logical_typ_descs)] +#[diesel(check_for_backend(diesel::sqlite::Sqlite))] +pub struct LogicalTypDesc { + /// The logical type descriptor identifier. + pub id: LogicalTypDescId, + /// The name of the logical type. + pub name: String, +} + +#[derive(Queryable, Selectable, Identifiable, AsChangeset)] +#[diesel(table_name = super::schema::physical_exprs)] +#[diesel(belongs_to(RelGroup))] +#[diesel(belongs_to(PhysicalTypDesc))] +#[diesel(belongs_to(PhysicalProp))] +#[diesel(check_for_backend(diesel::sqlite::Sqlite))] +pub struct PhysicalExpr { + /// The physical expression id. + pub id: PhysicalExprId, + /// The type descriptor of the physical expression. + pub typ_desc: PhysicalTypDescId, + /// The relational group that this physical expression belongs to. + pub group_id: RelGroupId, + /// The physical property derived based on the properties of the children expressions. + pub derived_phys_prop_id: PhysicalPropId, + /// The cost associated with this physical expression. + pub cost: f64, + /// The time at which this physical expression was created. + pub created_at: chrono::NaiveDateTime, +} + +// TODO(yuchen): Do we need a junction table for (logical_expr, required_phys_prop) <=> subgroup? TBD. +/// A relational subgroup expression entry specifies if a physical expression belongs to a subgroup.
+/// It is a m:n relationship since a subgroup can have multiple physical expressions, +/// and a physical expression can belong to multiple subgroups. +#[derive(Queryable, Selectable, Identifiable, Associations)] +#[diesel(table_name = super::schema::rel_subgroup_physical_exprs)] +#[diesel(primary_key(subgroup_id, physical_expr_id))] +#[diesel(belongs_to(RelSubGroup, foreign_key = subgroup_id))] +#[diesel(belongs_to(PhysicalExpr))] +#[diesel(check_for_backend(diesel::sqlite::Sqlite))] +pub struct RelSubGroupPhysicalExpr { + /// The subgroup the physical expression belongs to. + pub subgroup_id: RelSubGroupId, + /// The physical expression id. + pub physical_expr_id: PhysicalExprId, +} + +/// A physical property is a characteristic of an expression that impacts its layout, +/// presentation, or location, but not its logical content. +/// They could be either required by a subgroup or derived on a physical expression. +#[derive(Queryable, Selectable, Identifiable, AsChangeset)] +#[diesel(table_name = super::schema::physical_props)] +#[diesel(check_for_backend(diesel::sqlite::Sqlite))] +pub struct PhysicalProp { + /// The physical property id. + pub id: PhysicalPropId, + /// The opaquely stored payload. + // TODO(yuchen): Instead, probably could do something similar to + // the relational and scalar expression "inheritance" pattern. + pub payload: Vec, +} + +/// Descriptor for a physical relational operator type. +#[derive(Queryable, Selectable, Identifiable, AsChangeset)] +#[diesel(table_name = super::schema::physical_typ_descs)] +#[diesel(check_for_backend(diesel::sqlite::Sqlite))] +pub struct PhysicalTypDesc { + /// The physical type descriptor id. + pub id: PhysicalTypDescId, + /// The name of the physical type. + pub name: String, +} + +// TODO: ideally you want scalar to mimic the relational expressions. We don't have a definition of a physical scalar expression yet. +/// A scalar expression consists of a tree of scalar operators.
+#[derive(Queryable, Selectable, Identifiable, AsChangeset)] +#[diesel(table_name = super::schema::scalar_exprs)] +#[diesel(belongs_to(ScalarGroup))] +#[diesel(belongs_to(ScalarTyeDesc))] +#[diesel(check_for_backend(diesel::sqlite::Sqlite))] +pub struct ScalarExpr { + /// The scalar expression id. + pub id: ScalarExprId, + /// The type descriptor of the scalar expression. + pub typ_desc: ScalarTypDescId, + /// The scalar group that this scalar expression belongs to. + pub group_id: ScalarGroupId, + /// The time at which this scalar expression was created. + pub created_at: chrono::NaiveDateTime, + /// The cost associated with this scalar expression. None if the cost has not been computed. + pub cost: Option, +} + +/// A scalar group contains one or more equivalent scalar expressions. +#[derive(Queryable, Selectable, Identifiable, AsChangeset)] +#[diesel(table_name = super::schema::scalar_groups)] +#[diesel(check_for_backend(diesel::sqlite::Sqlite))] +pub struct ScalarGroup { + pub id: ScalarGroupId, + pub status: i32, + pub created_at: chrono::NaiveDateTime, + pub rep_id: Option, +} + +/// A scalar group winner is a scalar expression with the lowest cost in a scalar group. +#[derive(Queryable, Selectable, Identifiable, AsChangeset)] +#[diesel(table_name = super::schema::scalar_group_winners)] +#[diesel(primary_key(group_id))] +#[diesel(belongs_to(ScalarGroup))] +#[diesel(belongs_to(ScalarExpr))] +#[diesel(check_for_backend(diesel::sqlite::Sqlite))] +pub struct ScalarGroupWinner { + pub group_id: ScalarGroupId, + pub scalar_expr_id: ScalarExprId, +} + +/// A scalar property is a property shared by a scalar group. +#[derive(Queryable, Selectable, Identifiable, AsChangeset)] +#[diesel(table_name = super::schema::scalar_props)] +#[diesel(check_for_backend(diesel::sqlite::Sqlite))] +pub struct ScalarProp { + /// The scalar property id. + pub id: ScalarPropId, + /// The opaquely stored payload. 
+ // TODO(yuchen): Instead, probably could do something similar to + // the relational and scalar expression "inheritance" pattern. + pub payload: Vec, +} + +/// Descriptor for a scalar type. +#[derive(Queryable, Selectable, Identifiable, AsChangeset)] +#[diesel(table_name = super::schema::scalar_typ_descs)] +#[diesel(check_for_backend(diesel::sqlite::Sqlite))] +pub struct ScalarTyeDesc { + /// The scalar type descriptor id. + pub id: ScalarTypDescId, + /// The name of the scalar type. + pub name: String, +} + +/// Defines a new ID type with the given name, inner type, and SQL type. +/// Also derives some common traits for the new type. +#[macro_export] +macro_rules! impl_diesel_new_type_from_to_sql { + ($type_name:ident, $inner_type:ty, $sql_type:ty) => { + #[derive( + Clone, + Copy, + PartialEq, + Eq, + PartialOrd, + Ord, + Debug, + Default, + Hash, + AsExpression, + FromSqlRow, + )] + #[diesel(sql_type = $sql_type)] + pub struct $type_name(pub $inner_type); + + impl FromSql<$sql_type, DB> for $type_name + where + DB: Backend, + $inner_type: FromSql<$sql_type, DB>, + { + fn from_sql(bytes: ::RawValue<'_>) -> diesel::deserialize::Result { + <$inner_type>::from_sql(bytes).map($type_name) + } + } + + impl ToSql<$sql_type, diesel::sqlite::Sqlite> for $type_name + where + $inner_type: ToSql<$sql_type, diesel::sqlite::Sqlite>, + { + fn to_sql<'b>( + &'b self, + out: &mut diesel::serialize::Output<'b, '_, diesel::sqlite::Sqlite>, + ) -> diesel::serialize::Result { + out.set_value(self.0); + Ok(IsNull::No) + } + } + }; +} + +impl_diesel_new_type_from_to_sql!(RelGroupId, i64, BigInt); +impl_diesel_new_type_from_to_sql!(RelSubGroupId, i64, BigInt); +impl_diesel_new_type_from_to_sql!(LogicalExprId, i64, BigInt); +impl_diesel_new_type_from_to_sql!(PhysicalExprId, i64, BigInt); +impl_diesel_new_type_from_to_sql!(LogicalPropId, i64, BigInt); +impl_diesel_new_type_from_to_sql!(PhysicalPropId, i64, BigInt); +impl_diesel_new_type_from_to_sql!(LogicalTypDescId, i64, BigInt);
+impl_diesel_new_type_from_to_sql!(PhysicalTypDescId, i64, BigInt); +impl_diesel_new_type_from_to_sql!(ScalarGroupId, i64, BigInt); +impl_diesel_new_type_from_to_sql!(ScalarExprId, i64, BigInt); +impl_diesel_new_type_from_to_sql!(ScalarPropId, i64, BigInt); +impl_diesel_new_type_from_to_sql!(ScalarTypDescId, i64, BigInt); + +#[repr(i32)] +#[derive(Debug, Clone, Copy, AsExpression, FromSqlRow)] +#[diesel(sql_type = Integer)] +pub enum RelGroupStatus { + Unexplored = 1, + Exploring, + Explored, + Optimizing, + Optimized, +} + +impl TryFrom for RelGroupStatus { + type Error = anyhow::Error; + + fn try_from(value: i32) -> Result { + use RelGroupStatus::*; + match value { + x if x == Unexplored as i32 => Ok(Unexplored), + x if x == Exploring as i32 => Ok(Exploring), + x if x == Explored as i32 => Ok(Explored), + x if x == Optimizing as i32 => Ok(Optimizing), + x if x == Optimized as i32 => Ok(Optimized), + _ => bail!("Invalid integer value for RelGroupStatus: {}", value), + } + } +} + +impl FromSql for RelGroupStatus +where + DB: Backend, + i32: FromSql, +{ + fn from_sql(bytes: ::RawValue<'_>) -> diesel::deserialize::Result { + let status = i32::from_sql(bytes)?.try_into()?; + Ok(status) + } +} + +impl ToSql for RelGroupStatus +where + i32: ToSql, +{ + fn to_sql<'b>( + &'b self, + out: &mut diesel::serialize::Output<'b, '_, diesel::sqlite::Sqlite>, + ) -> diesel::serialize::Result { + out.set_value(*self as i32); + Ok(IsNull::No) + } +} + +#[repr(i32)] +#[derive(Debug, Clone, Copy, AsExpression, FromSqlRow)] +#[diesel(sql_type = Integer)] +pub enum ScalarGroupStatus { + Unexplored = 1, + Exploring, + Explored, + Optimizing, + Optimized, +} + +impl TryFrom for ScalarGroupStatus { + type Error = anyhow::Error; + + fn try_from(value: i32) -> Result { + use ScalarGroupStatus::*; + match value { + x if x == Unexplored as i32 => Ok(Unexplored), + x if x == Exploring as i32 => Ok(Exploring), + x if x == Explored as i32 => Ok(Explored), + x if x == Optimizing as i32 => 
Ok(Optimizing), + x if x == Optimized as i32 => Ok(Optimized), + _ => bail!("Invalid integer value for ScalarGroupStatus: {}", value), + } + } +} + +impl FromSql for ScalarGroupStatus +where + DB: Backend, + i32: FromSql, +{ + fn from_sql(bytes: ::RawValue<'_>) -> diesel::deserialize::Result { + let status = i32::from_sql(bytes)?.try_into()?; + Ok(status) + } +} + +impl ToSql for ScalarGroupStatus +where + i32: ToSql, +{ + fn to_sql<'b>( + &'b self, + out: &mut diesel::serialize::Output<'b, '_, diesel::sqlite::Sqlite>, + ) -> diesel::serialize::Result { + out.set_value(*self as i32); + Ok(IsNull::No) + } +} diff --git a/optd-storage/src/storage/schema.rs b/optd-storage/src/storage/schema.rs new file mode 100644 index 00000000..0e861a28 --- /dev/null +++ b/optd-storage/src/storage/schema.rs @@ -0,0 +1,156 @@ +// @generated automatically by Diesel CLI. + +diesel::table! { + logical_exprs (id) { + id -> BigInt, + typ_desc -> BigInt, + group_id -> BigInt, + created_at -> Timestamp, + } +} + +diesel::table! { + logical_props (id) { + id -> BigInt, + group_id -> BigInt, + card_est -> BigInt, + } +} + +diesel::table! { + logical_typ_descs (id) { + id -> BigInt, + name -> Text, + } +} + +diesel::table! { + physical_exprs (id) { + id -> BigInt, + typ_desc -> BigInt, + group_id -> BigInt, + derived_phys_prop_id -> BigInt, + cost -> Double, + created_at -> Timestamp, + } +} + +diesel::table! { + physical_props (id) { + id -> BigInt, + payload -> Binary, + } +} + +diesel::table! { + physical_typ_descs (id) { + id -> BigInt, + name -> Text, + } +} + +diesel::table! { + rel_groups (id) { + id -> BigInt, + status -> Integer, + created_at -> Timestamp, + rep_id -> Nullable, + } +} + +diesel::table! { + rel_subgroup_physical_exprs (subgroup_id, physical_expr_id) { + subgroup_id -> BigInt, + physical_expr_id -> BigInt, + } +} + +diesel::table! { + rel_subgroup_winners (subgroup_id) { + subgroup_id -> BigInt, + physical_expr_id -> BigInt, + } +} + +diesel::table! 
{ + rel_subgroups (id) { + id -> BigInt, + group_id -> BigInt, + required_phys_prop_id -> BigInt, + } +} + +diesel::table! { + scalar_exprs (id) { + id -> BigInt, + typ_desc -> BigInt, + group_id -> BigInt, + created_at -> Timestamp, + cost -> Nullable, + } +} + +diesel::table! { + scalar_group_winners (group_id) { + group_id -> BigInt, + scalar_expr_id -> BigInt, + } +} + +diesel::table! { + scalar_groups (id) { + id -> BigInt, + status -> Integer, + created_at -> Timestamp, + rep_id -> Nullable, + } +} + +diesel::table! { + scalar_props (id) { + id -> BigInt, + payload -> Binary, + } +} + +diesel::table! { + scalar_typ_descs (id) { + id -> BigInt, + name -> Text, + } +} + +diesel::joinable!(logical_exprs -> logical_typ_descs (typ_desc)); +diesel::joinable!(logical_exprs -> rel_groups (group_id)); +diesel::joinable!(logical_props -> rel_groups (group_id)); +diesel::joinable!(physical_exprs -> physical_props (derived_phys_prop_id)); +diesel::joinable!(physical_exprs -> physical_typ_descs (typ_desc)); +diesel::joinable!(physical_exprs -> rel_groups (group_id)); +diesel::joinable!(rel_subgroup_physical_exprs -> physical_exprs (physical_expr_id)); +diesel::joinable!(rel_subgroup_physical_exprs -> rel_subgroups (subgroup_id)); +diesel::joinable!(rel_subgroup_winners -> physical_exprs (physical_expr_id)); +diesel::joinable!(rel_subgroups -> physical_props (required_phys_prop_id)); +diesel::joinable!(rel_subgroups -> rel_groups (group_id)); +diesel::joinable!(scalar_exprs -> scalar_groups (group_id)); +diesel::joinable!(scalar_exprs -> scalar_typ_descs (typ_desc)); +diesel::joinable!(scalar_group_winners -> scalar_exprs (scalar_expr_id)); +diesel::joinable!(scalar_group_winners -> scalar_groups (group_id)); +diesel::joinable!(scalar_groups -> rel_groups (rep_id)); + +diesel::allow_tables_to_appear_in_same_query!( + logical_exprs, + logical_props, + logical_typ_descs, + physical_exprs, + physical_props, + physical_typ_descs, + rel_groups, + rel_subgroup_physical_exprs, + 
rel_subgroup_winners, + rel_subgroups, + scalar_exprs, + scalar_group_winners, + scalar_groups, + scalar_props, + scalar_typ_descs, +); diff --git a/optd-tmp/Cargo.toml b/optd-tmp/Cargo.toml deleted file mode 100644 index 60afae51..00000000 --- a/optd-tmp/Cargo.toml +++ /dev/null @@ -1,6 +0,0 @@ -[package] -name = "optd-tmp" -version = "0.1.0" -edition = "2021" - -[dependencies] diff --git a/scripts/setup.sh b/scripts/setup.sh new file mode 100644 index 00000000..85eb4903 --- /dev/null +++ b/scripts/setup.sh @@ -0,0 +1,6 @@ +# diesel_cli is a CLI tool for managing database schema, running migrations, +# and generating code when working with Diesel (https://diesel.rs/). +cargo install diesel_cli --no-default-features --features sqlite-bundled && +# Sets up a new database and runs the migrations. +echo DATABASE_URL=test_memo.db > .env && +diesel setup