Skip to content

Commit 2702eb4

Browse files
committed
make memo table generic over expression traits
This commit replaces the specific expression types with traits that define the behavior that the in-memory representations of both logical and physical expressions need to have. Right now, the `PhysicalExpression` trait does not do much, but the `LogicalExpression` trait is central to how the persistent memo table works.
1 parent e9fba27 commit 2702eb4

File tree

5 files changed

+157
-110
lines changed

5 files changed

+157
-110
lines changed

optd-mvp/src/expression/logical_expression.rs

Lines changed: 78 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,35 @@
55
//! TODO Figure out if each relation should be in a different submodule.
66
//! TODO This entire file is a WIP.
77
8-
use crate::{entities::*, memo::GroupId};
8+
use crate::{entities::logical_expression::Model, memo::GroupId};
99
use fxhash::hash;
1010
use serde::{Deserialize, Serialize};
11+
use std::fmt::Debug;
12+
13+
/// An interface defining what an in-memory logical expression representation should be able to do.
14+
pub trait LogicalExpression: From<Model> + Into<Model> + Clone + Debug {
15+
/// Returns the kind of relation / operator node encoded as an integer.
16+
fn kind(&self) -> i16;
17+
18+
/// Retrieves the child group IDs of this logical expression.
19+
fn children(&self) -> Vec<GroupId>;
20+
21+
/// Computes the fingerprint of this expression, which should generate an integer for equality
22+
/// checks that has a low collision rate.
23+
fn fingerprint(&self) -> i64;
24+
25+
/// Checks if the current expression is a duplicate of the other expression.
26+
///
27+
/// Note that this is similar to `Eq` and `PartialEq`, but the implementor should be aware that
28+
/// different expressions can be duplicates of each other without having the exact same data.
29+
fn is_duplicate(&self, other: &Self) -> bool;
30+
31+
/// Rewrites the expression to use new child group IDs, where `rewrites` is a slice of tuples
32+
/// representing `(old_group_id, new_group_id)`.
33+
///
34+
/// TODO: There's definitely a better way to represent this API
35+
fn rewrite(&self, rewrites: &[(GroupId, GroupId)]) -> Self;
36+
}
1137

1238
#[derive(Clone, Debug)]
1339
pub enum DefaultLogicalExpression {
@@ -16,44 +42,32 @@ pub enum DefaultLogicalExpression {
1642
Join(Join),
1743
}
1844

19-
impl DefaultLogicalExpression {
20-
pub fn kind(&self) -> i16 {
45+
impl LogicalExpression for DefaultLogicalExpression {
46+
fn kind(&self) -> i16 {
2147
match self {
22-
DefaultLogicalExpression::Scan(_) => 0,
23-
DefaultLogicalExpression::Filter(_) => 1,
24-
DefaultLogicalExpression::Join(_) => 2,
48+
Self::Scan(_) => 0,
49+
Self::Filter(_) => 1,
50+
Self::Join(_) => 2,
2551
}
2652
}
2753

28-
/// Calculates the fingerprint of a given expression, but replaces all of the children group IDs
29-
/// with a new group ID if it is listed in the input `rewrites` list.
30-
///
31-
/// TODO Allow each expression to implement a trait that does this.
32-
pub fn fingerprint_with_rewrite(&self, rewrites: &[(GroupId, GroupId)]) -> i64 {
33-
// Closure that rewrites a group ID if needed.
34-
let rewrite = |x: GroupId| {
35-
if rewrites.is_empty() {
36-
return x;
37-
}
38-
39-
if let Some(i) = rewrites.iter().position(|(curr, _new)| &x == curr) {
40-
assert_eq!(rewrites[i].0, x);
41-
rewrites[i].1
42-
} else {
43-
x
44-
}
45-
};
54+
fn children(&self) -> Vec<GroupId> {
55+
match self {
56+
Self::Scan(_) => vec![],
57+
Self::Filter(filter) => vec![filter.child],
58+
Self::Join(join) => vec![join.left, join.right],
59+
}
60+
}
4661

62+
fn fingerprint(&self) -> i64 {
4763
let kind = self.kind() as u16 as usize;
4864
let hash = match self {
49-
DefaultLogicalExpression::Scan(scan) => hash(scan.table.as_str()),
50-
DefaultLogicalExpression::Filter(filter) => {
51-
hash(&rewrite(filter.child).0) ^ hash(filter.expression.as_str())
52-
}
53-
DefaultLogicalExpression::Join(join) => {
65+
Self::Scan(scan) => hash(scan.table.as_str()),
66+
Self::Filter(filter) => hash(&filter.child.0) ^ hash(filter.expression.as_str()),
67+
Self::Join(join) => {
5468
// Make sure that there is a difference between `Join(A, B)` and `Join(B, A)`.
55-
hash(&(rewrite(join.left).0 + 1))
56-
^ hash(&(rewrite(join.right).0 + 2))
69+
hash(&(join.left.0 + 1))
70+
^ hash(&(join.right.0 + 2))
5771
^ hash(join.expression.as_str())
5872
}
5973
};
@@ -62,10 +76,23 @@ impl DefaultLogicalExpression {
6276
((hash & !0xFFFF) | kind) as i64
6377
}
6478

65-
/// Checks equality between two expressions, with both expression rewriting their child group
66-
/// IDs according to the input `rewrites` list.
67-
pub fn eq_with_rewrite(&self, other: &Self, rewrites: &[(GroupId, GroupId)]) -> bool {
68-
// Closure that rewrites a group ID if needed.
79+
fn is_duplicate(&self, other: &Self) -> bool {
80+
match (self, other) {
81+
(Self::Scan(scan_left), Self::Scan(scan_right)) => scan_left.table == scan_right.table,
82+
(Self::Filter(filter_left), Self::Filter(filter_right)) => {
83+
filter_left.child == filter_right.child
84+
&& filter_left.expression == filter_right.expression
85+
}
86+
(Self::Join(join_left), Self::Join(join_right)) => {
87+
join_left.left == join_right.left
88+
&& join_left.right == join_right.right
89+
&& join_left.expression == join_right.expression
90+
}
91+
_ => false,
92+
}
93+
}
94+
95+
fn rewrite(&self, rewrites: &[(GroupId, GroupId)]) -> Self {
6996
let rewrite = |x: GroupId| {
7097
if rewrites.is_empty() {
7198
return x;
@@ -79,35 +106,17 @@ impl DefaultLogicalExpression {
79106
}
80107
};
81108

82-
match (self, other) {
83-
(
84-
DefaultLogicalExpression::Scan(scan_left),
85-
DefaultLogicalExpression::Scan(scan_right),
86-
) => scan_left.table == scan_right.table,
87-
(
88-
DefaultLogicalExpression::Filter(filter_left),
89-
DefaultLogicalExpression::Filter(filter_right),
90-
) => {
91-
rewrite(filter_left.child) == rewrite(filter_right.child)
92-
&& filter_left.expression == filter_right.expression
93-
}
94-
(
95-
DefaultLogicalExpression::Join(join_left),
96-
DefaultLogicalExpression::Join(join_right),
97-
) => {
98-
rewrite(join_left.left) == rewrite(join_right.left)
99-
&& rewrite(join_left.right) == rewrite(join_right.right)
100-
&& join_left.expression == join_right.expression
101-
}
102-
_ => false,
103-
}
104-
}
105-
106-
pub fn children(&self) -> Vec<GroupId> {
107109
match self {
108-
DefaultLogicalExpression::Scan(_) => vec![],
109-
DefaultLogicalExpression::Filter(filter) => vec![filter.child],
110-
DefaultLogicalExpression::Join(join) => vec![join.left, join.right],
110+
Self::Scan(_) => self.clone(),
111+
Self::Filter(filter) => Self::Filter(Filter {
112+
child: rewrite(filter.child),
113+
expression: filter.expression.clone(),
114+
}),
115+
Self::Join(join) => Self::Join(Join {
116+
left: rewrite(join.left),
117+
right: rewrite(join.right),
118+
expression: join.expression.clone(),
119+
}),
111120
}
112121
}
113122
}
@@ -130,9 +139,8 @@ pub struct Join {
130139
expression: String,
131140
}
132141

133-
/// TODO Use a macro.
134-
impl From<logical_expression::Model> for DefaultLogicalExpression {
135-
fn from(value: logical_expression::Model) -> Self {
142+
impl From<Model> for DefaultLogicalExpression {
143+
fn from(value: Model) -> Self {
136144
match value.kind {
137145
0 => Self::Scan(
138146
serde_json::from_value(value.data)
@@ -151,14 +159,10 @@ impl From<logical_expression::Model> for DefaultLogicalExpression {
151159
}
152160
}
153161

154-
/// TODO Use a macro.
155-
impl From<DefaultLogicalExpression> for logical_expression::Model {
156-
fn from(value: DefaultLogicalExpression) -> logical_expression::Model {
157-
fn create_logical_expression(
158-
kind: i16,
159-
data: serde_json::Value,
160-
) -> logical_expression::Model {
161-
logical_expression::Model {
162+
impl From<DefaultLogicalExpression> for Model {
163+
fn from(value: DefaultLogicalExpression) -> Model {
164+
fn create_logical_expression(kind: i16, data: serde_json::Value) -> Model {
165+
Model {
162166
id: -1,
163167
group_id: -1,
164168
kind,

optd-mvp/src/expression/physical_expression.rs

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,42 @@
22
//!
33
//! FIXME: All fields are placeholders.
44
//!
5+
//! TODO Remove dead code.
56
//! TODO Figure out if each operator should be in a different submodule.
67
//! TODO This entire file is a WIP.
78
8-
use crate::{entities::*, memo::GroupId};
9+
#![allow(dead_code)]
10+
11+
use crate::{entities::physical_expression::Model, memo::GroupId};
912
use serde::{Deserialize, Serialize};
13+
use std::fmt::Debug;
14+
15+
/// An interface defining what an in-memory physical expression representation should be able to do.
16+
pub trait PhysicalExpression: From<Model> + Into<Model> + Clone + Debug {
17+
/// Returns the kind of relation / operator node encoded as an integer.
18+
fn kind(&self) -> i16;
19+
20+
/// Retrieves the child group IDs of this physical expression.
21+
fn children(&self) -> Vec<GroupId>;
22+
}
23+
24+
impl PhysicalExpression for DefaultPhysicalExpression {
25+
fn kind(&self) -> i16 {
26+
match self {
27+
Self::TableScan(_) => 0,
28+
Self::Filter(_) => 1,
29+
Self::HashJoin(_) => 2,
30+
}
31+
}
32+
33+
fn children(&self) -> Vec<GroupId> {
34+
match self {
35+
Self::TableScan(_) => vec![],
36+
Self::Filter(filter) => vec![filter.child],
37+
Self::HashJoin(hash_join) => vec![hash_join.left, hash_join.right],
38+
}
39+
}
40+
}
1041

1142
#[derive(Clone, Debug, PartialEq, Eq)]
1243
pub enum DefaultPhysicalExpression {
@@ -33,9 +64,8 @@ pub struct HashJoin {
3364
expression: String,
3465
}
3566

36-
/// TODO Use a macro.
37-
impl From<physical_expression::Model> for DefaultPhysicalExpression {
38-
fn from(value: physical_expression::Model) -> Self {
67+
impl From<Model> for DefaultPhysicalExpression {
68+
fn from(value: Model) -> Self {
3969
match value.kind {
4070
0 => Self::TableScan(
4171
serde_json::from_value(value.data)
@@ -54,14 +84,10 @@ impl From<physical_expression::Model> for DefaultPhysicalExpression {
5484
}
5585
}
5686

57-
/// TODO Use a macro.
58-
impl From<DefaultPhysicalExpression> for physical_expression::Model {
59-
fn from(value: DefaultPhysicalExpression) -> physical_expression::Model {
60-
fn create_physical_expression(
61-
kind: i16,
62-
data: serde_json::Value,
63-
) -> physical_expression::Model {
64-
physical_expression::Model {
87+
impl From<DefaultPhysicalExpression> for Model {
88+
fn from(value: DefaultPhysicalExpression) -> Model {
89+
fn create_physical_expression(kind: i16, data: serde_json::Value) -> Model {
90+
Model {
6591
id: -1,
6692
group_id: -1,
6793
kind,

0 commit comments

Comments
 (0)