Skip to content

Commit 2c015b4

Browse files
committed
add errors and panics documentation in memo module
1 parent 2702eb4 commit 2c015b4

File tree

5 files changed

+140
-37
lines changed

5 files changed

+140
-37
lines changed

optd-mvp/src/lib.rs

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
//! This crate is an attempt to make an MVP of a duplicate-detecting memo table for query
2+
//! optimization.
3+
//!
4+
//! TODO write more docs.
5+
16
use sea_orm::*;
27
use sea_orm_migration::prelude::*;
38
use thiserror::Error;
@@ -7,23 +12,22 @@ use migrator::Migrator;
712

813
mod entities;
914

10-
mod memo;
11-
use memo::MemoError;
12-
13-
mod expression;
15+
pub mod expression;
16+
pub mod memo;
1417

1518
/// The filename of the SQLite database for migration.
1619
pub const DATABASE_FILENAME: &str = "sqlite.db";
1720
/// The URL of the SQLite database for migration.
1821
pub const DATABASE_URL: &str = "sqlite:./sqlite.db?mode=rwc";
1922

2023
/// An error type wrapping all the different kinds of error the optimizer might raise.
24+
#[allow(missing_docs)]
2125
#[derive(Error, Debug)]
2226
pub enum OptimizerError {
2327
#[error("SeaORM error")]
2428
Database(#[from] sea_orm::error::DbErr),
2529
#[error("Memo table logical error")]
26-
Memo(#[from] MemoError),
30+
Memo(#[from] memo::MemoError),
2731
#[error("unknown error")]
2832
Unknown,
2933
}
@@ -32,6 +36,10 @@ pub enum OptimizerError {
3236
pub type OptimizerResult<T> = Result<T, OptimizerError>;
3337

3438
/// Applies all migrations.
39+
///
40+
/// # Errors
41+
///
42+
/// Returns a [`DbErr`] if unable to apply any migrations.
3543
pub async fn migrate(db: &DatabaseConnection) -> Result<(), DbErr> {
3644
Migrator::refresh(db).await
3745
}

optd-mvp/src/memo/mod.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22
//!
33
//! TODO more docs.
44
5+
#![warn(missing_docs)]
6+
#![warn(clippy::missing_docs_in_private_items)]
7+
#![warn(clippy::missing_errors_doc)]
8+
#![warn(clippy::missing_panics_doc)]
9+
#![warn(clippy::missing_safety_doc)]
10+
511
use serde::{Deserialize, Serialize};
612
use thiserror::Error;
713

@@ -21,12 +27,16 @@ pub struct PhysicalExpressionId(pub i32);
2127
/// A status enum representing the different states a group can be during query optimization.
2228
#[repr(u8)]
2329
pub enum GroupStatus {
30+
/// Represents a group that is currently being logically explored.
2431
InProgress = 0,
32+
/// Represents a logically explored group that is currently being physically optimized.
2533
Explored = 1,
34+
/// Represents a fully optimized group.
2635
Optimized = 2,
2736
}
2837

2938
/// The different kinds of errors that might occur while running operations on a memo table.
39+
#[allow(missing_docs)]
3040
#[derive(Error, Debug)]
3141
pub enum MemoError {
3242
#[error("unknown group ID {0:?}")]
@@ -39,4 +49,4 @@ pub enum MemoError {
3949
InvalidExpression,
4050
}
4151

42-
mod persistent;
52+
pub mod persistent;

optd-mvp/src/memo/persistent/implementation.rs

Lines changed: 102 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ where
2727
{
2828
/// Creates a new `PersistentMemo` struct by connecting to a database defined at
2929
/// [`DATABASE_URL`].
30+
///
31+
/// # Panics
32+
///
33+
/// Panics if unable to create a databse connection to [`DATABASE_URL`].
3034
pub async fn new() -> Self {
3135
Self {
3236
db: Database::connect(DATABASE_URL).await.unwrap(),
@@ -39,7 +43,14 @@ where
3943
///
4044
/// Since there is no asynchronous drop yet in Rust, in order to drop all objects in the
4145
/// database, the user must call this manually.
46+
///
47+
/// # Panics
48+
///
49+
/// May panic if unable to delete entities from any table.
4250
pub async fn cleanup(&self) {
51+
/// Simple private macro to teardown all tables in the database.
52+
/// Note that these have to be specified manually, so when adding a new table to the
53+
/// database, we must make sure to add that table here.
4354
macro_rules! delete_all {
4455
($($module: ident),+ $(,)?) => {
4556
$(
@@ -63,9 +74,11 @@ where
6374

6475
/// Retrieves a [`group::Model`] given its ID.
6576
///
66-
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error.
67-
///
6877
/// FIXME: use an in-memory representation of a group instead.
78+
///
79+
/// # Errors
80+
///
81+
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error.
6982
pub async fn get_group(&self, group_id: GroupId) -> OptimizerResult<group::Model> {
7083
Ok(group::Entity::find_by_id(group_id.0)
7184
.one(&self.db)
@@ -80,39 +93,48 @@ where
8093
///
8194
/// This function uses the path compression optimization, which amortizes the cost to a single
8295
/// lookup (theoretically in constant time, but we must be wary of the I/O roundtrip).
96+
///
97+
/// # Errors
98+
///
99+
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. This function
100+
/// also performs path compression pointer updates, so any of those updates can fail with a
101+
/// [`DbErr`].
83102
pub async fn get_root_group(&self, group_id: GroupId) -> OptimizerResult<GroupId> {
84-
let mut curr_group = self.get_group(group_id).await?;
85-
86-
// Traverse up the path and find the root group, keeping track of groups we have visited.
87-
let mut path = vec![];
88-
while let Some(parent_id) = curr_group.parent_id {
89-
let next_group = self.get_group(GroupId(parent_id)).await?;
90-
path.push(curr_group);
91-
curr_group = next_group;
92-
}
103+
let curr_group = self.get_group(group_id).await?;
104+
105+
// If we have no parent, then we are at the root.
106+
let Some(parent_id) = curr_group.parent_id else {
107+
return Ok(GroupId(curr_group.id));
108+
};
93109

94-
let root_id = GroupId(curr_group.id);
110+
// Recursively find the root group ID.
111+
let root_id = Box::pin(self.get_root_group(GroupId(parent_id))).await?;
95112

96113
// Path Compression Optimization:
97114
// For every group along the path that we walked, set their parent id pointer to the root.
98115
// This allows for an amortized O(1) cost for `get_root_group`.
99-
for group in path {
100-
let mut active_group = group.into_active_model();
116+
let mut active_group = curr_group.into_active_model();
101117

102-
// Update the group to point to the new parent.
103-
active_group.parent_id = Set(Some(root_id.0));
104-
active_group.update(&self.db).await?;
105-
}
118+
// Update the group to point to the new parent.
119+
active_group.parent_id = Set(Some(root_id.0));
120+
active_group.update(&self.db).await?;
106121

107-
Ok(root_id)
122+
Ok(GroupId(root_id.0))
108123
}
109124

110125
/// Retrieves every group ID of groups that share the same root group with the input group.
111126
///
112-
/// If a group does not exist in the cycle, returns a [`MemoError::UnknownGroup`] error.
113-
///
114127
/// The group records form a union-find data structure that also maintains a circular linked
115128
/// list in every set that allows us to iterate over all elements in a set in linear time.
129+
///
130+
/// # Errors
131+
///
132+
/// If the input group does not exist, or if any pointer along the path is invalid, returns a
133+
/// [`MemoError::UnknownGroup`] error.
134+
///
135+
/// # Panics
136+
///
137+
/// Panics if the embedded union-find data structure is malformed.
116138
pub async fn get_group_set(&self, group_id: GroupId) -> OptimizerResult<Vec<GroupId>> {
117139
// Iterate over the circular linked list until we reach ourselves again.
118140
let base_group = self.get_group(group_id).await?;
@@ -148,6 +170,8 @@ where
148170

149171
/// Retrieves a [`physical_expression::Model`] given a [`PhysicalExpressionId`].
150172
///
173+
/// # Errors
174+
///
151175
/// If the physical expression does not exist, returns a
152176
/// [`MemoError::UnknownPhysicalExpression`] error.
153177
pub async fn get_physical_expression(
@@ -168,6 +192,8 @@ where
168192

169193
/// Retrieves a [`logical_expression::Model`] given its [`LogicalExpressionId`].
170194
///
195+
/// # Errors
196+
///
171197
/// If the logical expression does not exist, returns a [`MemoError::UnknownLogicalExpression`]
172198
/// error.
173199
pub async fn get_logical_expression(
@@ -188,13 +214,19 @@ where
188214

189215
/// Retrieves all of the logical expression "children" IDs of a group.
190216
///
191-
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error.
192-
///
193217
/// FIXME: `find_related` does not work for some reason, have to use manual `filter`.
218+
///
219+
/// # Errors
220+
///
221+
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. Can also return
222+
/// a [`DbErr`] if the something goes wrong with the filter scan.
194223
pub async fn get_logical_children(
195224
&self,
196225
group_id: GroupId,
197226
) -> OptimizerResult<Vec<LogicalExpressionId>> {
227+
// First ensure that the group exists.
228+
let _ = self.get_group(group_id).await?;
229+
198230
// Search for expressions that have the given parent group ID.
199231
let children = logical_expression::Entity::find()
200232
.filter(logical_expression::Column::GroupId.eq(group_id.0))
@@ -209,11 +241,19 @@ where
209241

210242
/// Retrieves all of the physical expression "children" IDs of a group.
211243
///
212-
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error.
244+
/// FIXME: `find_related` does not work for some reason, have to use manual `filter`.
245+
///
246+
/// # Errors
247+
///
248+
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. Can also return
249+
/// a [`DbErr`] if the something goes wrong with the filter scan.
213250
pub async fn get_physical_children(
214251
&self,
215252
group_id: GroupId,
216253
) -> OptimizerResult<Vec<PhysicalExpressionId>> {
254+
// First ensure that the group exists.
255+
let _ = self.get_group(group_id).await?;
256+
217257
// Search for expressions that have the given parent group ID.
218258
let children = physical_expression::Entity::find()
219259
.filter(physical_expression::Column::GroupId.eq(group_id.0))
@@ -228,7 +268,10 @@ where
228268

229269
/// Updates / replaces a group's status. Returns the previous group status.
230270
///
231-
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error.
271+
/// # Errors
272+
///
273+
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. Can also return a
274+
/// [`DbErr`] if the update fails.
232275
pub async fn update_group_status(
233276
&self,
234277
group_id: GroupId,
@@ -246,7 +289,7 @@ where
246289
0 => GroupStatus::InProgress,
247290
1 => GroupStatus::Explored,
248291
2 => GroupStatus::Optimized,
249-
_ => panic!("encountered an invalid group status"),
292+
_ => unreachable!("encountered an invalid group status"),
250293
};
251294

252295
Ok(old_status)
@@ -255,10 +298,13 @@ where
255298
/// Updates / replaces a group's best physical plan (winner). Optionally returns the previous
256299
/// winner's physical expression ID.
257300
///
258-
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error.
259-
///
260301
/// FIXME: In the future, this should first check that we aren't overwriting a winner that was
261302
/// updated from another thread by comparing against the cost of the plan.
303+
///
304+
/// # Errors
305+
///
306+
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. Can also return a
307+
/// [`DbErr`] if the update fails.
262308
pub async fn update_group_winner(
263309
&self,
264310
group_id: GroupId,
@@ -291,6 +337,13 @@ where
291337
///
292338
/// If the memo table detects that the input is unique, it will insert the expression into the
293339
/// input group and return an `Ok(Ok(expression_id))`.
340+
///
341+
/// # Errors
342+
///
343+
/// Note that the return value is a [`Result`] wrapped in an [`OptimizerResult`]. The outer
344+
/// result is used for raising [`DbErr`] or other database/IO-related errors. The inner result
345+
/// is used for notifying the caller if the expression that they attempted to insert was a
346+
/// duplicate expression or not.
294347
pub async fn add_logical_expression_to_group(
295348
&self,
296349
group_id: GroupId,
@@ -359,9 +412,13 @@ where
359412
/// The caller is required to pass in a slice of [`GroupId`] that represent the child groups of
360413
/// the input expression.
361414
///
362-
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error.
363-
///
364415
/// On successful insertion, returns the ID of the physical expression.
416+
///
417+
/// # Errors
418+
///
419+
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error. Can also fail if
420+
/// insertion of the new physical expression or any of its child junction entries are not able
421+
/// to be inserted.
365422
pub async fn add_physical_expression_to_group(
366423
&self,
367424
group_id: GroupId,
@@ -403,6 +460,10 @@ where
403460
/// This function assumes that the child groups of the expression are currently roots of their
404461
/// group sets. For example, if G1 and G2 should be merged, and G1 is the root, then the input
405462
/// expression should _not_ have G2 as a child, and should be replaced with G1.
463+
///
464+
/// # Errors
465+
///
466+
/// Returns a [`DbErr`] when a database operation fails.
406467
pub async fn is_duplicate_logical_expression(
407468
&self,
408469
logical_expression: &L,
@@ -480,6 +541,13 @@ where
480541
///
481542
/// If the expression does not exist, this function will create a new group and a new
482543
/// expression, returning brand new IDs for both.
544+
///
545+
/// # Errors
546+
///
547+
/// Note that the return value is a [`Result`] wrapped in an [`OptimizerResult`]. The outer
548+
/// result is used for raising [`DbErr`] or other database/IO-related errors. The inner result
549+
/// is used for notifying the caller if the expression/group that they attempted to insert was a
550+
/// duplicate expression or not.
483551
pub async fn add_group(
484552
&self,
485553
logical_expression: L,
@@ -558,6 +626,10 @@ where
558626
/// TODO write docs.
559627
/// TODO highly inefficient, need to understand metrics and performance testing.
560628
/// TODO Optimization: add rank / size into data structure
629+
///
630+
/// # Errors
631+
///
632+
/// TODO
561633
pub async fn merge_groups(
562634
&self,
563635
left_group_id: GroupId,

optd-mvp/src/memo/persistent/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,4 @@ pub struct PersistentMemo<L, P> {
2222
_phantom_physical: PhantomData<P>,
2323
}
2424

25-
mod implementation;
25+
pub mod implementation;

optd-mvp/src/migrator/mod.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,20 @@
1+
//! This module defines the tables and their schemas for representing a persistent memo table.
2+
//!
3+
//! The most important tables represented here are the [`group`], [`logical_expression`], and
4+
//! [`physical_expression`] tables. See the corresponding modules for more information on their
5+
//! relations and fields.
6+
//!
7+
//! See the SeaORM docs for more information specific to migrations.
8+
//!
9+
//! [`group`]: memo::group
10+
//! [`logical_expression`]: memo::logical_expression
11+
//! [`physical_expression`]: memo::physical_expression
12+
113
use sea_orm_migration::prelude::*;
214

315
mod memo;
416

17+
/// A unit struct that implements the [`MigratorTrait`] for running custom migrations.
518
pub struct Migrator;
619

720
#[async_trait::async_trait]

0 commit comments

Comments
 (0)