From 044a716e1ebc3ef1047dfba740d41b7325b05d13 Mon Sep 17 00:00:00 2001 From: Ayush Dattagupta Date: Fri, 9 Sep 2022 15:53:25 -0700 Subject: [PATCH 1/4] Add Bernoulli keyword for parsing tablesample --- src/keywords.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/keywords.rs b/src/keywords.rs index 30ec735f7..45c943427 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -98,6 +98,7 @@ define_keywords!( BEGIN, BEGIN_FRAME, BEGIN_PARTITION, + BERNOULLI, BETWEEN, BIGINT, BINARY, From d12d11fdd430b258f9aecc9125efabcd4fc97db6 Mon Sep 17 00:00:00 2001 From: Ayush Dattagupta Date: Fri, 9 Sep 2022 15:54:00 -0700 Subject: [PATCH 2/4] Add struct for tablesample and make it a member of TableFactor::table, add enum for samplingMethods --- src/ast/query.rs | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/src/ast/query.rs b/src/ast/query.rs index bc5af9e5f..76940eb2d 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -344,6 +344,7 @@ pub enum TableFactor { args: Option>, /// MSSQL-specific `WITH (...)` hints such as NOLOCK. 
with_hints: Vec, + tablesample: Option, }, Derived { lateral: bool, @@ -389,6 +390,7 @@ impl fmt::Display for TableFactor { alias, args, with_hints, + tablesample, } => { write!(f, "{}", name)?; if let Some(args) = args { @@ -400,6 +402,9 @@ impl fmt::Display for TableFactor { if !with_hints.is_empty() { write!(f, " WITH ({})", display_comma_separated(with_hints))?; } + if let Some(tablesample) = tablesample { + write!(f, " {}", tablesample)?; + } Ok(()) } TableFactor::Derived { @@ -455,6 +460,44 @@ impl fmt::Display for TableFactor { } } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct TableSample { + pub sampling_method: SamplingMethod, + pub sampling_fraction: Box, + pub repeatable_seed: Option>, +} + +impl fmt::Display for TableSample { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "TABLESAMPLE {} ({})", + self.sampling_method, self.sampling_fraction + )?; + if let Some(ref repeatable_seed) = self.repeatable_seed { + write!(f, " REPEATABLE ({})", repeatable_seed)?; + } + Ok(()) + } +} +/// Sampling method named in a `TABLESAMPLE` clause; displayed as the bare keyword `BERNOULLI` or `SYSTEM`. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum SamplingMethod { + BERNOULLI, + SYSTEM, +} + +impl fmt::Display for SamplingMethod { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + SamplingMethod::BERNOULLI => write!(f, "BERNOULLI"), + SamplingMethod::SYSTEM => write!(f, "SYSTEM"), + } + } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct TableAlias { From 1539807c5121a312c29cd4f56725de1a6eb54480 Mon Sep 17 00:00:00 2001 From: Ayush Dattagupta Date: Fri, 9 Sep 2022 15:55:03 -0700 Subject: [PATCH 3/4] Add logic to parse tablesample --- src/ast/mod.rs | 4 ++-- src/parser.rs | 30 ++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git
a/src/ast/mod.rs b/src/ast/mod.rs index 6c279518e..29c7a76de 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -36,8 +36,8 @@ pub use self::ddl::{ pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ Cte, Fetch, Join, JoinConstraint, JoinOperator, LateralView, LockType, Offset, OffsetRows, - OrderByExpr, Query, Select, SelectInto, SelectItem, SetExpr, SetOperator, TableAlias, - TableFactor, TableWithJoins, Top, Values, With, + OrderByExpr, Query, SamplingMethod, Select, SelectInto, SelectItem, SetExpr, SetOperator, + TableAlias, TableFactor, TableSample, TableWithJoins, Top, Values, With, }; pub use self::value::{DateTimeField, TrimWhereField, Value}; diff --git a/src/parser.rs b/src/parser.rs index 894bb84f1..a9f2f471f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -4201,11 +4201,14 @@ impl<'a> Parser<'a> { self.prev_token(); } }; + let tablesample = self.parse_tablesample_args()?; + Ok(TableFactor::Table { name, alias, args, with_hints, + tablesample, }) } } @@ -4227,6 +4230,33 @@ impl<'a> Parser<'a> { }) } + pub fn parse_tablesample_args(&mut self) -> Result, ParserError> { + if self.parse_keyword(Keyword::TABLESAMPLE) { + let sampling_method = + match self.expect_one_of_keywords(&[Keyword::BERNOULLI, Keyword::SYSTEM])? 
{ + Keyword::BERNOULLI => SamplingMethod::BERNOULLI, + Keyword::SYSTEM => SamplingMethod::SYSTEM, + _ => unreachable!(), + }; + self.expect_token(&Token::LParen)?; + let sampling_fraction = Box::new(self.parse_expr()?); + self.expect_token(&Token::RParen)?; + let mut repeatable_seed = None; + if self.parse_keyword(Keyword::REPEATABLE) { + self.expect_token(&Token::LParen)?; + repeatable_seed = Some(Box::new(self.parse_expr()?)); + self.expect_token(&Token::RParen)?; + } + Ok(Some(TableSample { + sampling_method, + sampling_fraction, + repeatable_seed, + })) + } else { + Ok(None) + } + } + pub fn parse_join_constraint(&mut self, natural: bool) -> Result { if natural { Ok(JoinConstraint::Natural) From bcc8270cca64f95e079c857d63c44093e02db9e2 Mon Sep 17 00:00:00 2001 From: Ayush Dattagupta Date: Fri, 9 Sep 2022 15:55:37 -0700 Subject: [PATCH 4/4] update existing tests to update the creation of tableFactor::table to include tablesample --- src/test_utils.rs | 1 + tests/sqlparser_bigquery.rs | 1 + tests/sqlparser_clickhouse.rs | 1 + tests/sqlparser_common.rs | 32 ++++++++++++++++++++++++++------ tests/sqlparser_mysql.rs | 7 +++++-- tests/sqlparser_postgres.rs | 2 ++ tests/sqlparser_redshift.rs | 2 ++ 7 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/test_utils.rs b/src/test_utils.rs index c5ea62085..93233a194 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -179,6 +179,7 @@ pub fn table(name: impl Into) -> TableFactor { alias: None, args: None, with_hints: vec![], + tablesample: None, } } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 0a606c3ec..1ab35d031 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -46,6 +46,7 @@ fn parse_table_identifiers() { alias: None, args: None, with_hints: vec![], + tablesample: None, }, joins: vec![] },] diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index a61df73cc..1602f5e01 100644 --- a/tests/sqlparser_clickhouse.rs +++ 
b/tests/sqlparser_clickhouse.rs @@ -61,6 +61,7 @@ fn parse_map_access_expr() { alias: None, args: None, with_hints: vec![], + tablesample: None, }, joins: vec![] }], diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 6fee5e88b..2fc638053 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -204,6 +204,7 @@ fn parse_update_with_table_alias() { }), args: None, with_hints: vec![], + tablesample: None, }, joins: vec![] }, @@ -257,7 +258,8 @@ fn parse_delete_statement() { name: ObjectName(vec![Ident::with_quote('"', "table")]), alias: None, args: None, - with_hints: vec![] + with_hints: vec![], + tablesample: None, }, table_name ); @@ -282,7 +284,8 @@ fn parse_where_delete_statement() { name: ObjectName(vec![Ident::new("foo")]), alias: None, args: None, - with_hints: vec![] + with_hints: vec![], + tablesample: None, }, table_name, ); @@ -320,7 +323,8 @@ fn parse_where_delete_with_alias_statement() { columns: vec![] }), args: None, - with_hints: vec![] + with_hints: vec![], + tablesample: None }, table_name, ); @@ -333,7 +337,8 @@ fn parse_where_delete_with_alias_statement() { columns: vec![] }), args: None, - with_hints: vec![] + with_hints: vec![], + tablesample: None, }), using ); @@ -3273,11 +3278,13 @@ fn parse_delimited_identifiers() { alias, args, with_hints, + tablesample, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); assert!(args.is_none()); assert!(with_hints.is_empty()); + assert!(tablesample.is_none()); } _ => panic!("Expecting TableFactor::Table"), } @@ -3414,6 +3421,7 @@ fn parse_implicit_join() { alias: None, args: None, with_hints: vec![], + tablesample: None, }, joins: vec![], }, @@ -3423,6 +3431,7 @@ fn parse_implicit_join() { alias: None, args: None, with_hints: vec![], + tablesample: None, }, joins: vec![], } @@ -3440,6 +3449,7 @@ fn parse_implicit_join() { alias: None, args: None, with_hints: vec![], + tablesample: None, 
}, joins: vec![Join { relation: TableFactor::Table { @@ -3447,6 +3457,7 @@ fn parse_implicit_join() { alias: None, args: None, with_hints: vec![], + tablesample: None, }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }] @@ -3457,6 +3468,7 @@ fn parse_implicit_join() { alias: None, args: None, with_hints: vec![], + tablesample: None, }, joins: vec![Join { relation: TableFactor::Table { @@ -3464,6 +3476,7 @@ fn parse_implicit_join() { alias: None, args: None, with_hints: vec![], + tablesample: None, }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }] @@ -3484,6 +3497,7 @@ fn parse_cross_join() { alias: None, args: None, with_hints: vec![], + tablesample: None, }, join_operator: JoinOperator::CrossJoin }, @@ -3504,6 +3518,7 @@ fn parse_joins_on() { alias, args: None, with_hints: vec![], + tablesample: None, }, join_operator: f(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::Identifier("c1".into())), @@ -3557,6 +3572,7 @@ fn parse_joins_using() { alias, args: None, with_hints: vec![], + tablesample: None, }, join_operator: f(JoinConstraint::Using(vec!["c1".into()])), } @@ -3602,6 +3618,7 @@ fn parse_natural_join() { alias, args: None, with_hints: vec![], + tablesample: None, }, join_operator: f(JoinConstraint::Natural), } @@ -3869,6 +3886,7 @@ fn parse_derived_tables() { alias: None, args: None, with_hints: vec![], + tablesample: None, }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -4996,7 +5014,8 @@ fn parse_merge() { columns: vec![] }), args: None, - with_hints: vec![] + with_hints: vec![], + tablesample: None, } ); assert_eq!(table, table_no_into); @@ -5017,7 +5036,8 @@ fn parse_merge() { name: ObjectName(vec![Ident::new("s"), Ident::new("foo")]), alias: None, args: None, - with_hints: vec![] + with_hints: vec![], + tablesample: None, }, joins: vec![] }], diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index f46d5d23e..1864fd581 100644 --- a/tests/sqlparser_mysql.rs +++ 
b/tests/sqlparser_mysql.rs @@ -810,10 +810,11 @@ fn parse_update_with_joins() { name: ObjectName(vec![Ident::new("orders")]), alias: Some(TableAlias { name: Ident::new("o"), - columns: vec![] + columns: vec![], }), args: None, with_hints: vec![], + tablesample: None, }, joins: vec![Join { relation: TableFactor::Table { @@ -824,6 +825,7 @@ fn parse_update_with_joins() { }), args: None, with_hints: vec![], + tablesample: None, }, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::CompoundIdentifier(vec![ @@ -930,7 +932,8 @@ fn parse_substring_in_select() { }]), alias: None, args: None, - with_hints: vec![] + with_hints: vec![], + tablesample: None, }, joins: vec![] }], diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 3aaabc9e3..5b07e3642 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -414,6 +414,7 @@ fn parse_update_set_from() { alias: None, args: None, with_hints: vec![], + tablesample: None, }, joins: vec![], }, @@ -440,6 +441,7 @@ fn parse_update_set_from() { alias: None, args: None, with_hints: vec![], + tablesample: None, }, joins: vec![], }], diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 6f77cf335..34c8637f9 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -45,6 +45,7 @@ fn test_square_brackets_over_db_schema_table_name() { alias: None, args: None, with_hints: vec![], + tablesample: None, }, joins: vec![], } @@ -89,6 +90,7 @@ fn test_double_quotes_over_db_schema_table_name() { alias: None, args: None, with_hints: vec![], + tablesample: None, }, joins: vec![], }