From deac26971084d0790e718a3352a43ecbbc868e64 Mon Sep 17 00:00:00 2001 From: Philip Cristiano Date: Mon, 17 Jun 2024 14:10:40 -0400 Subject: [PATCH 001/506] CreateIndex: Move Display fmt to struct (#1307) --- src/ast/dml.rs | 43 +++++++++++++++++++++++++++++++++++++++++++ src/ast/mod.rs | 43 +------------------------------------------ 2 files changed, 44 insertions(+), 42 deletions(-) diff --git a/src/ast/dml.rs b/src/ast/dml.rs index 74bb5435c..b35b2b970 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -47,6 +47,49 @@ pub struct CreateIndex { pub nulls_distinct: Option, pub predicate: Option, } + +impl Display for CreateIndex { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "CREATE {unique}INDEX {concurrently}{if_not_exists}", + unique = if self.unique { "UNIQUE " } else { "" }, + concurrently = if self.concurrently { + "CONCURRENTLY " + } else { + "" + }, + if_not_exists = if self.if_not_exists { + "IF NOT EXISTS " + } else { + "" + }, + )?; + if let Some(value) = &self.name { + write!(f, "{value} ")?; + } + write!(f, "ON {}", self.table_name)?; + if let Some(value) = &self.using { + write!(f, " USING {value} ")?; + } + write!(f, "({})", display_separated(&self.columns, ","))?; + if !self.include.is_empty() { + write!(f, " INCLUDE ({})", display_separated(&self.include, ","))?; + } + if let Some(value) = self.nulls_distinct { + if value { + write!(f, " NULLS DISTINCT")?; + } else { + write!(f, " NULLS NOT DISTINCT")?; + } + } + if let Some(predicate) = &self.predicate { + write!(f, " WHERE {predicate}")?; + } + Ok(()) + } +} + /// CREATE TABLE statement. 
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 49d6499c5..6e306b1e3 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -3383,48 +3383,7 @@ impl fmt::Display for Statement { } Ok(()) } - Statement::CreateIndex(CreateIndex { - name, - table_name, - using, - columns, - unique, - concurrently, - if_not_exists, - include, - nulls_distinct, - predicate, - }) => { - write!( - f, - "CREATE {unique}INDEX {concurrently}{if_not_exists}", - unique = if *unique { "UNIQUE " } else { "" }, - concurrently = if *concurrently { "CONCURRENTLY " } else { "" }, - if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" }, - )?; - if let Some(value) = name { - write!(f, "{value} ")?; - } - write!(f, "ON {table_name}")?; - if let Some(value) = using { - write!(f, " USING {value} ")?; - } - write!(f, "({})", display_separated(columns, ","))?; - if !include.is_empty() { - write!(f, " INCLUDE ({})", display_separated(include, ","))?; - } - if let Some(value) = nulls_distinct { - if *value { - write!(f, " NULLS DISTINCT")?; - } else { - write!(f, " NULLS NOT DISTINCT")?; - } - } - if let Some(predicate) = predicate { - write!(f, " WHERE {predicate}")?; - } - Ok(()) - } + Statement::CreateIndex(create_index) => create_index.fmt(f), Statement::CreateExtension { name, if_not_exists, From 0330f9def5ebd6b7813dc4656f40edc717dbd0a3 Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Mon, 17 Jun 2024 22:14:40 +0400 Subject: [PATCH 002/506] Support use of `BY NAME` quantifier across all set ops (#1309) Co-authored-by: Alexander Beedie Co-authored-by: Joey Hain --- README.md | 6 +++--- src/ast/data_type.rs | 4 ++-- src/ast/mod.rs | 6 +++--- src/parser/mod.rs | 15 +++------------ src/tokenizer.rs | 2 +- tests/sqlparser_common.rs | 6 ++++++ 6 files changed, 18 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 512f5f6c0..3226b9549 100644 --- 
a/README.md +++ b/README.md @@ -114,13 +114,12 @@ $ cargo run --features json_example --example cli FILENAME.sql [--dialectname] ## Users -This parser is currently being used by the [DataFusion] query engine, -[LocustDB], [Ballista], [GlueSQL], [Opteryx], [PRQL], [Qrlew], [JumpWire], and [ParadeDB]. +This parser is currently being used by the [DataFusion] query engine, [LocustDB], +[Ballista], [GlueSQL], [Opteryx], [Polars], [PRQL], [Qrlew], [JumpWire], and [ParadeDB]. If your project is using sqlparser-rs feel free to make a PR to add it to this list. - ## Design The core expression parser uses the [Pratt Parser] design, which is a top-down @@ -210,6 +209,7 @@ licensed as above, without any additional terms or conditions. [Ballista]: https://github.com/apache/arrow-ballista [GlueSQL]: https://github.com/gluesql/gluesql [Opteryx]: https://github.com/mabel-dev/opteryx +[Polars]: https://pola.rs/ [PRQL]: https://github.com/PRQL/prql [Qrlew]: https://github.com/Qrlew/qrlew [JumpWire]: https://github.com/extragoodlabs/jumpwire diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index 7d0aec8fc..6b1a542f4 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -168,7 +168,7 @@ pub enum DataType { UnsignedInt(Option), /// Unsigned int4 with optional display width e.g. INT4 UNSIGNED or INT4(11) UNSIGNED UnsignedInt4(Option), - /// Unsigned integer with optional display width e.g. INTGER UNSIGNED or INTEGER(11) UNSIGNED + /// Unsigned integer with optional display width e.g. INTEGER UNSIGNED or INTEGER(11) UNSIGNED UnsignedInteger(Option), /// Unsigned integer type in [clickhouse] /// Note: UInt8 mean 8 bits in [clickhouse] @@ -699,7 +699,7 @@ pub enum CharacterLength { /// Optional unit. 
If not informed, the ANSI handles it as CHARACTERS implicitly unit: Option, }, - /// VARCHAR(MAX) or NVARCHAR(MAX), used in T-SQL (Miscrosoft SQL Server) + /// VARCHAR(MAX) or NVARCHAR(MAX), used in T-SQL (Microsoft SQL Server) Max, } diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 6e306b1e3..7af8efaec 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2265,7 +2265,7 @@ pub enum Statement { /// SET [ SESSION | LOCAL ] ROLE role_name /// ``` /// - /// Sets sesssion state. Examples: [ANSI][1], [Postgresql][2], [MySQL][3], and [Oracle][4] + /// Sets session state. Examples: [ANSI][1], [Postgresql][2], [MySQL][3], and [Oracle][4] /// /// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#set-role-statement /// [2]: https://www.postgresql.org/docs/14/sql-set-role.html @@ -2283,7 +2283,7 @@ pub enum Statement { /// ``` /// /// Note: this is not a standard SQL statement, but it is supported by at - /// least MySQL and PostgreSQL. Not all MySQL-specific syntatic forms are + /// least MySQL and PostgreSQL. Not all MySQL-specific syntactic forms are /// supported yet. SetVariable { local: bool, @@ -4750,7 +4750,7 @@ impl fmt::Display for FunctionArguments { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct FunctionArgumentList { - /// `[ ALL | DISTINCT ] + /// `[ ALL | DISTINCT ]` pub duplicate_treatment: Option, /// The function arguments. 
pub args: Vec, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index c591b8116..e240441b9 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8138,7 +8138,7 @@ impl<'a> Parser<'a> { pub fn parse_set_quantifier(&mut self, op: &Option) -> SetQuantifier { match op { - Some(SetOperator::Union) => { + Some(SetOperator::Except | SetOperator::Intersect | SetOperator::Union) => { if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, Keyword::NAME]) { SetQuantifier::DistinctByName } else if self.parse_keywords(&[Keyword::BY, Keyword::NAME]) { @@ -8155,15 +8155,6 @@ impl<'a> Parser<'a> { SetQuantifier::None } } - Some(SetOperator::Except) | Some(SetOperator::Intersect) => { - if self.parse_keyword(Keyword::ALL) { - SetQuantifier::All - } else if self.parse_keyword(Keyword::DISTINCT) { - SetQuantifier::Distinct - } else { - SetQuantifier::None - } - } _ => SetQuantifier::None, } } @@ -8547,10 +8538,10 @@ impl<'a> Parser<'a> { }) } else if variable.to_string() == "TRANSACTION" && modifier.is_none() { if self.parse_keyword(Keyword::SNAPSHOT) { - let snaphot_id = self.parse_value()?; + let snapshot_id = self.parse_value()?; return Ok(Statement::SetTransaction { modes: vec![], - snapshot: Some(snaphot_id), + snapshot: Some(snapshot_id), session: false, }); } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index bcc5478bc..4e64e0712 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -654,7 +654,7 @@ impl<'a> Tokenizer<'a> { Ok(()) } - // Tokenize the identifer or keywords in `ch` + // Tokenize the identifier or keywords in `ch` fn tokenize_identifier_or_keyword( &self, ch: impl IntoIterator, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index f6518e276..a86858129 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -6010,6 +6010,12 @@ fn parse_union_except_intersect() { verified_stmt("SELECT foo FROM tab UNION SELECT bar FROM TAB"); verified_stmt("(SELECT * FROM new EXCEPT SELECT * FROM old) UNION ALL (SELECT * FROM old 
EXCEPT SELECT * FROM new) ORDER BY 1"); verified_stmt("(SELECT * FROM new EXCEPT DISTINCT SELECT * FROM old) UNION DISTINCT (SELECT * FROM old EXCEPT DISTINCT SELECT * FROM new) ORDER BY 1"); + verified_stmt("SELECT 1 AS x, 2 AS y EXCEPT BY NAME SELECT 9 AS y, 8 AS x"); + verified_stmt("SELECT 1 AS x, 2 AS y EXCEPT ALL BY NAME SELECT 9 AS y, 8 AS x"); + verified_stmt("SELECT 1 AS x, 2 AS y EXCEPT DISTINCT BY NAME SELECT 9 AS y, 8 AS x"); + verified_stmt("SELECT 1 AS x, 2 AS y INTERSECT BY NAME SELECT 9 AS y, 8 AS x"); + verified_stmt("SELECT 1 AS x, 2 AS y INTERSECT ALL BY NAME SELECT 9 AS y, 8 AS x"); + verified_stmt("SELECT 1 AS x, 2 AS y INTERSECT DISTINCT BY NAME SELECT 9 AS y, 8 AS x"); } #[test] From 345e2098fb3cdd720f99e8183720529c4fd0acc5 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Tue, 18 Jun 2024 15:28:39 +0200 Subject: [PATCH 003/506] add support for update statements that contain tuple assignments (#1317) --- src/ast/mod.rs | 26 ++++++++++++++++++++++++-- src/parser/mod.rs | 16 ++++++++++++++-- tests/sqlparser_bigquery.rs | 4 ++-- tests/sqlparser_common.rs | 23 ++++++++++++++++------- tests/sqlparser_mysql.rs | 25 +++++++++++++++++++------ tests/sqlparser_postgres.rs | 10 +++++----- tests/sqlparser_sqlite.rs | 34 ++++++++++++++++++++++++++++++++++ 7 files changed, 114 insertions(+), 24 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 7af8efaec..769bda598 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -4553,13 +4553,35 @@ impl fmt::Display for GrantObjects { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Assignment { - pub id: Vec, + pub target: AssignmentTarget, pub value: Expr, } impl fmt::Display for Assignment { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{} = {}", display_separated(&self.id, "."), self.value) + write!(f, "{} = {}", self.target, self.value) + } +} + +/// Left-hand side of an assignment in an UPDATE 
statement, +/// e.g. `foo` in `foo = 5` (ColumnName assignment) or +/// `(a, b)` in `(a, b) = (1, 2)` (Tuple assignment). +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum AssignmentTarget { + /// A single column + ColumnName(ObjectName), + /// A tuple of columns + Tuple(Vec), +} + +impl fmt::Display for AssignmentTarget { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + AssignmentTarget::ColumnName(column) => write!(f, "{}", column), + AssignmentTarget::Tuple(columns) => write!(f, "({})", display_comma_separated(columns)), + } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e240441b9..62222c6fb 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9937,10 +9937,22 @@ impl<'a> Parser<'a> { /// Parse a `var = expr` assignment, used in an UPDATE statement pub fn parse_assignment(&mut self) -> Result { - let id = self.parse_identifiers()?; + let target = self.parse_assignment_target()?; self.expect_token(&Token::Eq)?; let value = self.parse_expr()?; - Ok(Assignment { id, value }) + Ok(Assignment { target, value }) + } + + /// Parse the left-hand side of an assignment, used in an UPDATE statement + pub fn parse_assignment_target(&mut self) -> Result { + if self.consume_token(&Token::LParen) { + let columns = self.parse_comma_separated(|p| p.parse_object_name(false))?; + self.expect_token(&Token::RParen)?; + Ok(AssignmentTarget::Tuple(columns)) + } else { + let column = self.parse_object_name(false)?; + Ok(AssignmentTarget::ColumnName(column)) + } } pub fn parse_function_args(&mut self) -> Result { diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 171439d19..fb6e3b88a 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1590,11 +1590,11 @@ fn parse_merge() { let update_action = MergeAction::Update { assignments: vec![ Assignment { - 
id: vec![Ident::new("a")], + target: AssignmentTarget::ColumnName(ObjectName(vec![Ident::new("a")])), value: Expr::Value(number("1")), }, Assignment { - id: vec![Ident::new("b")], + target: AssignmentTarget::ColumnName(ObjectName(vec![Ident::new("b")])), value: Expr::Value(number("2")), }, ], diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index a86858129..15b3b69dd 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -296,15 +296,15 @@ fn parse_update() { assignments, vec![ Assignment { - id: vec!["a".into()], + target: AssignmentTarget::ColumnName(ObjectName(vec!["a".into()])), value: Expr::Value(number("1")), }, Assignment { - id: vec!["b".into()], + target: AssignmentTarget::ColumnName(ObjectName(vec!["b".into()])), value: Expr::Value(number("2")), }, Assignment { - id: vec!["c".into()], + target: AssignmentTarget::ColumnName(ObjectName(vec!["c".into()])), value: Expr::Value(number("3")), }, ] @@ -363,7 +363,7 @@ fn parse_update_set_from() { joins: vec![], }, assignments: vec![Assignment { - id: vec![Ident::new("name")], + target: AssignmentTarget::ColumnName(ObjectName(vec![Ident::new("name")])), value: Expr::CompoundIdentifier(vec![Ident::new("t2"), Ident::new("name")]) }], from: Some(TableWithJoins { @@ -466,7 +466,10 @@ fn parse_update_with_table_alias() { ); assert_eq!( vec![Assignment { - id: vec![Ident::new("u"), Ident::new("username")], + target: AssignmentTarget::ColumnName(ObjectName(vec![ + Ident::new("u"), + Ident::new("username") + ])), value: Expr::Value(Value::SingleQuotedString("new_user".to_string())), }], assignments @@ -7702,14 +7705,20 @@ fn parse_merge() { action: MergeAction::Update { assignments: vec![ Assignment { - id: vec![Ident::new("dest"), Ident::new("F")], + target: AssignmentTarget::ColumnName(ObjectName(vec![ + Ident::new("dest"), + Ident::new("F") + ])), value: Expr::CompoundIdentifier(vec![ Ident::new("stg"), Ident::new("F"), ]), }, Assignment { - id: vec![Ident::new("dest"), 
Ident::new("G")], + target: AssignmentTarget::ColumnName(ObjectName(vec![ + Ident::new("dest"), + Ident::new("G") + ])), value: Expr::CompoundIdentifier(vec![ Ident::new("stg"), Ident::new("G"), diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index e65fc181b..ff8a49de7 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1639,23 +1639,33 @@ fn parse_insert_with_on_duplicate_update() { assert_eq!( Some(OnInsert::DuplicateKeyUpdate(vec![ Assignment { - id: vec![Ident::new("description".to_string())], + target: AssignmentTarget::ColumnName(ObjectName(vec![Ident::new( + "description".to_string() + )])), value: call("VALUES", [Expr::Identifier(Ident::new("description"))]), }, Assignment { - id: vec![Ident::new("perm_create".to_string())], + target: AssignmentTarget::ColumnName(ObjectName(vec![Ident::new( + "perm_create".to_string() + )])), value: call("VALUES", [Expr::Identifier(Ident::new("perm_create"))]), }, Assignment { - id: vec![Ident::new("perm_read".to_string())], + target: AssignmentTarget::ColumnName(ObjectName(vec![Ident::new( + "perm_read".to_string() + )])), value: call("VALUES", [Expr::Identifier(Ident::new("perm_read"))]), }, Assignment { - id: vec![Ident::new("perm_update".to_string())], + target: AssignmentTarget::ColumnName(ObjectName(vec![Ident::new( + "perm_update".to_string() + )])), value: call("VALUES", [Expr::Identifier(Ident::new("perm_update"))]), }, Assignment { - id: vec![Ident::new("perm_delete".to_string())], + target: AssignmentTarget::ColumnName(ObjectName(vec![Ident::new( + "perm_delete".to_string() + )])), value: call("VALUES", [Expr::Identifier(Ident::new("perm_delete"))]), }, ])), @@ -1835,7 +1845,10 @@ fn parse_update_with_joins() { ); assert_eq!( vec![Assignment { - id: vec![Ident::new("o"), Ident::new("completed")], + target: AssignmentTarget::ColumnName(ObjectName(vec![ + Ident::new("o"), + Ident::new("completed") + ])), value: Expr::Value(Value::Boolean(true)) }], assignments diff --git 
a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 5343fe5e0..fe735b8b2 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1557,7 +1557,7 @@ fn parse_pg_on_conflict() { assert_eq!( OnConflictAction::DoUpdate(DoUpdate { assignments: vec![Assignment { - id: vec!["dname".into()], + target: AssignmentTarget::ColumnName(ObjectName(vec!["dname".into()])), value: Expr::CompoundIdentifier(vec!["EXCLUDED".into(), "dname".into()]) },], selection: None @@ -1588,14 +1588,14 @@ fn parse_pg_on_conflict() { OnConflictAction::DoUpdate(DoUpdate { assignments: vec![ Assignment { - id: vec!["dname".into()], + target: AssignmentTarget::ColumnName(ObjectName(vec!["dname".into()])), value: Expr::CompoundIdentifier(vec![ "EXCLUDED".into(), "dname".into() ]) }, Assignment { - id: vec!["area".into()], + target: AssignmentTarget::ColumnName(ObjectName(vec!["area".into()])), value: Expr::CompoundIdentifier(vec!["EXCLUDED".into(), "area".into()]) }, ], @@ -1645,7 +1645,7 @@ fn parse_pg_on_conflict() { assert_eq!( OnConflictAction::DoUpdate(DoUpdate { assignments: vec![Assignment { - id: vec!["dname".into()], + target: AssignmentTarget::ColumnName(ObjectName(vec!["dname".into()])), value: Expr::Value(Value::Placeholder("$1".to_string())) },], selection: Some(Expr::BinaryOp { @@ -1682,7 +1682,7 @@ fn parse_pg_on_conflict() { assert_eq!( OnConflictAction::DoUpdate(DoUpdate { assignments: vec![Assignment { - id: vec!["dname".into()], + target: AssignmentTarget::ColumnName(ObjectName(vec!["dname".into()])), value: Expr::Value(Value::Placeholder("$1".to_string())) },], selection: Some(Expr::BinaryOp { diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 16ea9eb8c..1181c480b 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -373,6 +373,40 @@ fn parse_attach_database() { } } +#[test] +fn parse_update_tuple_row_values() { + // See https://github.com/sqlparser-rs/sqlparser-rs/issues/1311 + assert_eq!( + 
sqlite().verified_stmt("UPDATE x SET (a, b) = (1, 2)"), + Statement::Update { + assignments: vec![Assignment { + target: AssignmentTarget::Tuple(vec![ + ObjectName(vec![Ident::new("a"),]), + ObjectName(vec![Ident::new("b"),]), + ]), + value: Expr::Tuple(vec![ + Expr::Value(Value::Number("1".parse().unwrap(), false)), + Expr::Value(Value::Number("2".parse().unwrap(), false)) + ]) + }], + selection: None, + table: TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident::new("x")]), + alias: None, + args: None, + with_hints: vec![], + version: None, + partitions: vec![] + }, + joins: vec![], + }, + from: None, + returning: None + } + ); +} + #[test] fn parse_where_in_empty_list() { let sql = "SELECT * FROM t1 WHERE a IN ()"; From 79af31b6727fbe60e21705f4bbf8dafc59516e42 Mon Sep 17 00:00:00 2001 From: Emil Ejbyfeldt Date: Tue, 18 Jun 2024 15:30:24 +0200 Subject: [PATCH 004/506] Return errors, not panic, when integers fail to parse in `AUTO_INCREMENT` and `TOP` (#1305) --- src/parser/mod.rs | 40 ++++++++++++++++++++++++--------------- tests/sqlparser_common.rs | 15 +++++++++++++++ 2 files changed, 40 insertions(+), 15 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 62222c6fb..67aebcb33 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -20,7 +20,10 @@ use alloc::{ vec, vec::Vec, }; -use core::fmt; +use core::{ + fmt::{self, Display}, + str::FromStr, +}; use log::debug; @@ -3260,6 +3263,18 @@ impl<'a> Parser<'a> { } } + fn parse(s: String, loc: Location) -> Result + where + ::Err: Display, + { + s.parse::().map_err(|e| { + ParserError::ParserError(format!( + "Could not parse '{s}' as {}: {e}{loc}", + core::any::type_name::() + )) + }) + } + /// Parse a comma-separated list of 1+ SelectItem pub fn parse_projection(&mut self) -> Result, ParserError> { // BigQuery and Snowflake allow trailing commas, but only in project lists @@ -5281,7 +5296,7 @@ impl<'a> Parser<'a> { let _ = self.consume_token(&Token::Eq); let 
next_token = self.next_token(); match next_token.token { - Token::Number(s, _) => Some(s.parse::().expect("literal int")), + Token::Number(s, _) => Some(Self::parse::(s, next_token.location)?), _ => self.expected("literal int", next_token)?, } } else { @@ -6725,10 +6740,7 @@ impl<'a> Parser<'a> { // The call to n.parse() returns a bigdecimal when the // bigdecimal feature is enabled, and is otherwise a no-op // (i.e., it returns the input string). - Token::Number(ref n, l) => match n.parse() { - Ok(n) => Ok(Value::Number(n, l)), - Err(e) => parser_err!(format!("Could not parse '{n}' as number: {e}"), location), - }, + Token::Number(n, l) => Ok(Value::Number(Self::parse(n, location)?, l)), Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())), Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())), Token::TripleSingleQuotedString(ref s) => { @@ -6820,9 +6832,7 @@ impl<'a> Parser<'a> { pub fn parse_literal_uint(&mut self) -> Result { let next_token = self.next_token(); match next_token.token { - Token::Number(s, _) => s.parse::().map_err(|e| { - ParserError::ParserError(format!("Could not parse '{s}' as u64: {e}")) - }), + Token::Number(s, _) => Self::parse::(s, next_token.location), _ => self.expected("literal int", next_token), } } @@ -9273,7 +9283,7 @@ impl<'a> Parser<'a> { return self.expected("literal number", next_token); }; self.expect_token(&Token::RBrace)?; - RepetitionQuantifier::AtMost(n.parse().expect("literal int")) + RepetitionQuantifier::AtMost(Self::parse(n, token.location)?) 
} Token::Number(n, _) if self.consume_token(&Token::Comma) => { let next_token = self.next_token(); @@ -9281,12 +9291,12 @@ impl<'a> Parser<'a> { Token::Number(m, _) => { self.expect_token(&Token::RBrace)?; RepetitionQuantifier::Range( - n.parse().expect("literal int"), - m.parse().expect("literal int"), + Self::parse(n, token.location)?, + Self::parse(m, token.location)?, ) } Token::RBrace => { - RepetitionQuantifier::AtLeast(n.parse().expect("literal int")) + RepetitionQuantifier::AtLeast(Self::parse(n, token.location)?) } _ => { return self.expected("} or upper bound", next_token); @@ -9295,7 +9305,7 @@ impl<'a> Parser<'a> { } Token::Number(n, _) => { self.expect_token(&Token::RBrace)?; - RepetitionQuantifier::Exactly(n.parse().expect("literal int")) + RepetitionQuantifier::Exactly(Self::parse(n, token.location)?) } _ => return self.expected("quantifier range", token), } @@ -10329,7 +10339,7 @@ impl<'a> Parser<'a> { } else { let next_token = self.next_token(); let quantity = match next_token.token { - Token::Number(s, _) => s.parse::().expect("literal int"), + Token::Number(s, _) => Self::parse::(s, next_token.location)?, _ => self.expected("literal int", next_token)?, }; Some(TopQuantity::Constant(quantity)) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 15b3b69dd..a87883908 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -10006,3 +10006,18 @@ fn parse_select_wildcard_with_except() { "sql parser error: Expected identifier, found: )" ); } + +#[test] +fn parse_auto_increment_too_large() { + let dialect = GenericDialect {}; + let u64_max = u64::MAX; + let sql = + format!("CREATE TABLE foo (bar INT NOT NULL AUTO_INCREMENT) AUTO_INCREMENT=1{u64_max}"); + + let res = Parser::new(&dialect) + .try_with_sql(&sql) + .expect("tokenize to work") + .parse_statements(); + + assert!(res.is_err(), "{res:?}"); +} From f16c1afed0fa273228e74a633f3885c9c6609911 Mon Sep 17 00:00:00 2001 From: Lorrens Pantelis 
<100197010+LorrensP-2158466@users.noreply.github.com> Date: Sat, 22 Jun 2024 00:26:23 +0200 Subject: [PATCH 005/506] Improve error messages with additional colons (#1319) --- src/parser/mod.rs | 6 +- src/tokenizer.rs | 4 +- tests/sqlparser_bigquery.rs | 20 ++-- tests/sqlparser_common.rs | 168 +++++++++++++++++----------------- tests/sqlparser_databricks.rs | 2 +- tests/sqlparser_hive.rs | 8 +- tests/sqlparser_mssql.rs | 2 +- tests/sqlparser_mysql.rs | 2 +- tests/sqlparser_postgres.rs | 28 +++--- tests/sqlparser_snowflake.rs | 40 ++++---- tests/sqlparser_sqlite.rs | 8 +- 11 files changed, 144 insertions(+), 144 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 67aebcb33..27520a6c4 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3116,7 +3116,7 @@ impl<'a> Parser<'a> { /// Report `found` was encountered instead of `expected` pub fn expected(&self, expected: &str, found: TokenWithLocation) -> Result { parser_err!( - format!("Expected {expected}, found: {found}"), + format!("Expected: {expected}, found: {found}"), found.location ) } @@ -11581,7 +11581,7 @@ mod tests { assert_eq!( ast, Err(ParserError::TokenizerError( - "Unterminated string literal at Line: 1, Column 5".to_string() + "Unterminated string literal at Line: 1, Column: 5".to_string() )) ); } @@ -11593,7 +11593,7 @@ mod tests { assert_eq!( ast, Err(ParserError::ParserError( - "Expected [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a at Line: 1, Column 16" + "Expected: [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a at Line: 1, Column: 16" .to_string() )) ); diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 4e64e0712..b8336cec8 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -429,7 +429,7 @@ impl fmt::Display for Location { write!( f, // TODO: use standard compiler location syntax (::) - " at Line: {}, Column {}", + " at Line: {}, Column: {}", self.line, self.column, ) } @@ -1816,7 +1816,7 @@ mod tests { use std::error::Error; 
assert!(err.source().is_none()); } - assert_eq!(err.to_string(), "test at Line: 1, Column 1"); + assert_eq!(err.to_string(), "test at Line: 1, Column: 1"); } #[test] diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index fb6e3b88a..ec4ddca96 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -535,7 +535,7 @@ fn parse_invalid_brackets() { bigquery_and_generic() .parse_sql_statements(sql) .unwrap_err(), - ParserError::ParserError("Expected (, found: >".to_string()) + ParserError::ParserError("Expected: (, found: >".to_string()) ); let sql = "CREATE TABLE table (x STRUCT>>)"; @@ -544,7 +544,7 @@ fn parse_invalid_brackets() { .parse_sql_statements(sql) .unwrap_err(), ParserError::ParserError( - "Expected ',' or ')' after column definition, found: >".to_string() + "Expected: ',' or ')' after column definition, found: >".to_string() ) ); } @@ -1753,11 +1753,11 @@ fn parse_merge_invalid_statements() { for (sql, err_msg) in [ ( "MERGE T USING U ON TRUE WHEN MATCHED BY TARGET AND 1 THEN DELETE", - "Expected THEN, found: BY", + "Expected: THEN, found: BY", ), ( "MERGE T USING U ON TRUE WHEN MATCHED BY SOURCE AND 1 THEN DELETE", - "Expected THEN, found: BY", + "Expected: THEN, found: BY", ), ( "MERGE T USING U ON TRUE WHEN NOT MATCHED BY SOURCE THEN INSERT(a) VALUES (b)", @@ -1898,13 +1898,13 @@ fn parse_big_query_declare() { let error_sql = "DECLARE x"; assert_eq!( - ParserError::ParserError("Expected a data type name, found: EOF".to_owned()), + ParserError::ParserError("Expected: a data type name, found: EOF".to_owned()), bigquery().parse_sql_statements(error_sql).unwrap_err() ); let error_sql = "DECLARE x 42"; assert_eq!( - ParserError::ParserError("Expected a data type name, found: 42".to_owned()), + ParserError::ParserError("Expected: a data type name, found: 42".to_owned()), bigquery().parse_sql_statements(error_sql).unwrap_err() ); } @@ -2069,7 +2069,7 @@ fn test_bigquery_create_function() { "AS ((SELECT 1 FROM mytable)) 
", "OPTIONS(a = [1, 2])", ), - "Expected end of statement, found: OPTIONS", + "Expected: end of statement, found: OPTIONS", ), ( concat!( @@ -2077,7 +2077,7 @@ fn test_bigquery_create_function() { "IMMUTABLE ", "AS ((SELECT 1 FROM mytable)) ", ), - "Expected AS, found: IMMUTABLE", + "Expected: AS, found: IMMUTABLE", ), ( concat!( @@ -2085,7 +2085,7 @@ fn test_bigquery_create_function() { "AS \"console.log('hello');\" ", "LANGUAGE js ", ), - "Expected end of statement, found: LANGUAGE", + "Expected: end of statement, found: LANGUAGE", ), ]; for (sql, error) in error_sqls { @@ -2116,7 +2116,7 @@ fn test_bigquery_trim() { // missing comma separation let error_sql = "SELECT TRIM('xyz' 'a')"; assert_eq!( - ParserError::ParserError("Expected ), found: 'a'".to_owned()), + ParserError::ParserError("Expected: ), found: 'a'".to_owned()), bigquery().parse_sql_statements(error_sql).unwrap_err() ); } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index a87883908..0149bad5d 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -115,7 +115,7 @@ fn parse_replace_into() { let sql = "REPLACE INTO public.customer (id, name, active) VALUES (1, 2, 3)"; assert_eq!( - ParserError::ParserError("Unsupported statement REPLACE at Line: 1, Column 9".to_string()), + ParserError::ParserError("Unsupported statement REPLACE at Line: 1, Column: 9".to_string()), Parser::parse_sql(&dialect, sql,).unwrap_err(), ) } @@ -199,7 +199,7 @@ fn parse_insert_default_values() { let insert_with_columns_and_default_values = "INSERT INTO test_table (test_col) DEFAULT VALUES"; assert_eq!( ParserError::ParserError( - "Expected SELECT, VALUES, or a subquery in the query body, found: DEFAULT".to_string() + "Expected: SELECT, VALUES, or a subquery in the query body, found: DEFAULT".to_string() ), parse_sql_statements(insert_with_columns_and_default_values).unwrap_err() ); @@ -207,20 +207,20 @@ fn parse_insert_default_values() { let 
insert_with_default_values_and_hive_after_columns = "INSERT INTO test_table DEFAULT VALUES (some_column)"; assert_eq!( - ParserError::ParserError("Expected end of statement, found: (".to_string()), + ParserError::ParserError("Expected: end of statement, found: (".to_string()), parse_sql_statements(insert_with_default_values_and_hive_after_columns).unwrap_err() ); let insert_with_default_values_and_hive_partition = "INSERT INTO test_table DEFAULT VALUES PARTITION (some_column)"; assert_eq!( - ParserError::ParserError("Expected end of statement, found: PARTITION".to_string()), + ParserError::ParserError("Expected: end of statement, found: PARTITION".to_string()), parse_sql_statements(insert_with_default_values_and_hive_partition).unwrap_err() ); let insert_with_default_values_and_values_list = "INSERT INTO test_table DEFAULT VALUES (1)"; assert_eq!( - ParserError::ParserError("Expected end of statement, found: (".to_string()), + ParserError::ParserError("Expected: end of statement, found: (".to_string()), parse_sql_statements(insert_with_default_values_and_values_list).unwrap_err() ); } @@ -319,14 +319,14 @@ fn parse_update() { let sql = "UPDATE t WHERE 1"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected SET, found: WHERE".to_string()), + ParserError::ParserError("Expected: SET, found: WHERE".to_string()), res.unwrap_err() ); let sql = "UPDATE t SET a = 1 extrabadstuff"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected end of statement, found: extrabadstuff".to_string()), + ParserError::ParserError("Expected: end of statement, found: extrabadstuff".to_string()), res.unwrap_err() ); } @@ -577,7 +577,7 @@ fn parse_delete_without_from_error() { let dialects = all_dialects_except(|d| d.is::() || d.is::()); let res = dialects.parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected FROM, found: WHERE".to_string()), + ParserError::ParserError("Expected: FROM, found: 
WHERE".to_string()), res.unwrap_err() ); } @@ -892,7 +892,7 @@ fn parse_select_distinct_on() { fn parse_select_distinct_missing_paren() { let result = parse_sql_statements("SELECT DISTINCT (name, id FROM customer"); assert_eq!( - ParserError::ParserError("Expected ), found: FROM".to_string()), + ParserError::ParserError("Expected: ), found: FROM".to_string()), result.unwrap_err(), ); } @@ -936,7 +936,7 @@ fn parse_select_into() { let sql = "SELECT * INTO table0 asdf FROM table1"; let result = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected end of statement, found: asdf".to_string()), + ParserError::ParserError("Expected: end of statement, found: asdf".to_string()), result.unwrap_err() ) } @@ -973,7 +973,7 @@ fn parse_select_wildcard() { let sql = "SELECT * + * FROM foo;"; let result = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected end of statement, found: +".to_string()), + ParserError::ParserError("Expected: end of statement, found: +".to_string()), result.unwrap_err(), ); } @@ -1002,7 +1002,7 @@ fn parse_column_aliases() { assert_eq!(&Expr::Value(number("1")), right.as_ref()); assert_eq!(&Ident::new("newname"), alias); } else { - panic!("Expected ExprWithAlias") + panic!("Expected: ExprWithAlias") } // alias without AS is parsed correctly: @@ -1013,13 +1013,13 @@ fn parse_column_aliases() { fn test_eof_after_as() { let res = parse_sql_statements("SELECT foo AS"); assert_eq!( - ParserError::ParserError("Expected an identifier after AS, found: EOF".to_string()), + ParserError::ParserError("Expected: an identifier after AS, found: EOF".to_string()), res.unwrap_err() ); let res = parse_sql_statements("SELECT 1 FROM foo AS"); assert_eq!( - ParserError::ParserError("Expected an identifier after AS, found: EOF".to_string()), + ParserError::ParserError("Expected: an identifier after AS, found: EOF".to_string()), res.unwrap_err() ); } @@ -1104,7 +1104,7 @@ fn parse_not() { fn parse_invalid_infix_not() { let res = 
parse_sql_statements("SELECT c FROM t WHERE c NOT ("); assert_eq!( - ParserError::ParserError("Expected end of statement, found: NOT".to_string()), + ParserError::ParserError("Expected: end of statement, found: NOT".to_string()), res.unwrap_err(), ); } @@ -1177,11 +1177,11 @@ fn parse_exponent_in_select() -> Result<(), ParserError> { let select = match select.pop().unwrap() { Statement::Query(inner) => *inner, - _ => panic!("Expected Query"), + _ => panic!("Expected: Query"), }; let select = match *select.body { SetExpr::Select(inner) => *inner, - _ => panic!("Expected SetExpr::Select"), + _ => panic!("Expected: SetExpr::Select"), }; assert_eq!( @@ -1810,7 +1810,7 @@ fn parse_in_error() { let sql = "SELECT * FROM customers WHERE segment in segment"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected (, found: segment".to_string()), + ParserError::ParserError("Expected: (, found: segment".to_string()), res.unwrap_err() ); } @@ -2023,14 +2023,14 @@ fn parse_tuple_invalid() { let sql = "select (1"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected ), found: EOF".to_string()), + ParserError::ParserError("Expected: ), found: EOF".to_string()), res.unwrap_err() ); let sql = "select (), 2"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected an expression:, found: )".to_string()), + ParserError::ParserError("Expected: an expression:, found: )".to_string()), res.unwrap_err() ); } @@ -2442,7 +2442,7 @@ fn parse_extract() { let dialects = all_dialects_except(|d| d.is::() || d.is::()); let res = dialects.parse_sql_statements("SELECT EXTRACT(JIFFY FROM d)"); assert_eq!( - ParserError::ParserError("Expected date/time field, found: JIFFY".to_string()), + ParserError::ParserError("Expected: date/time field, found: JIFFY".to_string()), res.unwrap_err() ); } @@ -2481,7 +2481,7 @@ fn parse_ceil_datetime() { let dialects = all_dialects_except(|d| d.is::() || d.is::()); let res = 
dialects.parse_sql_statements("SELECT CEIL(d TO JIFFY) FROM df"); assert_eq!( - ParserError::ParserError("Expected date/time field, found: JIFFY".to_string()), + ParserError::ParserError("Expected: date/time field, found: JIFFY".to_string()), res.unwrap_err() ); } @@ -2508,7 +2508,7 @@ fn parse_floor_datetime() { let dialects = all_dialects_except(|d| d.is::() || d.is::()); let res = dialects.parse_sql_statements("SELECT FLOOR(d TO JIFFY) FROM df"); assert_eq!( - ParserError::ParserError("Expected date/time field, found: JIFFY".to_string()), + ParserError::ParserError("Expected: date/time field, found: JIFFY".to_string()), res.unwrap_err() ); } @@ -2709,7 +2709,7 @@ fn parse_window_function_null_treatment_arg() { let sql = "SELECT LAG(1 IGNORE NULLS) IGNORE NULLS OVER () FROM t1"; assert_eq!( dialects.parse_sql_statements(sql).unwrap_err(), - ParserError::ParserError("Expected end of statement, found: NULLS".to_string()) + ParserError::ParserError("Expected: end of statement, found: NULLS".to_string()) ); let sql = "SELECT LAG(1 IGNORE NULLS) IGNORE NULLS OVER () FROM t1"; @@ -2717,7 +2717,7 @@ fn parse_window_function_null_treatment_arg() { all_dialects_where(|d| !d.supports_window_function_null_treatment_arg()) .parse_sql_statements(sql) .unwrap_err(), - ParserError::ParserError("Expected ), found: IGNORE".to_string()) + ParserError::ParserError("Expected: ), found: IGNORE".to_string()) ); } @@ -2907,13 +2907,13 @@ fn parse_create_table() { assert!(res .unwrap_err() .to_string() - .contains("Expected \',\' or \')\' after column definition, found: GARBAGE")); + .contains("Expected: \',\' or \')\' after column definition, found: GARBAGE")); let res = parse_sql_statements("CREATE TABLE t (a int NOT NULL CONSTRAINT foo)"); assert!(res .unwrap_err() .to_string() - .contains("Expected constraint details after CONSTRAINT ")); + .contains("Expected: constraint details after CONSTRAINT ")); } #[test] @@ -3052,7 +3052,7 @@ fn 
parse_create_table_with_constraint_characteristics() { assert!(res .unwrap_err() .to_string() - .contains("Expected \',\' or \')\' after column definition, found: NOT")); + .contains("Expected: \',\' or \')\' after column definition, found: NOT")); let res = parse_sql_statements("CREATE TABLE t ( a int NOT NULL, @@ -3061,7 +3061,7 @@ fn parse_create_table_with_constraint_characteristics() { assert!(res .unwrap_err() .to_string() - .contains("Expected \',\' or \')\' after column definition, found: ENFORCED")); + .contains("Expected: \',\' or \')\' after column definition, found: ENFORCED")); let res = parse_sql_statements("CREATE TABLE t ( a int NOT NULL, @@ -3070,7 +3070,7 @@ fn parse_create_table_with_constraint_characteristics() { assert!(res .unwrap_err() .to_string() - .contains("Expected \',\' or \')\' after column definition, found: INITIALLY")); + .contains("Expected: \',\' or \')\' after column definition, found: INITIALLY")); } #[test] @@ -3161,7 +3161,7 @@ fn parse_create_table_column_constraint_characteristics() { assert!(res .unwrap_err() .to_string() - .contains("Expected one of DEFERRED or IMMEDIATE, found: BADVALUE")); + .contains("Expected: one of DEFERRED or IMMEDIATE, found: BADVALUE")); let res = parse_sql_statements( "CREATE TABLE t (a int NOT NULL UNIQUE INITIALLY IMMEDIATE DEFERRABLE INITIALLY DEFERRED)", @@ -3260,7 +3260,7 @@ fn parse_create_table_hive_array() { assert_eq!( dialects.parse_sql_statements(sql).unwrap_err(), - ParserError::ParserError("Expected >, found: )".to_string()) + ParserError::ParserError("Expected: >, found: )".to_string()) ); } @@ -4035,7 +4035,7 @@ fn parse_alter_table_alter_column_type() { let res = dialect.parse_sql_statements(&format!("{alter_stmt} ALTER COLUMN is_active TYPE TEXT")); assert_eq!( - ParserError::ParserError("Expected SET/DROP NOT NULL, SET DEFAULT, or SET DATA TYPE after ALTER COLUMN, found: TYPE".to_string()), + ParserError::ParserError("Expected: SET/DROP NOT NULL, SET DEFAULT, or SET DATA TYPE 
after ALTER COLUMN, found: TYPE".to_string()), res.unwrap_err() ); @@ -4043,7 +4043,7 @@ fn parse_alter_table_alter_column_type() { "{alter_stmt} ALTER COLUMN is_active SET DATA TYPE TEXT USING 'text'" )); assert_eq!( - ParserError::ParserError("Expected end of statement, found: USING".to_string()), + ParserError::ParserError("Expected: end of statement, found: USING".to_string()), res.unwrap_err() ); } @@ -4082,7 +4082,7 @@ fn parse_alter_table_drop_constraint() { let res = parse_sql_statements(&format!("{alter_stmt} DROP CONSTRAINT is_active TEXT")); assert_eq!( - ParserError::ParserError("Expected end of statement, found: TEXT".to_string()), + ParserError::ParserError("Expected: end of statement, found: TEXT".to_string()), res.unwrap_err() ); } @@ -4091,14 +4091,14 @@ fn parse_alter_table_drop_constraint() { fn parse_bad_constraint() { let res = parse_sql_statements("ALTER TABLE tab ADD"); assert_eq!( - ParserError::ParserError("Expected identifier, found: EOF".to_string()), + ParserError::ParserError("Expected: identifier, found: EOF".to_string()), res.unwrap_err() ); let res = parse_sql_statements("CREATE TABLE tab (foo int,"); assert_eq!( ParserError::ParserError( - "Expected column name or constraint definition, found: EOF".to_string() + "Expected: column name or constraint definition, found: EOF".to_string() ), res.unwrap_err() ); @@ -4440,7 +4440,7 @@ fn parse_window_clause() { let dialects = all_dialects_except(|d| d.is::() || d.is::()); let res = dialects.parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected (, found: window2".to_string()), + ParserError::ParserError("Expected: (, found: window2".to_string()), res.unwrap_err() ); } @@ -4851,13 +4851,13 @@ fn parse_interval() { let result = parse_sql_statements("SELECT INTERVAL '1' SECOND TO SECOND"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: SECOND".to_string()), + ParserError::ParserError("Expected: end of statement, found: SECOND".to_string()), 
result.unwrap_err(), ); let result = parse_sql_statements("SELECT INTERVAL '10' HOUR (1) TO HOUR (2)"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: (".to_string()), + ParserError::ParserError("Expected: end of statement, found: (".to_string()), result.unwrap_err(), ); @@ -5198,13 +5198,13 @@ fn parse_table_function() { let res = parse_sql_statements("SELECT * FROM TABLE '1' AS a"); assert_eq!( - ParserError::ParserError("Expected (, found: \'1\'".to_string()), + ParserError::ParserError("Expected: (, found: \'1\'".to_string()), res.unwrap_err() ); let res = parse_sql_statements("SELECT * FROM TABLE (FUN(a) AS a"); assert_eq!( - ParserError::ParserError("Expected ), found: AS".to_string()), + ParserError::ParserError("Expected: ), found: AS".to_string()), res.unwrap_err() ); } @@ -5752,7 +5752,7 @@ fn parse_natural_join() { let sql = "SELECT * FROM t1 natural"; assert_eq!( - ParserError::ParserError("Expected a join type after NATURAL, found: EOF".to_string()), + ParserError::ParserError("Expected: a join type after NATURAL, found: EOF".to_string()), parse_sql_statements(sql).unwrap_err(), ); } @@ -5833,7 +5833,7 @@ fn parse_join_syntax_variants() { let res = parse_sql_statements("SELECT * FROM a OUTER JOIN b ON 1"); assert_eq!( - ParserError::ParserError("Expected APPLY, found: JOIN".to_string()), + ParserError::ParserError("Expected: APPLY, found: JOIN".to_string()), res.unwrap_err() ); } @@ -5871,7 +5871,7 @@ fn parse_ctes() { Expr::Subquery(ref subquery) => { assert_ctes_in_select(&cte_sqls, subquery.as_ref()); } - _ => panic!("Expected subquery"), + _ => panic!("Expected: subquery"), } // CTE in a derived table let sql = &format!("SELECT * FROM ({with})"); @@ -5880,13 +5880,13 @@ fn parse_ctes() { TableFactor::Derived { subquery, .. 
} => { assert_ctes_in_select(&cte_sqls, subquery.as_ref()) } - _ => panic!("Expected derived table"), + _ => panic!("Expected: derived table"), } // CTE in a view let sql = &format!("CREATE VIEW v AS {with}"); match verified_stmt(sql) { Statement::CreateView { query, .. } => assert_ctes_in_select(&cte_sqls, &query), - _ => panic!("Expected CREATE VIEW"), + _ => panic!("Expected: CREATE VIEW"), } // CTE in a CTE... let sql = &format!("WITH outer_cte AS ({with}) SELECT * FROM outer_cte"); @@ -6047,7 +6047,7 @@ fn parse_multiple_statements() { // Check that forgetting the semicolon results in an error: let res = parse_sql_statements(&(sql1.to_owned() + " " + sql2_kw + sql2_rest)); assert_eq!( - ParserError::ParserError("Expected end of statement, found: ".to_string() + sql2_kw), + ParserError::ParserError("Expected: end of statement, found: ".to_string() + sql2_kw), res.unwrap_err() ); } @@ -6102,7 +6102,7 @@ fn parse_overlay() { "SELECT OVERLAY('abccccde' PLACING 'abc' FROM 3 FOR 12)", ); assert_eq!( - ParserError::ParserError("Expected PLACING, found: FROM".to_owned()), + ParserError::ParserError("Expected: PLACING, found: FROM".to_owned()), parse_sql_statements("SELECT OVERLAY('abccccde' FROM 3)").unwrap_err(), ); @@ -6151,7 +6151,7 @@ fn parse_trim() { ); assert_eq!( - ParserError::ParserError("Expected ), found: 'xyz'".to_owned()), + ParserError::ParserError("Expected: ), found: 'xyz'".to_owned()), parse_sql_statements("SELECT TRIM(FOO 'xyz' FROM 'xyzfooxyz')").unwrap_err() ); @@ -6173,7 +6173,7 @@ fn parse_trim() { options: None, }; assert_eq!( - ParserError::ParserError("Expected ), found: 'a'".to_owned()), + ParserError::ParserError("Expected: ), found: 'a'".to_owned()), all_expected_snowflake .parse_sql_statements("SELECT TRIM('xyz', 'a')") .unwrap_err() @@ -6210,7 +6210,7 @@ fn parse_exists_subquery() { .parse_sql_statements("SELECT EXISTS ("); assert_eq!( ParserError::ParserError( - "Expected SELECT, VALUES, or a subquery in the query body, found: 
EOF".to_string() + "Expected: SELECT, VALUES, or a subquery in the query body, found: EOF".to_string() ), res.unwrap_err(), ); @@ -6219,7 +6219,7 @@ fn parse_exists_subquery() { .parse_sql_statements("SELECT EXISTS (NULL)"); assert_eq!( ParserError::ParserError( - "Expected SELECT, VALUES, or a subquery in the query body, found: NULL".to_string() + "Expected: SELECT, VALUES, or a subquery in the query body, found: NULL".to_string() ), res.unwrap_err(), ); @@ -6581,7 +6581,7 @@ fn parse_drop_table() { let sql = "DROP TABLE"; assert_eq!( - ParserError::ParserError("Expected identifier, found: EOF".to_string()), + ParserError::ParserError("Expected: identifier, found: EOF".to_string()), parse_sql_statements(sql).unwrap_err(), ); @@ -6613,7 +6613,7 @@ fn parse_drop_view() { fn parse_invalid_subquery_without_parens() { let res = parse_sql_statements("SELECT SELECT 1 FROM bar WHERE 1=1 FROM baz"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: 1".to_string()), + ParserError::ParserError("Expected: end of statement, found: 1".to_string()), res.unwrap_err() ); } @@ -6826,7 +6826,7 @@ fn lateral_derived() { let sql = "SELECT * FROM LATERAL UNNEST ([10,20,30]) as numbers WITH OFFSET;"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected end of statement, found: WITH".to_string()), + ParserError::ParserError("Expected: end of statement, found: WITH".to_string()), res.unwrap_err() ); @@ -6834,7 +6834,7 @@ fn lateral_derived() { let res = parse_sql_statements(sql); assert_eq!( ParserError::ParserError( - "Expected SELECT, VALUES, or a subquery in the query body, found: b".to_string() + "Expected: SELECT, VALUES, or a subquery in the query body, found: b".to_string() ), res.unwrap_err() ); @@ -6952,19 +6952,19 @@ fn parse_start_transaction() { let res = parse_sql_statements("START TRANSACTION ISOLATION LEVEL BAD"); assert_eq!( - ParserError::ParserError("Expected isolation level, found: BAD".to_string()), + 
ParserError::ParserError("Expected: isolation level, found: BAD".to_string()), res.unwrap_err() ); let res = parse_sql_statements("START TRANSACTION BAD"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: BAD".to_string()), + ParserError::ParserError("Expected: end of statement, found: BAD".to_string()), res.unwrap_err() ); let res = parse_sql_statements("START TRANSACTION READ ONLY,"); assert_eq!( - ParserError::ParserError("Expected transaction mode, found: EOF".to_string()), + ParserError::ParserError("Expected: transaction mode, found: EOF".to_string()), res.unwrap_err() ); } @@ -7050,8 +7050,8 @@ fn parse_set_variable() { } let error_sqls = [ - ("SET (a, b, c) = (1, 2, 3", "Expected ), found: EOF"), - ("SET (a, b, c) = 1, 2, 3", "Expected (, found: 1"), + ("SET (a, b, c) = (1, 2, 3", "Expected: ), found: EOF"), + ("SET (a, b, c) = 1, 2, 3", "Expected: (, found: 1"), ]; for (sql, error) in error_sqls { assert_eq!( @@ -8051,19 +8051,19 @@ fn parse_offset_and_limit() { // Can't repeat OFFSET / LIMIT let res = parse_sql_statements("SELECT foo FROM bar OFFSET 2 OFFSET 2"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: OFFSET".to_string()), + ParserError::ParserError("Expected: end of statement, found: OFFSET".to_string()), res.unwrap_err() ); let res = parse_sql_statements("SELECT foo FROM bar LIMIT 2 LIMIT 2"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: LIMIT".to_string()), + ParserError::ParserError("Expected: end of statement, found: LIMIT".to_string()), res.unwrap_err() ); let res = parse_sql_statements("SELECT foo FROM bar OFFSET 2 LIMIT 2 OFFSET 2"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: OFFSET".to_string()), + ParserError::ParserError("Expected: end of statement, found: OFFSET".to_string()), res.unwrap_err() ); } @@ -8132,7 +8132,7 @@ fn parse_position_negative() { let sql = "SELECT POSITION(foo IN) from bar"; let res = 
parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected an expression:, found: )".to_string()), + ParserError::ParserError("Expected: an expression:, found: )".to_string()), res.unwrap_err() ); } @@ -8190,7 +8190,7 @@ fn parse_is_boolean() { let res = parse_sql_statements(sql); assert_eq!( ParserError::ParserError( - "Expected [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: 0" + "Expected: [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: 0" .to_string() ), res.unwrap_err() @@ -8383,7 +8383,7 @@ fn parse_cache_table() { let res = parse_sql_statements("CACHE TABLE 'table_name' foo"); assert_eq!( ParserError::ParserError( - "Expected SELECT, VALUES, or a subquery in the query body, found: foo".to_string() + "Expected: SELECT, VALUES, or a subquery in the query body, found: foo".to_string() ), res.unwrap_err() ); @@ -8391,7 +8391,7 @@ fn parse_cache_table() { let res = parse_sql_statements("CACHE flag TABLE 'table_name' OPTIONS('K1'='V1') foo"); assert_eq!( ParserError::ParserError( - "Expected SELECT, VALUES, or a subquery in the query body, found: foo".to_string() + "Expected: SELECT, VALUES, or a subquery in the query body, found: foo".to_string() ), res.unwrap_err() ); @@ -8399,7 +8399,7 @@ fn parse_cache_table() { let res = parse_sql_statements("CACHE TABLE 'table_name' AS foo"); assert_eq!( ParserError::ParserError( - "Expected SELECT, VALUES, or a subquery in the query body, found: foo".to_string() + "Expected: SELECT, VALUES, or a subquery in the query body, found: foo".to_string() ), res.unwrap_err() ); @@ -8407,26 +8407,26 @@ fn parse_cache_table() { let res = parse_sql_statements("CACHE flag TABLE 'table_name' OPTIONS('K1'='V1') AS foo"); assert_eq!( ParserError::ParserError( - "Expected SELECT, VALUES, or a subquery in the query body, found: foo".to_string() + "Expected: SELECT, VALUES, or a subquery in the query body, found: foo".to_string() ), res.unwrap_err() ); let res = parse_sql_statements("CACHE 
'table_name'"); assert_eq!( - ParserError::ParserError("Expected a `TABLE` keyword, found: 'table_name'".to_string()), + ParserError::ParserError("Expected: a `TABLE` keyword, found: 'table_name'".to_string()), res.unwrap_err() ); let res = parse_sql_statements("CACHE 'table_name' OPTIONS('K1'='V1')"); assert_eq!( - ParserError::ParserError("Expected a `TABLE` keyword, found: OPTIONS".to_string()), + ParserError::ParserError("Expected: a `TABLE` keyword, found: OPTIONS".to_string()), res.unwrap_err() ); let res = parse_sql_statements("CACHE flag 'table_name' OPTIONS('K1'='V1')"); assert_eq!( - ParserError::ParserError("Expected a `TABLE` keyword, found: 'table_name'".to_string()), + ParserError::ParserError("Expected: a `TABLE` keyword, found: 'table_name'".to_string()), res.unwrap_err() ); } @@ -8451,19 +8451,19 @@ fn parse_uncache_table() { let res = parse_sql_statements("UNCACHE TABLE 'table_name' foo"); assert_eq!( - ParserError::ParserError("Expected an `EOF`, found: foo".to_string()), + ParserError::ParserError("Expected: an `EOF`, found: foo".to_string()), res.unwrap_err() ); let res = parse_sql_statements("UNCACHE 'table_name' foo"); assert_eq!( - ParserError::ParserError("Expected a `TABLE` keyword, found: 'table_name'".to_string()), + ParserError::ParserError("Expected: a `TABLE` keyword, found: 'table_name'".to_string()), res.unwrap_err() ); let res = parse_sql_statements("UNCACHE IF EXISTS 'table_name' foo"); assert_eq!( - ParserError::ParserError("Expected a `TABLE` keyword, found: IF".to_string()), + ParserError::ParserError("Expected: a `TABLE` keyword, found: IF".to_string()), res.unwrap_err() ); } @@ -8927,7 +8927,7 @@ fn parse_trailing_comma() { .parse_sql_statements("CREATE TABLE employees (name text, age int,)") .unwrap_err(), ParserError::ParserError( - "Expected column name or constraint definition, found: )".to_string() + "Expected: column name or constraint definition, found: )".to_string() ) ); } @@ -8955,7 +8955,7 @@ fn 
parse_projection_trailing_comma() { trailing_commas .parse_sql_statements("SELECT * FROM track ORDER BY milliseconds,") .unwrap_err(), - ParserError::ParserError("Expected an expression:, found: EOF".to_string()) + ParserError::ParserError("Expected: an expression:, found: EOF".to_string()) ); assert_eq!( @@ -8963,7 +8963,7 @@ fn parse_projection_trailing_comma() { .parse_sql_statements("CREATE TABLE employees (name text, age int,)") .unwrap_err(), ParserError::ParserError( - "Expected column name or constraint definition, found: )".to_string() + "Expected: column name or constraint definition, found: )".to_string() ), ); } @@ -9962,14 +9962,14 @@ fn tests_select_values_without_parens_and_set_op() { assert_eq!(SetOperator::Union, op); match *left { SetExpr::Select(_) => {} - _ => panic!("Expected a SELECT statement"), + _ => panic!("Expected: a SELECT statement"), } match *right { SetExpr::Select(_) => {} - _ => panic!("Expected a SELECT statement"), + _ => panic!("Expected: a SELECT statement"), } } - _ => panic!("Expected a SET OPERATION"), + _ => panic!("Expected: a SET OPERATION"), } } @@ -10003,7 +10003,7 @@ fn parse_select_wildcard_with_except() { .parse_sql_statements("SELECT * EXCEPT () FROM employee_table") .unwrap_err() .to_string(), - "sql parser error: Expected identifier, found: )" + "sql parser error: Expected: identifier, found: )" ); } diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 430647ded..90056f0f7 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -64,7 +64,7 @@ fn test_databricks_exists() { let res = databricks().parse_sql_statements("SELECT EXISTS ("); assert_eq!( // TODO: improve this error message... 
- ParserError::ParserError("Expected an expression:, found: EOF".to_string()), + ParserError::ParserError("Expected: an expression:, found: EOF".to_string()), res.unwrap_err(), ); } diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index b661b6cd3..a5a6e2435 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -284,7 +284,7 @@ fn set_statement_with_minus() { assert_eq!( hive().parse_sql_statements("SET hive.tez.java.opts = -"), Err(ParserError::ParserError( - "Expected variable value, found: EOF".to_string() + "Expected: variable value, found: EOF".to_string() )) ) } @@ -327,14 +327,14 @@ fn parse_create_function() { assert_eq!( unsupported_dialects.parse_sql_statements(sql).unwrap_err(), ParserError::ParserError( - "Expected an object type after CREATE, found: FUNCTION".to_string() + "Expected: an object type after CREATE, found: FUNCTION".to_string() ) ); let sql = "CREATE TEMPORARY FUNCTION mydb.myfunc AS 'org.random.class.Name' USING JAR"; assert_eq!( hive().parse_sql_statements(sql).unwrap_err(), - ParserError::ParserError("Expected literal string, found: EOF".to_string()), + ParserError::ParserError("Expected: literal string, found: EOF".to_string()), ); } @@ -398,7 +398,7 @@ fn parse_delimited_identifiers() { assert_eq!(&Expr::Identifier(Ident::with_quote('"', "simple id")), expr); assert_eq!(&Ident::with_quote('"', "column alias"), alias); } - _ => panic!("Expected ExprWithAlias"), + _ => panic!("Expected: ExprWithAlias"), } hive().verified_stmt(r#"CREATE TABLE "foo" ("bar" "int")"#); diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 86d3990f6..f570de11d 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -481,7 +481,7 @@ fn parse_convert() { let error_sql = "SELECT CONVERT(INT, 'foo',) FROM T"; assert_eq!( - ParserError::ParserError("Expected an expression:, found: )".to_owned()), + ParserError::ParserError("Expected: an expression:, found: )".to_owned()), 
ms().parse_sql_statements(error_sql).unwrap_err() ); } diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index ff8a49de7..a25f4c208 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -2518,7 +2518,7 @@ fn parse_fulltext_expression() { } #[test] -#[should_panic = "Expected FULLTEXT or SPATIAL option without constraint name, found: cons"] +#[should_panic = "Expected: FULLTEXT or SPATIAL option without constraint name, found: cons"] fn parse_create_table_with_fulltext_definition_should_not_accept_constraint_name() { mysql_and_generic().verified_stmt("CREATE TABLE tb (c1 INT, CONSTRAINT cons FULLTEXT (c1))"); } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index fe735b8b2..63c53227a 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -648,7 +648,7 @@ fn parse_alter_table_alter_column_add_generated() { "ALTER TABLE t ALTER COLUMN id ADD GENERATED ( INCREMENT 1 MINVALUE 1 )", ); assert_eq!( - ParserError::ParserError("Expected AS, found: (".to_string()), + ParserError::ParserError("Expected: AS, found: (".to_string()), res.unwrap_err() ); @@ -656,14 +656,14 @@ fn parse_alter_table_alter_column_add_generated() { "ALTER TABLE t ALTER COLUMN id ADD GENERATED AS IDENTITY ( INCREMENT )", ); assert_eq!( - ParserError::ParserError("Expected a value, found: )".to_string()), + ParserError::ParserError("Expected: a value, found: )".to_string()), res.unwrap_err() ); let res = pg().parse_sql_statements("ALTER TABLE t ALTER COLUMN id ADD GENERATED AS IDENTITY ("); assert_eq!( - ParserError::ParserError("Expected ), found: EOF".to_string()), + ParserError::ParserError("Expected: ), found: EOF".to_string()), res.unwrap_err() ); } @@ -733,25 +733,25 @@ fn parse_create_table_if_not_exists() { fn parse_bad_if_not_exists() { let res = pg().parse_sql_statements("CREATE TABLE NOT EXISTS uk_cities ()"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: EXISTS".to_string()), + 
ParserError::ParserError("Expected: end of statement, found: EXISTS".to_string()), res.unwrap_err() ); let res = pg().parse_sql_statements("CREATE TABLE IF EXISTS uk_cities ()"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: EXISTS".to_string()), + ParserError::ParserError("Expected: end of statement, found: EXISTS".to_string()), res.unwrap_err() ); let res = pg().parse_sql_statements("CREATE TABLE IF uk_cities ()"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: uk_cities".to_string()), + ParserError::ParserError("Expected: end of statement, found: uk_cities".to_string()), res.unwrap_err() ); let res = pg().parse_sql_statements("CREATE TABLE IF NOT uk_cities ()"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: NOT".to_string()), + ParserError::ParserError("Expected: end of statement, found: NOT".to_string()), res.unwrap_err() ); } @@ -1300,21 +1300,21 @@ fn parse_set() { assert_eq!( pg_and_generic().parse_sql_statements("SET"), Err(ParserError::ParserError( - "Expected identifier, found: EOF".to_string() + "Expected: identifier, found: EOF".to_string() )), ); assert_eq!( pg_and_generic().parse_sql_statements("SET a b"), Err(ParserError::ParserError( - "Expected equals sign or TO, found: b".to_string() + "Expected: equals sign or TO, found: b".to_string() )), ); assert_eq!( pg_and_generic().parse_sql_statements("SET a ="), Err(ParserError::ParserError( - "Expected variable value, found: EOF".to_string() + "Expected: variable value, found: EOF".to_string() )), ); } @@ -2685,7 +2685,7 @@ fn parse_json_table_is_not_reserved() { name: ObjectName(name), .. 
} => assert_eq!("JSON_TABLE", name[0].value), - other => panic!("Expected JSON_TABLE to be parsed as a table name, but got {other:?}"), + other => panic!("Expected: JSON_TABLE to be parsed as a table name, but got {other:?}"), } } @@ -2874,7 +2874,7 @@ fn parse_escaped_literal_string() { .parse_sql_statements(sql) .unwrap_err() .to_string(), - "sql parser error: Unterminated encoded string literal at Line: 1, Column 8" + "sql parser error: Unterminated encoded string literal at Line: 1, Column: 8" ); let sql = r"SELECT E'\u0001', E'\U0010FFFF', E'\xC', E'\x25', E'\2', E'\45', E'\445'"; @@ -2917,7 +2917,7 @@ fn parse_escaped_literal_string() { .parse_sql_statements(sql) .unwrap_err() .to_string(), - "sql parser error: Unterminated encoded string literal at Line: 1, Column 8" + "sql parser error: Unterminated encoded string literal at Line: 1, Column: 8" ); } } @@ -3455,7 +3455,7 @@ fn parse_delimited_identifiers() { assert_eq!(&Expr::Identifier(Ident::with_quote('"', "simple id")), expr); assert_eq!(&Ident::with_quote('"', "column alias"), alias); } - _ => panic!("Expected ExprWithAlias"), + _ => panic!("Expected: ExprWithAlias"), } pg().verified_stmt(r#"CREATE TABLE "foo" ("bar" "int")"#); diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index f0a7c7735..160bbcbd5 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -385,14 +385,14 @@ fn test_snowflake_create_invalid_local_global_table() { assert_eq!( snowflake().parse_sql_statements("CREATE LOCAL GLOBAL TABLE my_table (a INT)"), Err(ParserError::ParserError( - "Expected an SQL statement, found: LOCAL".to_string() + "Expected: an SQL statement, found: LOCAL".to_string() )) ); assert_eq!( snowflake().parse_sql_statements("CREATE GLOBAL LOCAL TABLE my_table (a INT)"), Err(ParserError::ParserError( - "Expected an SQL statement, found: GLOBAL".to_string() + "Expected: an SQL statement, found: GLOBAL".to_string() )) ); } @@ -402,21 +402,21 @@ fn 
test_snowflake_create_invalid_temporal_table() { assert_eq!( snowflake().parse_sql_statements("CREATE TEMP TEMPORARY TABLE my_table (a INT)"), Err(ParserError::ParserError( - "Expected an object type after CREATE, found: TEMPORARY".to_string() + "Expected: an object type after CREATE, found: TEMPORARY".to_string() )) ); assert_eq!( snowflake().parse_sql_statements("CREATE TEMP VOLATILE TABLE my_table (a INT)"), Err(ParserError::ParserError( - "Expected an object type after CREATE, found: VOLATILE".to_string() + "Expected: an object type after CREATE, found: VOLATILE".to_string() )) ); assert_eq!( snowflake().parse_sql_statements("CREATE TEMP TRANSIENT TABLE my_table (a INT)"), Err(ParserError::ParserError( - "Expected an object type after CREATE, found: TRANSIENT".to_string() + "Expected: an object type after CREATE, found: TRANSIENT".to_string() )) ); } @@ -851,7 +851,7 @@ fn parse_semi_structured_data_traversal() { .parse_sql_statements("SELECT a:42") .unwrap_err() .to_string(), - "sql parser error: Expected variant object key name, found: 42" + "sql parser error: Expected: variant object key name, found: 42" ); } @@ -908,7 +908,7 @@ fn parse_delimited_identifiers() { assert_eq!(&Expr::Identifier(Ident::with_quote('"', "simple id")), expr); assert_eq!(&Ident::with_quote('"', "column alias"), alias); } - _ => panic!("Expected ExprWithAlias"), + _ => panic!("Expected: ExprWithAlias"), } snowflake().verified_stmt(r#"CREATE TABLE "foo" ("bar" "int")"#); @@ -1034,7 +1034,7 @@ fn test_select_wildcard_with_exclude_and_rename() { .parse_sql_statements("SELECT * RENAME col_a AS col_b EXCLUDE col_z FROM data") .unwrap_err() .to_string(), - "sql parser error: Expected end of statement, found: EXCLUDE" + "sql parser error: Expected: end of statement, found: EXCLUDE" ); } @@ -1134,13 +1134,13 @@ fn parse_snowflake_declare_cursor() { let error_sql = "DECLARE c1 CURSOR SELECT id FROM invoices"; assert_eq!( - ParserError::ParserError("Expected FOR, found: SELECT".to_owned()), + 
ParserError::ParserError("Expected: FOR, found: SELECT".to_owned()), snowflake().parse_sql_statements(error_sql).unwrap_err() ); let error_sql = "DECLARE c1 CURSOR res"; assert_eq!( - ParserError::ParserError("Expected FOR, found: res".to_owned()), + ParserError::ParserError("Expected: FOR, found: res".to_owned()), snowflake().parse_sql_statements(error_sql).unwrap_err() ); } @@ -1188,13 +1188,13 @@ fn parse_snowflake_declare_result_set() { let error_sql = "DECLARE res RESULTSET DEFAULT"; assert_eq!( - ParserError::ParserError("Expected an expression:, found: EOF".to_owned()), + ParserError::ParserError("Expected: an expression:, found: EOF".to_owned()), snowflake().parse_sql_statements(error_sql).unwrap_err() ); let error_sql = "DECLARE res RESULTSET :="; assert_eq!( - ParserError::ParserError("Expected an expression:, found: EOF".to_owned()), + ParserError::ParserError("Expected: an expression:, found: EOF".to_owned()), snowflake().parse_sql_statements(error_sql).unwrap_err() ); } @@ -1280,19 +1280,19 @@ fn parse_snowflake_declare_variable() { let error_sql = "DECLARE profit INT 2"; assert_eq!( - ParserError::ParserError("Expected end of statement, found: 2".to_owned()), + ParserError::ParserError("Expected: end of statement, found: 2".to_owned()), snowflake().parse_sql_statements(error_sql).unwrap_err() ); let error_sql = "DECLARE profit INT DEFAULT"; assert_eq!( - ParserError::ParserError("Expected an expression:, found: EOF".to_owned()), + ParserError::ParserError("Expected: an expression:, found: EOF".to_owned()), snowflake().parse_sql_statements(error_sql).unwrap_err() ); let error_sql = "DECLARE profit DEFAULT"; assert_eq!( - ParserError::ParserError("Expected an expression:, found: EOF".to_owned()), + ParserError::ParserError("Expected: an expression:, found: EOF".to_owned()), snowflake().parse_sql_statements(error_sql).unwrap_err() ); } @@ -1327,7 +1327,7 @@ fn parse_snowflake_declare_multi_statements() { let error_sql = "DECLARE profit DEFAULT 42 c1 
CURSOR FOR res;"; assert_eq!( - ParserError::ParserError("Expected end of statement, found: c1".to_owned()), + ParserError::ParserError("Expected: end of statement, found: c1".to_owned()), snowflake().parse_sql_statements(error_sql).unwrap_err() ); } @@ -1902,7 +1902,7 @@ fn test_snowflake_trim() { // missing comma separation let error_sql = "SELECT TRIM('xyz' 'a')"; assert_eq!( - ParserError::ParserError("Expected ), found: 'a'".to_owned()), + ParserError::ParserError("Expected: ), found: 'a'".to_owned()), snowflake().parse_sql_statements(error_sql).unwrap_err() ); } @@ -2064,7 +2064,7 @@ fn test_select_wildcard_with_ilike_double_quote() { let res = snowflake().parse_sql_statements(r#"SELECT * ILIKE "%id" FROM tbl"#); assert_eq!( res.unwrap_err().to_string(), - "sql parser error: Expected ilike pattern, found: \"%id\"" + "sql parser error: Expected: ilike pattern, found: \"%id\"" ); } @@ -2073,7 +2073,7 @@ fn test_select_wildcard_with_ilike_number() { let res = snowflake().parse_sql_statements(r#"SELECT * ILIKE 42 FROM tbl"#); assert_eq!( res.unwrap_err().to_string(), - "sql parser error: Expected ilike pattern, found: 42" + "sql parser error: Expected: ilike pattern, found: 42" ); } @@ -2082,7 +2082,7 @@ fn test_select_wildcard_with_ilike_replace() { let res = snowflake().parse_sql_statements(r#"SELECT * ILIKE '%id%' EXCLUDE col FROM tbl"#); assert_eq!( res.unwrap_err().to_string(), - "sql parser error: Expected end of statement, found: EXCLUDE" + "sql parser error: Expected: end of statement, found: EXCLUDE" ); } diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 1181c480b..e329abae7 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -428,7 +428,7 @@ fn invalid_empty_list() { let sql = "SELECT * FROM t1 WHERE a IN (,,)"; let sqlite = sqlite_with_options(ParserOptions::new().with_trailing_commas(true)); assert_eq!( - "sql parser error: Expected an expression:, found: ,", + "sql parser error: Expected: an expression:, 
found: ,", sqlite.parse_sql_statements(sql).unwrap_err().to_string() ); } @@ -452,17 +452,17 @@ fn parse_start_transaction_with_modifier() { }; let res = unsupported_dialects.parse_sql_statements("BEGIN DEFERRED"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: DEFERRED".to_string()), + ParserError::ParserError("Expected: end of statement, found: DEFERRED".to_string()), res.unwrap_err(), ); let res = unsupported_dialects.parse_sql_statements("BEGIN IMMEDIATE"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: IMMEDIATE".to_string()), + ParserError::ParserError("Expected: end of statement, found: IMMEDIATE".to_string()), res.unwrap_err(), ); let res = unsupported_dialects.parse_sql_statements("BEGIN EXCLUSIVE"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: EXCLUSIVE".to_string()), + ParserError::ParserError("Expected: end of statement, found: EXCLUSIVE".to_string()), res.unwrap_err(), ); } From f3d2f78fb2b7ebdc539b0bec0be535b3d1b9d88f Mon Sep 17 00:00:00 2001 From: Bidaya0 Date: Sun, 23 Jun 2024 19:13:16 +0800 Subject: [PATCH 006/506] Support `TO` in `CREATE VIEW` clause for Clickhouse (#1313) Co-authored-by: Ifeanyi Ubah Co-authored-by: Andrew Lamb --- src/ast/mod.rs | 12 ++++++++++-- src/parser/mod.rs | 9 +++++++++ tests/sqlparser_bigquery.rs | 1 + tests/sqlparser_clickhouse.rs | 15 +++++++++++++++ tests/sqlparser_common.rs | 14 ++++++++++++++ tests/sqlparser_snowflake.rs | 1 + tests/sqlparser_sqlite.rs | 1 + 7 files changed, 51 insertions(+), 2 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 769bda598..70190b35b 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2029,6 +2029,9 @@ pub enum Statement { if_not_exists: bool, /// if true, has SQLite `TEMP` or `TEMPORARY` clause temporary: bool, + /// if not None, has Clickhouse `TO` clause, specify the table into which to insert results + /// + to: Option, }, /// ```sql /// CREATE TABLE @@ -3329,15 +3332,20 @@ impl 
fmt::Display for Statement { with_no_schema_binding, if_not_exists, temporary, + to, } => { write!( f, - "CREATE {or_replace}{materialized}{temporary}VIEW {if_not_exists}{name}", + "CREATE {or_replace}{materialized}{temporary}VIEW {if_not_exists}{name}{to}", or_replace = if *or_replace { "OR REPLACE " } else { "" }, materialized = if *materialized { "MATERIALIZED " } else { "" }, name = name, temporary = if *temporary { "TEMPORARY " } else { "" }, - if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" } + if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" }, + to = to + .as_ref() + .map(|to| format!(" TO {to}")) + .unwrap_or_default() )?; if let Some(comment) = comment { write!( diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 27520a6c4..c568640a9 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4172,6 +4172,14 @@ impl<'a> Parser<'a> { }; } + let to = if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keyword(Keyword::TO) + { + Some(self.parse_object_name(false)?) + } else { + None + }; + let comment = if dialect_of!(self is SnowflakeDialect | GenericDialect) && self.parse_keyword(Keyword::COMMENT) { @@ -4209,6 +4217,7 @@ impl<'a> Parser<'a> { with_no_schema_binding, if_not_exists, temporary, + to, }) } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index ec4ddca96..88e2ef912 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -315,6 +315,7 @@ fn parse_create_view_if_not_exists() { with_no_schema_binding: late_binding, if_not_exists, temporary, + .. 
} => { assert_eq!("mydataset.newview", name.to_string()); assert_eq!(Vec::::new(), columns); diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index ed3b2de22..5cd483242 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -561,6 +561,21 @@ fn parse_select_star_except_no_parens() { ); } +#[test] +fn parse_create_materialized_view() { + // example sql + // https://clickhouse.com/docs/en/guides/developer/cascading-materialized-views + let sql = concat!( + "CREATE MATERIALIZED VIEW analytics.monthly_aggregated_data_mv ", + "TO analytics.monthly_aggregated_data ", + "AS SELECT toDate(toStartOfMonth(event_time)) ", + "AS month, domain_name, sumState(count_views) ", + "AS sumCountViews FROM analytics.hourly_data ", + "GROUP BY domain_name, month" + ); + clickhouse_and_generic().verified_stmt(sql); +} + fn clickhouse() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {})], diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 0149bad5d..f7162ddef 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -6279,6 +6279,7 @@ fn parse_create_view() { with_no_schema_binding: late_binding, if_not_exists, temporary, + to, } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -6291,6 +6292,7 @@ fn parse_create_view() { assert!(!late_binding); assert!(!if_not_exists); assert!(!temporary); + assert!(to.is_none()) } _ => unreachable!(), } @@ -6335,6 +6337,7 @@ fn parse_create_view_with_columns() { with_no_schema_binding: late_binding, if_not_exists, temporary, + to, } => { assert_eq!("v", name.to_string()); assert_eq!( @@ -6357,6 +6360,7 @@ fn parse_create_view_with_columns() { assert!(!late_binding); assert!(!if_not_exists); assert!(!temporary); + assert!(to.is_none()) } _ => unreachable!(), } @@ -6378,6 +6382,7 @@ fn parse_create_view_temporary() { with_no_schema_binding: late_binding, if_not_exists, temporary, + to, } => { 
assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -6390,6 +6395,7 @@ fn parse_create_view_temporary() { assert!(!late_binding); assert!(!if_not_exists); assert!(temporary); + assert!(to.is_none()) } _ => unreachable!(), } @@ -6411,6 +6417,7 @@ fn parse_create_or_replace_view() { with_no_schema_binding: late_binding, if_not_exists, temporary, + to, } => { assert_eq!("v", name.to_string()); assert_eq!(columns, vec![]); @@ -6423,6 +6430,7 @@ fn parse_create_or_replace_view() { assert!(!late_binding); assert!(!if_not_exists); assert!(!temporary); + assert!(to.is_none()) } _ => unreachable!(), } @@ -6448,6 +6456,7 @@ fn parse_create_or_replace_materialized_view() { with_no_schema_binding: late_binding, if_not_exists, temporary, + to, } => { assert_eq!("v", name.to_string()); assert_eq!(columns, vec![]); @@ -6460,6 +6469,7 @@ fn parse_create_or_replace_materialized_view() { assert!(!late_binding); assert!(!if_not_exists); assert!(!temporary); + assert!(to.is_none()) } _ => unreachable!(), } @@ -6481,6 +6491,7 @@ fn parse_create_materialized_view() { with_no_schema_binding: late_binding, if_not_exists, temporary, + to, } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -6493,6 +6504,7 @@ fn parse_create_materialized_view() { assert!(!late_binding); assert!(!if_not_exists); assert!(!temporary); + assert!(to.is_none()) } _ => unreachable!(), } @@ -6514,6 +6526,7 @@ fn parse_create_materialized_view_with_cluster_by() { with_no_schema_binding: late_binding, if_not_exists, temporary, + to, } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -6526,6 +6539,7 @@ fn parse_create_materialized_view_with_cluster_by() { assert!(!late_binding); assert!(!if_not_exists); assert!(!temporary); + assert!(to.is_none()) } _ => unreachable!(), } diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 160bbcbd5..b6be2c3f5 100644 --- 
a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -552,6 +552,7 @@ fn parse_sf_create_or_replace_with_comment_for_snowflake() { with_no_schema_binding: late_binding, if_not_exists, temporary, + .. } => { assert_eq!("v", name.to_string()); assert_eq!(columns, vec![]); diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index e329abae7..3670b1784 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -171,6 +171,7 @@ fn parse_create_view_temporary_if_not_exists() { with_no_schema_binding: late_binding, if_not_exists, temporary, + .. } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); From 7a9793b72e268f6e7e830ec6f4e857878e0b6bc7 Mon Sep 17 00:00:00 2001 From: Lorrens Pantelis <100197010+LorrensP-2158466@users.noreply.github.com> Date: Sun, 23 Jun 2024 13:14:57 +0200 Subject: [PATCH 007/506] Allow semi-colon at the end of UNCACHE statement (#1320) --- src/parser/mod.rs | 22 +++++++--------------- tests/sqlparser_common.rs | 6 +++--- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index c568640a9..337c1dac5 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3611,21 +3611,13 @@ impl<'a> Parser<'a> { /// Parse a UNCACHE TABLE statement pub fn parse_uncache_table(&mut self) -> Result { - let has_table = self.parse_keyword(Keyword::TABLE); - if has_table { - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let table_name = self.parse_object_name(false)?; - if self.peek_token().token == Token::EOF { - Ok(Statement::UNCache { - table_name, - if_exists, - }) - } else { - self.expected("an `EOF`", self.peek_token()) - } - } else { - self.expected("a `TABLE` keyword", self.peek_token()) - } + self.expect_keyword(Keyword::TABLE)?; + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let table_name = self.parse_object_name(false)?; + Ok(Statement::UNCache { + table_name, + if_exists, + 
}) } /// SQLite-specific `CREATE VIRTUAL TABLE` diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index f7162ddef..0f5afb341 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -8465,19 +8465,19 @@ fn parse_uncache_table() { let res = parse_sql_statements("UNCACHE TABLE 'table_name' foo"); assert_eq!( - ParserError::ParserError("Expected: an `EOF`, found: foo".to_string()), + ParserError::ParserError("Expected: end of statement, found: foo".to_string()), res.unwrap_err() ); let res = parse_sql_statements("UNCACHE 'table_name' foo"); assert_eq!( - ParserError::ParserError("Expected: a `TABLE` keyword, found: 'table_name'".to_string()), + ParserError::ParserError("Expected: TABLE, found: 'table_name'".to_string()), res.unwrap_err() ); let res = parse_sql_statements("UNCACHE IF EXISTS 'table_name' foo"); assert_eq!( - ParserError::ParserError("Expected: a `TABLE` keyword, found: IF".to_string()), + ParserError::ParserError("Expected: TABLE, found: IF".to_string()), res.unwrap_err() ); } From a685e1199355b0150fd5a4f6c7b938ecc07a6818 Mon Sep 17 00:00:00 2001 From: hulk Date: Sun, 23 Jun 2024 19:36:05 +0800 Subject: [PATCH 008/506] Support parametric arguments to `FUNCTION` for ClickHouse dialect (#1315) Co-authored-by: Andrew Lamb --- src/ast/mod.rs | 12 ++++++++- src/ast/visitor.rs | 1 + src/parser/mod.rs | 33 ++++++++++++++++++++--- src/test_utils.rs | 1 + tests/sqlparser_clickhouse.rs | 50 +++++++++++++++++++++++++++++++++++ tests/sqlparser_common.rs | 14 ++++++++++ tests/sqlparser_duckdb.rs | 1 + tests/sqlparser_hive.rs | 1 + tests/sqlparser_mssql.rs | 1 + tests/sqlparser_postgres.rs | 7 +++++ tests/sqlparser_redshift.rs | 1 + tests/sqlparser_snowflake.rs | 1 + tests/sqlparser_sqlite.rs | 1 + 13 files changed, 119 insertions(+), 5 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 70190b35b..8182d1144 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -4695,6 +4695,16 @@ impl fmt::Display for CloseCursor { 
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Function { pub name: ObjectName, + /// The parameters to the function, including any options specified within the + /// delimiting parentheses. + /// + /// Example: + /// ```plaintext + /// HISTOGRAM(0.5, 0.6)(x, y) + /// ``` + /// + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/aggregate-functions/parametric-functions) + pub parameters: FunctionArguments, /// The arguments to the function, including any options specified within the /// delimiting parentheses. pub args: FunctionArguments, @@ -4723,7 +4733,7 @@ pub struct Function { impl fmt::Display for Function { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}{}", self.name, self.args)?; + write!(f, "{}{}{}", self.name, self.parameters, self.args)?; if !self.within_group.is_empty() { write!( diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index 57dcca2e5..1b8a43802 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -533,6 +533,7 @@ where /// null_treatment: None, /// filter: None, /// over: None, +/// parameters: FunctionArguments::None, /// within_group: vec![], /// }); /// } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 337c1dac5..537609973 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -27,6 +27,7 @@ use core::{ use log::debug; +use recursion::RecursionCounter; use IsLateral::*; use IsOptional::*; @@ -146,8 +147,6 @@ mod recursion { pub struct DepthGuard {} } -use recursion::RecursionCounter; - #[derive(PartialEq, Eq)] pub enum IsOptional { Optional, @@ -1002,6 +1001,7 @@ impl<'a> Parser<'a> { { Ok(Expr::Function(Function { name: ObjectName(vec![w.to_ident()]), + parameters: FunctionArguments::None, args: FunctionArguments::None, null_treatment: None, filter: None, @@ -1058,6 +1058,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; Ok(Expr::Function(Function { name: ObjectName(vec![w.to_ident()]), + parameters: FunctionArguments::None, args: 
FunctionArguments::Subquery(query), filter: None, null_treatment: None, @@ -1293,6 +1294,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; return Ok(Expr::Function(Function { name, + parameters: FunctionArguments::None, args: FunctionArguments::Subquery(subquery), filter: None, null_treatment: None, @@ -1301,7 +1303,16 @@ impl<'a> Parser<'a> { })); } - let args = self.parse_function_argument_list()?; + let mut args = self.parse_function_argument_list()?; + let mut parameters = FunctionArguments::None; + // ClickHouse aggregations support parametric functions like `HISTOGRAM(0.5, 0.6)(x, y)` + // which (0.5, 0.6) is a parameter to the function. + if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.consume_token(&Token::LParen) + { + parameters = FunctionArguments::List(args); + args = self.parse_function_argument_list()?; + } let within_group = if self.parse_keywords(&[Keyword::WITHIN, Keyword::GROUP]) { self.expect_token(&Token::LParen)?; @@ -1350,6 +1361,7 @@ impl<'a> Parser<'a> { Ok(Expr::Function(Function { name, + parameters, args: FunctionArguments::List(args), null_treatment, filter, @@ -1382,6 +1394,7 @@ impl<'a> Parser<'a> { }; Ok(Expr::Function(Function { name, + parameters: FunctionArguments::None, args, filter: None, over: None, @@ -6470,6 +6483,7 @@ impl<'a> Parser<'a> { } else { Ok(Statement::Call(Function { name: object_name, + parameters: FunctionArguments::None, args: FunctionArguments::None, over: None, filter: None, @@ -8092,7 +8106,7 @@ impl<'a> Parser<'a> { pub fn parse_query_body(&mut self, precedence: u8) -> Result { // We parse the expression using a Pratt parser, as in `parse_expr()`. // Start by parsing a restricted SELECT or a `(subquery)`: - let mut expr = if self.parse_keyword(Keyword::SELECT) { + let expr = if self.parse_keyword(Keyword::SELECT) { SetExpr::Select(self.parse_select().map(Box::new)?) 
} else if self.consume_token(&Token::LParen) { // CTEs are not allowed here, but the parser currently accepts them @@ -8111,6 +8125,17 @@ impl<'a> Parser<'a> { ); }; + self.parse_remaining_set_exprs(expr, precedence) + } + + /// Parse any extra set expressions that may be present in a query body + /// + /// (this is its own function to reduce required stack size in debug builds) + fn parse_remaining_set_exprs( + &mut self, + mut expr: SetExpr, + precedence: u8, + ) -> Result { loop { // The query can be optionally followed by a set operator: let op = self.parse_set_operator(&self.peek_token().token); diff --git a/src/test_utils.rs b/src/test_utils.rs index 9af9c8098..1a31d4611 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -336,6 +336,7 @@ pub fn join(relation: TableFactor) -> Join { pub fn call(function: &str, args: impl IntoIterator) -> Expr { Expr::Function(Function { name: ObjectName(vec![Ident::new(function)]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: args diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 5cd483242..50d4faf5d 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -183,6 +183,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![], @@ -553,6 +554,55 @@ fn parse_select_star_except() { clickhouse().verified_stmt("SELECT * EXCEPT (prev_status) FROM anomalies"); } +#[test] +fn parse_select_parametric_function() { + match clickhouse_and_generic().verified_stmt("SELECT HISTOGRAM(0.5, 0.6)(x, y) FROM t") { + Statement::Query(query) => { + let projection: &Vec = query.body.as_select().unwrap().projection.as_ref(); + assert_eq!(projection.len(), 1); + match &projection[0] { + 
UnnamedExpr(Expr::Function(f)) => { + let args = match &f.args { + FunctionArguments::List(ref args) => args, + _ => unreachable!(), + }; + assert_eq!(args.args.len(), 2); + assert_eq!( + args.args[0], + FunctionArg::Unnamed(FunctionArgExpr::Expr(Identifier(Ident::from("x")))) + ); + assert_eq!( + args.args[1], + FunctionArg::Unnamed(FunctionArgExpr::Expr(Identifier(Ident::from("y")))) + ); + + let parameters = match f.parameters { + FunctionArguments::List(ref args) => args, + _ => unreachable!(), + }; + assert_eq!(parameters.args.len(), 2); + assert_eq!( + parameters.args[0], + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(Value::Number( + "0.5".parse().unwrap(), + false + )))) + ); + assert_eq!( + parameters.args[1], + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(Value::Number( + "0.6".parse().unwrap(), + false + )))) + ); + } + _ => unreachable!(), + } + } + _ => unreachable!(), + } +} + #[test] fn parse_select_star_except_no_parens() { clickhouse().one_statement_parses_to( diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 0f5afb341..76e6a98bb 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1045,6 +1045,7 @@ fn parse_select_count_wildcard() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("COUNT")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![FunctionArg::Unnamed(FunctionArgExpr::Wildcard)], @@ -1066,6 +1067,7 @@ fn parse_select_count_distinct() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("COUNT")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: Some(DuplicateTreatment::Distinct), args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::UnaryOp { @@ -2151,6 +2153,7 @@ fn parse_select_having() { Some(Expr::BinaryOp { left: Box::new(Expr::Function(Function { name: 
ObjectName(vec![Ident::new("COUNT")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![FunctionArg::Unnamed(FunctionArgExpr::Wildcard)], @@ -2180,6 +2183,7 @@ fn parse_select_qualify() { Some(Expr::BinaryOp { left: Box::new(Expr::Function(Function { name: ObjectName(vec![Ident::new("ROW_NUMBER")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![], @@ -2523,6 +2527,7 @@ fn parse_listagg() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("LISTAGG")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: Some(DuplicateTreatment::Distinct), args: vec![ @@ -4227,6 +4232,7 @@ fn parse_named_argument_function() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("FUN")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![ @@ -4265,6 +4271,7 @@ fn parse_named_argument_function_with_eq_operator() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("FUN")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![ @@ -4337,6 +4344,7 @@ fn parse_window_functions() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("row_number")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![], @@ -4465,6 +4473,7 @@ fn test_parse_named_window() { value: "MIN".to_string(), quote_style: None, }]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( @@ -4494,6 +4503,7 @@ fn test_parse_named_window() { value: "MAX".to_string(), 
quote_style: None, }]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( @@ -8089,6 +8099,7 @@ fn parse_time_functions() { let select = verified_only_select(&sql); let select_localtime_func_call_ast = Function { name: ObjectName(vec![Ident::new(func_name)]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![], @@ -9017,6 +9028,7 @@ fn parse_call() { assert_eq!( verified_stmt("CALL my_procedure('a')"), Statement::Call(Function { + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( @@ -9418,6 +9430,7 @@ fn test_selective_aggregation() { vec![ SelectItem::UnnamedExpr(Expr::Function(Function { name: ObjectName(vec![Ident::new("ARRAY_AGG")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( @@ -9435,6 +9448,7 @@ fn test_selective_aggregation() { SelectItem::ExprWithAlias { expr: Expr::Function(Function { name: ObjectName(vec![Ident::new("ARRAY_AGG")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 8d12945dd..eaa1faa90 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -488,6 +488,7 @@ fn test_duckdb_named_argument_function_with_assignment_operator() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("FUN")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![ diff --git a/tests/sqlparser_hive.rs 
b/tests/sqlparser_hive.rs index a5a6e2435..53280d7d8 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -381,6 +381,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![], diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index f570de11d..5f03bb093 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -354,6 +354,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![], diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 63c53227a..197597e9b 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2369,6 +2369,7 @@ fn parse_array_subquery_expr() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("ARRAY")]), + parameters: FunctionArguments::None, args: FunctionArguments::Subquery(Box::new(Query { with: None, body: Box::new(SetExpr::SetOperation { @@ -2729,6 +2730,7 @@ fn test_composite_value() { Ident::new("information_schema"), Ident::new("_pg_expandarray") ]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Array( @@ -2955,6 +2957,7 @@ fn parse_current_functions() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("CURRENT_CATALOG")]), + parameters: FunctionArguments::None, args: FunctionArguments::None, null_treatment: None, filter: None, @@ -2966,6 +2969,7 @@ fn parse_current_functions() { assert_eq!( &Expr::Function(Function { name: 
ObjectName(vec![Ident::new("CURRENT_USER")]), + parameters: FunctionArguments::None, args: FunctionArguments::None, null_treatment: None, filter: None, @@ -2977,6 +2981,7 @@ fn parse_current_functions() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("SESSION_USER")]), + parameters: FunctionArguments::None, args: FunctionArguments::None, null_treatment: None, filter: None, @@ -2988,6 +2993,7 @@ fn parse_current_functions() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("USER")]), + parameters: FunctionArguments::None, args: FunctionArguments::None, null_treatment: None, filter: None, @@ -3438,6 +3444,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![], diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 0a5710ff4..938e6e887 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -136,6 +136,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![], diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index b6be2c3f5..5e8fef0c5 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -892,6 +892,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![], diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 3670b1784..dd1e77d5d 100644 --- a/tests/sqlparser_sqlite.rs +++ 
b/tests/sqlparser_sqlite.rs @@ -335,6 +335,7 @@ fn parse_window_function_with_filter() { select.projection, vec![SelectItem::UnnamedExpr(Expr::Function(Function { name: ObjectName(vec![Ident::new(func_name)]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( From f5ccef6ea9234dc2b9fcd15dfda2551aced19309 Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Thu, 27 Jun 2024 15:56:21 +0400 Subject: [PATCH 009/506] Fix Snowflake `SELECT *` wildcard `REPLACE ... RENAME` order (#1321) --- src/ast/query.rs | 13 ++++++------ src/parser/mod.rs | 11 +++++----- tests/sqlparser_snowflake.rs | 39 ++++++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 12 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index fcd5b970d..0fde3e6b7 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -547,19 +547,20 @@ impl fmt::Display for IdentWithAlias { #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct WildcardAdditionalOptions { /// `[ILIKE...]`. - /// Snowflake syntax: + /// Snowflake syntax: pub opt_ilike: Option, /// `[EXCLUDE...]`. pub opt_exclude: Option, /// `[EXCEPT...]`. /// Clickhouse syntax: pub opt_except: Option, - /// `[RENAME ...]`. - pub opt_rename: Option, /// `[REPLACE]` /// BigQuery syntax: /// Clickhouse syntax: + /// Snowflake syntax: pub opt_replace: Option, + /// `[RENAME ...]`. 
+ pub opt_rename: Option, } impl fmt::Display for WildcardAdditionalOptions { @@ -573,12 +574,12 @@ impl fmt::Display for WildcardAdditionalOptions { if let Some(except) = &self.opt_except { write!(f, " {except}")?; } - if let Some(rename) = &self.opt_rename { - write!(f, " {rename}")?; - } if let Some(replace) = &self.opt_replace { write!(f, " {replace}")?; } + if let Some(rename) = &self.opt_rename { + write!(f, " {rename}")?; + } Ok(()) } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 537609973..33095c428 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10175,15 +10175,14 @@ impl<'a> Parser<'a> { } else { None }; - let opt_rename = if dialect_of!(self is GenericDialect | SnowflakeDialect) { - self.parse_optional_select_item_rename()? + let opt_replace = if dialect_of!(self is GenericDialect | BigQueryDialect | ClickHouseDialect | DuckDbDialect | SnowflakeDialect) + { + self.parse_optional_select_item_replace()? } else { None }; - - let opt_replace = if dialect_of!(self is GenericDialect | BigQueryDialect | ClickHouseDialect | DuckDbDialect | SnowflakeDialect) - { - self.parse_optional_select_item_replace()? + let opt_rename = if dialect_of!(self is GenericDialect | SnowflakeDialect) { + self.parse_optional_select_item_rename()? 
} else { None }; diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 5e8fef0c5..2f4ed1316 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1016,6 +1016,44 @@ fn test_select_wildcard_with_rename() { assert_eq!(expected, select.projection[0]); } +#[test] +fn test_select_wildcard_with_replace_and_rename() { + let select = snowflake_and_generic().verified_only_select( + "SELECT * REPLACE (col_z || col_z AS col_z) RENAME (col_z AS col_zz) FROM data", + ); + let expected = SelectItem::Wildcard(WildcardAdditionalOptions { + opt_replace: Some(ReplaceSelectItem { + items: vec![Box::new(ReplaceSelectElement { + expr: Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("col_z"))), + op: BinaryOperator::StringConcat, + right: Box::new(Expr::Identifier(Ident::new("col_z"))), + }, + column_name: Ident::new("col_z"), + as_keyword: true, + })], + }), + opt_rename: Some(RenameSelectItem::Multiple(vec![IdentWithAlias { + ident: Ident::new("col_z"), + alias: Ident::new("col_zz"), + }])), + ..Default::default() + }); + assert_eq!(expected, select.projection[0]); + + // rename cannot precede replace + // https://docs.snowflake.com/en/sql-reference/sql/select#parameters + assert_eq!( + snowflake_and_generic() + .parse_sql_statements( + "SELECT * RENAME (col_z AS col_zz) REPLACE (col_z || col_z AS col_z) FROM data" + ) + .unwrap_err() + .to_string(), + "sql parser error: Expected: end of statement, found: REPLACE" + ); +} + #[test] fn test_select_wildcard_with_exclude_and_rename() { let select = snowflake_and_generic() @@ -1031,6 +1069,7 @@ fn test_select_wildcard_with_exclude_and_rename() { assert_eq!(expected, select.projection[0]); // rename cannot precede exclude + // https://docs.snowflake.com/en/sql-reference/sql/select#parameters assert_eq!( snowflake_and_generic() .parse_sql_statements("SELECT * RENAME col_a AS col_b EXCLUDE col_z FROM data") From f9ab8dcc27fd2d55030b9c5fa71e41d5c08dd601 Mon Sep 17 00:00:00 2001 
From: gstvg <28798827+gstvg@users.noreply.github.com> Date: Thu, 27 Jun 2024 08:58:11 -0300 Subject: [PATCH 010/506] Support for DuckDB Union datatype (#1322) --- src/ast/data_type.rs | 9 +++- src/ast/mod.rs | 17 +++++++ src/parser/mod.rs | 31 +++++++++++++ tests/sqlparser_duckdb.rs | 95 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 151 insertions(+), 1 deletion(-) diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index 6b1a542f4..e6477f56b 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -20,7 +20,7 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "visitor")] use sqlparser_derive::{Visit, VisitMut}; -use crate::ast::{display_comma_separated, ObjectName, StructField}; +use crate::ast::{display_comma_separated, ObjectName, StructField, UnionField}; use super::{value::escape_single_quote_string, ColumnDef}; @@ -303,6 +303,10 @@ pub enum DataType { /// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type Struct(Vec), + /// Union + /// + /// [duckdb]: https://duckdb.org/docs/sql/data_types/union.html + Union(Vec), /// Nullable - special marker NULL represents in ClickHouse as a data type. 
/// /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nullable @@ -516,6 +520,9 @@ impl fmt::Display for DataType { write!(f, "STRUCT") } } + DataType::Union(fields) => { + write!(f, "UNION({})", display_comma_separated(fields)) + } // ClickHouse DataType::Nullable(data_type) => { write!(f, "Nullable({})", data_type) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 8182d1144..9ed837825 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -294,6 +294,23 @@ impl fmt::Display for StructField { } } +/// A field definition within a union +/// +/// [duckdb]: https://duckdb.org/docs/sql/data_types/union.html +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct UnionField { + pub field_name: Ident, + pub field_type: DataType, +} + +impl fmt::Display for UnionField { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} {}", self.field_name, self.field_type) + } +} + /// A dictionary field within a dictionary. /// /// [duckdb]: https://duckdb.org/docs/sql/data_types/struct#creating-structs diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 33095c428..f58304960 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2246,6 +2246,32 @@ impl<'a> Parser<'a> { )) } + /// DuckDB specific: Parse a Union type definition as a sequence of field-value pairs. 
+ /// + /// Syntax: + /// + /// ```sql + /// UNION(field_name field_type[,...]) + /// ``` + /// + /// [1]: https://duckdb.org/docs/sql/data_types/union.html + fn parse_union_type_def(&mut self) -> Result, ParserError> { + self.expect_keyword(Keyword::UNION)?; + + self.expect_token(&Token::LParen)?; + + let fields = self.parse_comma_separated(|p| { + Ok(UnionField { + field_name: p.parse_identifier(false)?, + field_type: p.parse_data_type()?, + }) + })?; + + self.expect_token(&Token::RParen)?; + + Ok(fields) + } + /// DuckDB specific: Parse a duckdb dictionary [1] /// /// Syntax: @@ -7136,6 +7162,11 @@ impl<'a> Parser<'a> { trailing_bracket = _trailing_bracket; Ok(DataType::Struct(field_defs)) } + Keyword::UNION if dialect_of!(self is DuckDbDialect | GenericDialect) => { + self.prev_token(); + let fields = self.parse_union_type_def()?; + Ok(DataType::Union(fields)) + } Keyword::NULLABLE if dialect_of!(self is ClickHouseDialect | GenericDialect) => { Ok(self.parse_sub_type(DataType::Nullable)?) 
} diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index eaa1faa90..253318b32 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -545,3 +545,98 @@ fn test_array_index() { expr ); } + +#[test] +fn test_duckdb_union_datatype() { + let sql = "CREATE TABLE tbl1 (one UNION(a INT), two UNION(a INT, b INT), nested UNION(a UNION(b INT)))"; + let stmt = duckdb_and_generic().verified_stmt(sql); + assert_eq!( + Statement::CreateTable(CreateTable { + or_replace: Default::default(), + temporary: Default::default(), + external: Default::default(), + global: Default::default(), + if_not_exists: Default::default(), + transient: Default::default(), + volatile: Default::default(), + name: ObjectName(vec!["tbl1".into()]), + columns: vec![ + ColumnDef { + name: "one".into(), + data_type: DataType::Union(vec![UnionField { + field_name: "a".into(), + field_type: DataType::Int(None) + }]), + collation: Default::default(), + options: Default::default() + }, + ColumnDef { + name: "two".into(), + data_type: DataType::Union(vec![ + UnionField { + field_name: "a".into(), + field_type: DataType::Int(None) + }, + UnionField { + field_name: "b".into(), + field_type: DataType::Int(None) + } + ]), + collation: Default::default(), + options: Default::default() + }, + ColumnDef { + name: "nested".into(), + data_type: DataType::Union(vec![UnionField { + field_name: "a".into(), + field_type: DataType::Union(vec![UnionField { + field_name: "b".into(), + field_type: DataType::Int(None) + }]) + }]), + collation: Default::default(), + options: Default::default() + } + ], + constraints: Default::default(), + hive_distribution: HiveDistributionStyle::NONE, + hive_formats: Some(HiveFormat { + row_format: Default::default(), + serde_properties: Default::default(), + storage: Default::default(), + location: Default::default() + }), + table_properties: Default::default(), + with_options: Default::default(), + file_format: Default::default(), + location: 
Default::default(), + query: Default::default(), + without_rowid: Default::default(), + like: Default::default(), + clone: Default::default(), + engine: Default::default(), + comment: Default::default(), + auto_increment_offset: Default::default(), + default_charset: Default::default(), + collation: Default::default(), + on_commit: Default::default(), + on_cluster: Default::default(), + primary_key: Default::default(), + order_by: Default::default(), + partition_by: Default::default(), + cluster_by: Default::default(), + options: Default::default(), + strict: Default::default(), + copy_grants: Default::default(), + enable_schema_evolution: Default::default(), + change_tracking: Default::default(), + data_retention_time_in_days: Default::default(), + max_data_extension_time_in_days: Default::default(), + default_ddl_collation: Default::default(), + with_aggregation_policy: Default::default(), + with_row_access_policy: Default::default(), + with_tags: Default::default() + }), + stmt + ); +} From 376889ae5de7b4e738dd097ce08b0867475aacbb Mon Sep 17 00:00:00 2001 From: Emil Sivervik Date: Sun, 30 Jun 2024 13:03:08 +0200 Subject: [PATCH 011/506] chore(docs): refine docs (#1326) --- src/parser/mod.rs | 105 +++++++++++++++++++++++----------------------- 1 file changed, 53 insertions(+), 52 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f58304960..869662976 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -71,11 +71,11 @@ mod recursion { use super::ParserError; /// Tracks remaining recursion depth. This value is decremented on - /// each call to `try_decrease()`, when it reaches 0 an error will + /// each call to [`RecursionCounter::try_decrease()`], when it reaches 0 an error will /// be returned. 
/// - /// Note: Uses an Rc and Cell in order to satisfy the Rust - /// borrow checker so the automatic DepthGuard decrement a + /// Note: Uses an [`std::rc::Rc`] and [`std::cell::Cell`] in order to satisfy the Rust + /// borrow checker so the automatic [`DepthGuard`] decrement a /// reference to the counter. pub(crate) struct RecursionCounter { remaining_depth: Rc>, @@ -92,7 +92,7 @@ mod recursion { /// Decreases the remaining depth by 1. /// - /// Returns `Err` if the remaining depth falls to 0. + /// Returns [`Err`] if the remaining depth falls to 0. /// /// Returns a [`DepthGuard`] which will adds 1 to the /// remaining depth upon drop; @@ -131,7 +131,7 @@ mod recursion { /// Implementation [`RecursionCounter`] if std is NOT available (and does not /// guard against stack overflow). /// - /// Has the same API as the std RecursionCounter implementation + /// Has the same API as the std [`RecursionCounter`] implementation /// but does not actually limit stack depth. pub(crate) struct RecursionCounter {} @@ -270,17 +270,17 @@ enum ParserState { pub struct Parser<'a> { tokens: Vec, - /// The index of the first unprocessed token in `self.tokens` + /// The index of the first unprocessed token in [`Parser::tokens`]. index: usize, /// The current state of the parser. state: ParserState, - /// The current dialect to use + /// The current dialect to use. dialect: &'a dyn Dialect, /// Additional options that allow you to mix & match behavior /// otherwise constrained to certain dialects (e.g. trailing - /// commas) and/or format of parse (e.g. unescaping) + /// commas) and/or format of parse (e.g. unescaping). options: ParserOptions, - /// ensure the stack does not overflow by limiting recursion depth + /// Ensure the stack does not overflow by limiting recursion depth. recursion_counter: RecursionCounter, } @@ -313,7 +313,6 @@ impl<'a> Parser<'a> { /// Specify the maximum recursion limit while parsing. 
/// - /// /// [`Parser`] prevents stack overflows by returning /// [`ParserError::RecursionLimitExceeded`] if the parser exceeds /// this depth while processing the query. @@ -338,7 +337,6 @@ impl<'a> Parser<'a> { /// Specify additional parser options /// - /// /// [`Parser`] supports additional options ([`ParserOptions`]) /// that allow you to mix & match behavior otherwise constrained /// to certain dialects (e.g. trailing commas). @@ -824,7 +822,7 @@ impl<'a> Parser<'a> { }) } - /// Parse a new expression including wildcard & qualified wildcard + /// Parse a new expression including wildcard & qualified wildcard. pub fn parse_wildcard_expr(&mut self) -> Result { let index = self.index; @@ -867,13 +865,13 @@ impl<'a> Parser<'a> { self.parse_expr() } - /// Parse a new expression + /// Parse a new expression. pub fn parse_expr(&mut self) -> Result { let _guard = self.recursion_counter.try_decrease()?; self.parse_subexpr(0) } - /// Parse tokens until the precedence changes + /// Parse tokens until the precedence changes. pub fn parse_subexpr(&mut self, precedence: u8) -> Result { debug!("parsing expr"); let mut expr = self.parse_prefix()?; @@ -908,8 +906,7 @@ impl<'a> Parser<'a> { Ok(expr) } - /// Get the precedence of the next token - /// With AND, OR, and XOR + /// Get the precedence of the next token, with AND, OR, and XOR. pub fn get_next_interval_precedence(&self) -> Result { let token = self.peek_token(); @@ -944,7 +941,7 @@ impl<'a> Parser<'a> { Ok(Statement::ReleaseSavepoint { name }) } - /// Parse an expression prefix + /// Parse an expression prefix. pub fn parse_prefix(&mut self) -> Result { // allow the dialect to override prefix parsing if let Some(prefix) = self.dialect.parse_prefix(self) { @@ -1456,8 +1453,7 @@ impl<'a> Parser<'a> { } } - /// parse a group by expr. a group by expr can be one of group sets, roll up, cube, or simple - /// expr. + /// Parse a group by expr. Group by expr can be one of group sets, roll up, cube, or simple expr. 
fn parse_group_by_expr(&mut self) -> Result { if self.dialect.supports_group_by_expr() { if self.parse_keywords(&[Keyword::GROUPING, Keyword::SETS]) { @@ -1484,7 +1480,7 @@ impl<'a> Parser<'a> { } } - /// parse a tuple with `(` and `)`. + /// Parse a tuple with `(` and `)`. /// If `lift_singleton` is true, then a singleton tuple is lifted to a tuple of length 1, otherwise it will fail. /// If `allow_empty` is true, then an empty tuple is allowed. fn parse_tuple( @@ -1953,13 +1949,11 @@ impl<'a> Parser<'a> { } } - /// Parses fulltext expressions [(1)] + /// Parses fulltext expressions [`sqlparser::ast::Expr::MatchAgainst`] /// /// # Errors /// This method will raise an error if the column list is empty or with invalid identifiers, /// the match expression is not a literal string, or if the search modifier is not valid. - /// - /// [(1)]: Expr::MatchAgainst pub fn parse_match_against(&mut self) -> Result { let columns = self.parse_parenthesized_column_list(Mandatory, false)?; @@ -2004,17 +1998,19 @@ impl<'a> Parser<'a> { }) } - /// Parse an INTERVAL expression. + /// Parse an `INTERVAL` expression. /// /// Some syntactically valid intervals: /// - /// 1. `INTERVAL '1' DAY` - /// 2. `INTERVAL '1-1' YEAR TO MONTH` - /// 3. `INTERVAL '1' SECOND` - /// 4. `INTERVAL '1:1:1.1' HOUR (5) TO SECOND (5)` - /// 5. `INTERVAL '1.1' SECOND (2, 2)` - /// 6. `INTERVAL '1:1' HOUR (5) TO MINUTE (5)` - /// 7. (MySql and BigQuey only):`INTERVAL 1 DAY` + /// ```sql + /// 1. INTERVAL '1' DAY + /// 2. INTERVAL '1-1' YEAR TO MONTH + /// 3. INTERVAL '1' SECOND + /// 4. INTERVAL '1:1:1.1' HOUR (5) TO SECOND (5) + /// 5. INTERVAL '1.1' SECOND (2, 2) + /// 6. INTERVAL '1:1' HOUR (5) TO MINUTE (5) + /// 7. (MySql & BigQuey only): INTERVAL 1 DAY + /// ``` /// /// Note that we do not currently attempt to parse the quoted value. pub fn parse_interval(&mut self) -> Result { @@ -2210,15 +2206,15 @@ impl<'a> Parser<'a> { )) } - /// Parse a field definition in a struct [1] or tuple [2]. 
+ /// Parse a field definition in a [struct] or [tuple]. /// Syntax: /// /// ```sql /// [field_name] field_type /// ``` /// - /// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#declaring_a_struct_type - /// [2]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple + /// [struct]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#declaring_a_struct_type + /// [tuple]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple fn parse_struct_field_def( &mut self, ) -> Result<(StructField, MatchedTrailingBracket), ParserError> { @@ -2272,7 +2268,7 @@ impl<'a> Parser<'a> { Ok(fields) } - /// DuckDB specific: Parse a duckdb dictionary [1] + /// DuckDB specific: Parse a duckdb [dictionary] /// /// Syntax: /// @@ -2280,7 +2276,7 @@ impl<'a> Parser<'a> { /// {'field_name': expr1[, ... ]} /// ``` /// - /// [1]: https://duckdb.org/docs/sql/data_types/struct#creating-structs + /// [dictionary]: https://duckdb.org/docs/sql/data_types/struct#creating-structs fn parse_duckdb_struct_literal(&mut self) -> Result { self.expect_token(&Token::LBrace)?; @@ -2291,13 +2287,15 @@ impl<'a> Parser<'a> { Ok(Expr::Dictionary(fields)) } - /// Parse a field for a duckdb dictionary [1] + /// Parse a field for a duckdb [dictionary] + /// /// Syntax + /// /// ```sql /// 'name': expr /// ``` /// - /// [1]: https://duckdb.org/docs/sql/data_types/struct#creating-structs + /// [dictionary]: https://duckdb.org/docs/sql/data_types/struct#creating-structs fn parse_duckdb_dictionary_field(&mut self) -> Result { let key = self.parse_identifier(false)?; @@ -2311,13 +2309,15 @@ impl<'a> Parser<'a> { }) } - /// Parse clickhouse map [1] + /// Parse clickhouse [map] + /// /// Syntax + /// /// ```sql /// Map(key_data_type, value_data_type) /// ``` /// - /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/map + /// [map]: https://clickhouse.com/docs/en/sql-reference/data-types/map fn parse_click_house_map_def(&mut self) -> 
Result<(DataType, DataType), ParserError> { self.expect_keyword(Keyword::MAP)?; self.expect_token(&Token::LParen)?; @@ -2329,13 +2329,15 @@ impl<'a> Parser<'a> { Ok((key_data_type, value_data_type)) } - /// Parse clickhouse tuple [1] + /// Parse clickhouse [tuple] + /// /// Syntax + /// /// ```sql /// Tuple([field_name] field_type, ...) /// ``` /// - /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple + /// [tuple]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple fn parse_click_house_tuple_def(&mut self) -> Result, ParserError> { self.expect_keyword(Keyword::TUPLE)?; self.expect_token(&Token::LParen)?; @@ -2649,7 +2651,7 @@ impl<'a> Parser<'a> { } } - /// parse the ESCAPE CHAR portion of LIKE, ILIKE, and SIMILAR TO + /// Parse the `ESCAPE CHAR` portion of `LIKE`, `ILIKE`, and `SIMILAR TO` pub fn parse_escape_char(&mut self) -> Result, ParserError> { if self.parse_keyword(Keyword::ESCAPE) { Ok(Some(self.parse_literal_string()?)) @@ -2836,7 +2838,7 @@ impl<'a> Parser<'a> { }) } - /// Parses the parens following the `[ NOT ] IN` operator + /// Parses the parens following the `[ NOT ] IN` operator. pub fn parse_in(&mut self, expr: Expr, negated: bool) -> Result { // BigQuery allows `IN UNNEST(array_expression)` // https://cloud.google.com/bigquery/docs/reference/standard-sql/operators#in_operators @@ -2873,7 +2875,7 @@ impl<'a> Parser<'a> { Ok(in_op) } - /// Parses `BETWEEN AND `, assuming the `BETWEEN` keyword was already consumed + /// Parses `BETWEEN AND `, assuming the `BETWEEN` keyword was already consumed. pub fn parse_between(&mut self, expr: Expr, negated: bool) -> Result { // Stop parsing subexpressions for and on tokens with // precedence lower than that of `BETWEEN`, such as `AND`, `IS`, etc. @@ -2888,7 +2890,7 @@ impl<'a> Parser<'a> { }) } - /// Parse a postgresql casting style which is in the form of `expr::datatype` + /// Parse a postgresql casting style which is in the form of `expr::datatype`. 
pub fn parse_pg_cast(&mut self, expr: Expr) -> Result { Ok(Expr::Cast { kind: CastKind::DoubleColon, @@ -2898,7 +2900,7 @@ impl<'a> Parser<'a> { }) } - // use https://www.postgresql.org/docs/7.0/operators.htm#AEN2026 as a reference + // Use https://www.postgresql.org/docs/7.0/operators.htm#AEN2026 as a reference // higher number = higher precedence // // NOTE: The pg documentation is incomplete, e.g. the AT TIME ZONE operator @@ -3217,7 +3219,7 @@ impl<'a> Parser<'a> { /// If the current token is one of the given `keywords`, consume the token /// and return the keyword that matches. Otherwise, no tokens are consumed - /// and returns `None`. + /// and returns [`None`]. #[must_use] pub fn parse_one_of_keywords(&mut self, keywords: &[Keyword]) -> Option { match self.peek_token().token { @@ -3393,8 +3395,7 @@ impl<'a> Parser<'a> { self.parse_comma_separated(f) } - /// Run a parser method `f`, reverting back to the current position - /// if unsuccessful. + /// Run a parser method `f`, reverting back to the current position if unsuccessful. #[must_use] fn maybe_parse(&mut self, mut f: F) -> Option where @@ -3409,8 +3410,8 @@ impl<'a> Parser<'a> { } } - /// Parse either `ALL`, `DISTINCT` or `DISTINCT ON (...)`. Returns `None` if `ALL` is parsed - /// and results in a `ParserError` if both `ALL` and `DISTINCT` are found. + /// Parse either `ALL`, `DISTINCT` or `DISTINCT ON (...)`. Returns [`None`] if `ALL` is parsed + /// and results in a [`ParserError`] if both `ALL` and `DISTINCT` are found. 
pub fn parse_all_or_distinct(&mut self) -> Result, ParserError> { let loc = self.peek_token().location; let all = self.parse_keyword(Keyword::ALL); From 0b1a413e64006286308500731323d50617dc6ed8 Mon Sep 17 00:00:00 2001 From: hulk Date: Sun, 30 Jun 2024 19:06:20 +0800 Subject: [PATCH 012/506] Fix a few typos in comment lines (#1316) Co-authored-by: Andrew Lamb --- src/parser/mod.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 869662976..563fd86bc 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -108,7 +108,7 @@ mod recursion { } } - /// Guard that increass the remaining depth by 1 on drop + /// Guard that increases the remaining depth by 1 on drop pub struct DepthGuard { remaining_depth: Rc>, } @@ -194,7 +194,7 @@ const DEFAULT_REMAINING_DEPTH: usize = 50; /// nested such that the following declaration is possible: /// `ARRAY>` /// But the tokenizer recognizes the `>>` as a ShiftRight token. -/// We work-around that limitation when parsing a data type by accepting +/// We work around that limitation when parsing a data type by accepting /// either a `>` or `>>` token in such cases, remembering which variant we /// matched. /// In the latter case having matched a `>>`, the parent type will not look to @@ -1075,7 +1075,7 @@ impl<'a> Parser<'a> { let expr = self.parse_subexpr(Self::PLUS_MINUS_PREC)?; Ok(Expr::Prior(Box::new(expr))) } - // Here `w` is a word, check if it's a part of a multi-part + // Here `w` is a word, check if it's a part of a multipart // identifier, a function call, or a simple identifier: _ => match self.peek_token().token { Token::LParen | Token::Period => { @@ -2009,7 +2009,7 @@ impl<'a> Parser<'a> { /// 4. INTERVAL '1:1:1.1' HOUR (5) TO SECOND (5) /// 5. INTERVAL '1.1' SECOND (2, 2) /// 6. INTERVAL '1:1' HOUR (5) TO MINUTE (5) - /// 7. (MySql & BigQuey only): INTERVAL 1 DAY + /// 7. 
(MySql & BigQuery only): INTERVAL 1 DAY /// ``` /// /// Note that we do not currently attempt to parse the quoted value. @@ -2749,7 +2749,7 @@ impl<'a> Parser<'a> { match token.token { Token::Word(Word { value, - // path segments in SF dot notation can be unquoted or double quoted + // path segments in SF dot notation can be unquoted or double-quoted quote_style: quote_style @ (Some('"') | None), // some experimentation suggests that snowflake permits // any keyword here unquoted. @@ -2948,7 +2948,7 @@ impl<'a> Parser<'a> { Token::Word(w) if w.keyword == Keyword::NOT => match self.peek_nth_token(1).token { // The precedence of NOT varies depending on keyword that // follows it. If it is followed by IN, BETWEEN, or LIKE, - // it takes on the precedence of those tokens. Otherwise it + // it takes on the precedence of those tokens. Otherwise, it // is not an infix operator, and therefore has zero // precedence. Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC), @@ -3251,7 +3251,7 @@ impl<'a> Parser<'a> { } /// If the current token is the `expected` keyword, consume the token. - /// Otherwise return an error. + /// Otherwise, return an error. pub fn expect_keyword(&mut self, expected: Keyword) -> Result<(), ParserError> { if self.parse_keyword(expected) { Ok(()) @@ -4508,7 +4508,7 @@ impl<'a> Parser<'a> { self.peek_token(), ); }; - // Many dialects support the non standard `IF EXISTS` clause and allow + // Many dialects support the non-standard `IF EXISTS` clause and allow // specifying multiple objects to delete in a single statement let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); let names = self.parse_comma_separated(|p| p.parse_object_name(false))?; @@ -4822,7 +4822,7 @@ impl<'a> Parser<'a> { continue; } _ => { - // Put back the semi-colon, this is the end of the DECLARE statement. + // Put back the semicolon, this is the end of the DECLARE statement. 
self.prev_token(); } } @@ -7278,7 +7278,7 @@ impl<'a> Parser<'a> { // ignore the and treat the multiple strings as // a single ." Token::SingleQuotedString(s) => Ok(Some(Ident::with_quote('\'', s))), - // Support for MySql dialect double quoted string, `AS "HOUR"` for example + // Support for MySql dialect double-quoted string, `AS "HOUR"` for example Token::DoubleQuotedString(s) => Ok(Some(Ident::with_quote('\"', s))), _ => { if after_as { From 44d7a20f641c9cd8f0c3e08f7d77f02534452ce8 Mon Sep 17 00:00:00 2001 From: hulk Date: Sun, 30 Jun 2024 19:33:43 +0800 Subject: [PATCH 013/506] Support `GROUP BY WITH MODIFIER` for ClickHouse (#1323) Co-authored-by: Ifeanyi Ubah --- src/ast/mod.rs | 4 +-- src/ast/query.rs | 56 +++++++++++++++++++++++++++------ src/keywords.rs | 1 + src/parser/mod.rs | 37 +++++++++++++++++++--- tests/sqlparser_clickhouse.rs | 57 ++++++++++++++++++++++++++++++++- tests/sqlparser_common.rs | 53 +++++++++++++++++-------------- tests/sqlparser_duckdb.rs | 4 +-- tests/sqlparser_mssql.rs | 4 +-- tests/sqlparser_mysql.rs | 16 +++++----- tests/sqlparser_postgres.rs | 59 ++++++++++++++++++++--------------- 10 files changed, 215 insertions(+), 76 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 9ed837825..c7f461418 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -43,8 +43,8 @@ pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode, ExceptSelectItem, ExcludeSelectItem, ExprWithAlias, Fetch, ForClause, ForJson, ForXml, - GroupByExpr, IdentWithAlias, IlikeSelectItem, Join, JoinConstraint, JoinOperator, - JsonTableColumn, JsonTableColumnErrorHandling, LateralView, LockClause, LockType, + GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem, Join, JoinConstraint, + JoinOperator, JsonTableColumn, JsonTableColumnErrorHandling, LateralView, LockClause, LockType, MatchRecognizePattern, MatchRecognizeSymbol, Measure, 
NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset, OffsetRows, OrderByExpr, PivotValueSource, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, diff --git a/src/ast/query.rs b/src/ast/query.rs index 0fde3e6b7..d00a0dfcc 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -299,10 +299,10 @@ impl fmt::Display for Select { write!(f, " WHERE {selection}")?; } match &self.group_by { - GroupByExpr::All => write!(f, " GROUP BY ALL")?, - GroupByExpr::Expressions(exprs) => { + GroupByExpr::All(_) => write!(f, " {}", self.group_by)?, + GroupByExpr::Expressions(exprs, _) => { if !exprs.is_empty() { - write!(f, " GROUP BY {}", display_comma_separated(exprs))?; + write!(f, " {}", self.group_by)? } } } @@ -1866,27 +1866,65 @@ impl fmt::Display for SelectInto { } } +/// ClickHouse supports GROUP BY WITH modifiers(includes ROLLUP|CUBE|TOTALS). +/// e.g. GROUP BY year WITH ROLLUP WITH TOTALS +/// +/// [ClickHouse]: +#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum GroupByWithModifier { + Rollup, + Cube, + Totals, +} + +impl fmt::Display for GroupByWithModifier { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + GroupByWithModifier::Rollup => write!(f, "WITH ROLLUP"), + GroupByWithModifier::Cube => write!(f, "WITH CUBE"), + GroupByWithModifier::Totals => write!(f, "WITH TOTALS"), + } + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum GroupByExpr { - /// ALL syntax of [Snowflake], and [DuckDB] + /// ALL syntax of [Snowflake], [DuckDB] and [ClickHouse]. 
/// /// [Snowflake]: /// [DuckDB]: - All, + /// [ClickHouse]: + /// + /// ClickHouse also supports WITH modifiers after GROUP BY ALL and expressions. + /// + /// [ClickHouse]: + All(Vec), /// Expressions - Expressions(Vec), + Expressions(Vec, Vec), } impl fmt::Display for GroupByExpr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - GroupByExpr::All => write!(f, "GROUP BY ALL"), - GroupByExpr::Expressions(col_names) => { + GroupByExpr::All(modifiers) => { + write!(f, "GROUP BY ALL")?; + if !modifiers.is_empty() { + write!(f, " {}", display_separated(modifiers, " "))?; + } + Ok(()) + } + GroupByExpr::Expressions(col_names, modifiers) => { let col_names = display_comma_separated(col_names); - write!(f, "GROUP BY ({col_names})") + write!(f, "GROUP BY {col_names}")?; + if !modifiers.is_empty() { + write!(f, " {}", display_separated(modifiers, " "))?; + } + Ok(()) } } } diff --git a/src/keywords.rs b/src/keywords.rs index e75d45e44..5db55e9da 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -721,6 +721,7 @@ define_keywords!( TINYINT, TO, TOP, + TOTALS, TRAILING, TRANSACTION, TRANSIENT, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 563fd86bc..4e9c3836b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8319,13 +8319,42 @@ impl<'a> Parser<'a> { }; let group_by = if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) { - if self.parse_keyword(Keyword::ALL) { - GroupByExpr::All + let expressions = if self.parse_keyword(Keyword::ALL) { + None } else { - GroupByExpr::Expressions(self.parse_comma_separated(Parser::parse_group_by_expr)?) + Some(self.parse_comma_separated(Parser::parse_group_by_expr)?) 
+ }; + + let mut modifiers = vec![]; + if dialect_of!(self is ClickHouseDialect | GenericDialect) { + loop { + if !self.parse_keyword(Keyword::WITH) { + break; + } + let keyword = self.expect_one_of_keywords(&[ + Keyword::ROLLUP, + Keyword::CUBE, + Keyword::TOTALS, + ])?; + modifiers.push(match keyword { + Keyword::ROLLUP => GroupByWithModifier::Rollup, + Keyword::CUBE => GroupByWithModifier::Cube, + Keyword::TOTALS => GroupByWithModifier::Totals, + _ => { + return parser_err!( + "BUG: expected to match GroupBy modifier keyword", + self.peek_token().location + ) + } + }); + } + } + match expressions { + None => GroupByExpr::All(modifiers), + Some(exprs) => GroupByExpr::Expressions(exprs, modifiers), } } else { - GroupByExpr::Expressions(vec![]) + GroupByExpr::Expressions(vec![], vec![]) }; let cluster_by = if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 50d4faf5d..0c188a24b 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -88,7 +88,7 @@ fn parse_map_access_expr() { right: Box::new(Expr::Value(Value::SingleQuotedString("foo".to_string()))), }), }), - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -626,6 +626,61 @@ fn parse_create_materialized_view() { clickhouse_and_generic().verified_stmt(sql); } +#[test] +fn parse_group_by_with_modifier() { + let clauses = ["x", "a, b", "ALL"]; + let modifiers = [ + "WITH ROLLUP", + "WITH CUBE", + "WITH TOTALS", + "WITH ROLLUP WITH CUBE", + ]; + let expected_modifiers = [ + vec![GroupByWithModifier::Rollup], + vec![GroupByWithModifier::Cube], + vec![GroupByWithModifier::Totals], + vec![GroupByWithModifier::Rollup, GroupByWithModifier::Cube], + ]; + for clause in &clauses { + for (modifier, expected_modifier) in modifiers.iter().zip(expected_modifiers.iter()) { + let sql = format!("SELECT * FROM t 
GROUP BY {clause} {modifier}"); + match clickhouse_and_generic().verified_stmt(&sql) { + Statement::Query(query) => { + let group_by = &query.body.as_select().unwrap().group_by; + if clause == &"ALL" { + assert_eq!(group_by, &GroupByExpr::All(expected_modifier.to_vec())); + } else { + assert_eq!( + group_by, + &GroupByExpr::Expressions( + clause + .split(", ") + .map(|c| Identifier(Ident::new(c))) + .collect(), + expected_modifier.to_vec() + ) + ); + } + } + _ => unreachable!(), + } + } + } + + // invalid cases + let invalid_cases = [ + "SELECT * FROM t GROUP BY x WITH", + "SELECT * FROM t GROUP BY x WITH ROLLUP CUBE", + "SELECT * FROM t GROUP BY x WITH WITH ROLLUP", + "SELECT * FROM t GROUP BY WITH ROLLUP", + ]; + for sql in invalid_cases { + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("Expected: one of ROLLUP or CUBE or TOTALS, found: WITH"); + } +} + fn clickhouse() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {})], diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 76e6a98bb..ac2133946 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -392,9 +392,10 @@ fn parse_update_set_from() { }], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![Expr::Identifier(Ident::new( - "id" - ))]), + group_by: GroupByExpr::Expressions( + vec![Expr::Identifier(Ident::new("id"))], + vec![] + ), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -2119,10 +2120,13 @@ fn parse_select_group_by() { let sql = "SELECT id, fname, lname FROM customer GROUP BY lname, fname"; let select = verified_only_select(sql); assert_eq!( - GroupByExpr::Expressions(vec![ - Expr::Identifier(Ident::new("lname")), - Expr::Identifier(Ident::new("fname")), - ]), + GroupByExpr::Expressions( + vec![ + Expr::Identifier(Ident::new("lname")), + Expr::Identifier(Ident::new("fname")), + ], + vec![] + ), select.group_by ); @@ -2137,7 +2141,7 @@ fn parse_select_group_by() { 
fn parse_select_group_by_all() { let sql = "SELECT id, fname, lname, SUM(order) FROM customer GROUP BY ALL"; let select = verified_only_select(sql); - assert_eq!(GroupByExpr::All, select.group_by); + assert_eq!(GroupByExpr::All(vec![]), select.group_by); one_statement_parses_to( "SELECT id, fname, lname, SUM(order) FROM customer GROUP BY ALL", @@ -4545,7 +4549,7 @@ fn test_parse_named_window() { }], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -4974,7 +4978,7 @@ fn parse_interval_and_or_xor() { }), }), }), - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -6908,7 +6912,7 @@ fn lateral_function() { }], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -7627,7 +7631,7 @@ fn parse_merge() { }], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -9133,7 +9137,7 @@ fn parse_unload() { }], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -9276,7 +9280,7 @@ fn parse_connect_by() { into: None, lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -9364,7 +9368,7 @@ fn parse_connect_by() { op: BinaryOperator::NotEq, right: Box::new(Expr::Value(number("42"))), }), - group_by: GroupByExpr::Expressions(vec![]), + 
group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -9484,15 +9488,18 @@ fn test_group_by_grouping_sets() { all_dialects_where(|d| d.supports_group_by_expr()) .verified_only_select(sql) .group_by, - GroupByExpr::Expressions(vec![Expr::GroupingSets(vec![ - vec![ - Expr::Identifier(Ident::new("city")), - Expr::Identifier(Ident::new("car_model")) - ], - vec![Expr::Identifier(Ident::new("city")),], - vec![Expr::Identifier(Ident::new("car_model"))], + GroupByExpr::Expressions( + vec![Expr::GroupingSets(vec![ + vec![ + Expr::Identifier(Ident::new("city")), + Expr::Identifier(Ident::new("car_model")) + ], + vec![Expr::Identifier(Ident::new("city")),], + vec![Expr::Identifier(Ident::new("car_model"))], + vec![] + ])], vec![] - ])]) + ) ); } diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 253318b32..948e150c9 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -171,7 +171,7 @@ fn test_select_union_by_name() { }], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -209,7 +209,7 @@ fn test_select_union_by_name() { }], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 5f03bb093..993850299 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -111,7 +111,7 @@ fn parse_create_procedure() { from: vec![], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -528,7 +528,7 @@ fn parse_substring_in_select() { }], lateral_views: vec![], 
selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index a25f4c208..4c18d4a75 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -907,7 +907,7 @@ fn parse_escaped_quote_identifiers_with_escape() { from: vec![], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -954,7 +954,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() { from: vec![], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -998,7 +998,7 @@ fn parse_escaped_backticks_with_escape() { from: vec![], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -1042,7 +1042,7 @@ fn parse_escaped_backticks_with_no_escape() { from: vec![], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -1703,7 +1703,7 @@ fn parse_select_with_numeric_prefix_column_name() { }], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -1756,7 +1756,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { }], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), 
cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -2255,7 +2255,7 @@ fn parse_substring_in_select() { }], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -2559,7 +2559,7 @@ fn parse_hex_string_introducer() { from: vec![], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 197597e9b..2606fb96e 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1075,7 +1075,7 @@ fn parse_copy_to() { from: vec![], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), having: None, named_window: vec![], window_before_qualify: false, @@ -2383,7 +2383,7 @@ fn parse_array_subquery_expr() { from: vec![], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -2402,7 +2402,7 @@ fn parse_array_subquery_expr() { from: vec![], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -3711,14 +3711,17 @@ fn parse_select_group_by_grouping_sets() { "SELECT brand, size, sum(sales) FROM items_sold GROUP BY size, GROUPING SETS ((brand), (size), ())" ); assert_eq!( - GroupByExpr::Expressions(vec![ - Expr::Identifier(Ident::new("size")), - Expr::GroupingSets(vec![ - vec![Expr::Identifier(Ident::new("brand"))], - vec![Expr::Identifier(Ident::new("size"))], - vec![], - ]), - ]), + GroupByExpr::Expressions( + vec![ + 
Expr::Identifier(Ident::new("size")), + Expr::GroupingSets(vec![ + vec![Expr::Identifier(Ident::new("brand"))], + vec![Expr::Identifier(Ident::new("size"))], + vec![], + ]), + ], + vec![] + ), select.group_by ); } @@ -3729,13 +3732,16 @@ fn parse_select_group_by_rollup() { "SELECT brand, size, sum(sales) FROM items_sold GROUP BY size, ROLLUP (brand, size)", ); assert_eq!( - GroupByExpr::Expressions(vec![ - Expr::Identifier(Ident::new("size")), - Expr::Rollup(vec![ - vec![Expr::Identifier(Ident::new("brand"))], - vec![Expr::Identifier(Ident::new("size"))], - ]), - ]), + GroupByExpr::Expressions( + vec![ + Expr::Identifier(Ident::new("size")), + Expr::Rollup(vec![ + vec![Expr::Identifier(Ident::new("brand"))], + vec![Expr::Identifier(Ident::new("size"))], + ]), + ], + vec![] + ), select.group_by ); } @@ -3746,13 +3752,16 @@ fn parse_select_group_by_cube() { "SELECT brand, size, sum(sales) FROM items_sold GROUP BY size, CUBE (brand, size)", ); assert_eq!( - GroupByExpr::Expressions(vec![ - Expr::Identifier(Ident::new("size")), - Expr::Cube(vec![ - vec![Expr::Identifier(Ident::new("brand"))], - vec![Expr::Identifier(Ident::new("size"))], - ]), - ]), + GroupByExpr::Expressions( + vec![ + Expr::Identifier(Ident::new("size")), + Expr::Cube(vec![ + vec![Expr::Identifier(Ident::new("brand"))], + vec![Expr::Identifier(Ident::new("size"))], + ]), + ], + vec![] + ), select.group_by ); } From 700bd03d6f4aa97c5b0901fd399dd3c10114a760 Mon Sep 17 00:00:00 2001 From: hulk Date: Sun, 7 Jul 2024 19:17:43 +0800 Subject: [PATCH 014/506] Support `SETTINGS` pairs for ClickHouse dialect (#1327) Co-authored-by: Ifeanyi Ubah --- src/ast/mod.rs | 2 +- src/ast/query.rs | 21 +++++++++++++++++++ src/keywords.rs | 3 +++ src/parser/mod.rs | 18 +++++++++++++++++ tests/sqlparser_clickhouse.rs | 38 ++++++++++++++++++++++++++++++++++- tests/sqlparser_common.rs | 6 ++++++ tests/sqlparser_mssql.rs | 2 ++ tests/sqlparser_mysql.rs | 15 ++++++++++++++ tests/sqlparser_postgres.rs | 11 +++++++--- 9 files 
changed, 111 insertions(+), 5 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index c7f461418..c904d4bc9 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -48,7 +48,7 @@ pub use self::query::{ MatchRecognizePattern, MatchRecognizeSymbol, Measure, NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset, OffsetRows, OrderByExpr, PivotValueSource, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, - SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, SymbolDefinition, Table, + SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Setting, SymbolDefinition, Table, TableAlias, TableFactor, TableVersion, TableWithJoins, Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With, }; diff --git a/src/ast/query.rs b/src/ast/query.rs index d00a0dfcc..241e45a9c 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -50,6 +50,10 @@ pub struct Query { /// `FOR JSON { AUTO | PATH } [ , INCLUDE_NULL_VALUES ]` /// (MSSQL-specific) pub for_clause: Option, + /// ClickHouse syntax: `SELECT * FROM t SETTINGS key1 = value1, key2 = value2` + /// + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select#settings-in-select-query) + pub settings: Option>, } impl fmt::Display for Query { @@ -70,6 +74,9 @@ impl fmt::Display for Query { if !self.limit_by.is_empty() { write!(f, " BY {}", display_separated(&self.limit_by, ", "))?; } + if let Some(ref settings) = self.settings { + write!(f, " SETTINGS {}", display_comma_separated(settings))?; + } if let Some(ref fetch) = self.fetch { write!(f, " {fetch}")?; } @@ -828,6 +835,20 @@ impl fmt::Display for ConnectBy { } } +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Setting { + pub key: Ident, + pub value: Value, +} + +impl fmt::Display for Setting { + fn fmt(&self, f: &mut 
fmt::Formatter) -> fmt::Result { + write!(f, "{} = {}", self.key, self.value) + } +} + /// An expression optionally followed by an alias. /// /// Example: diff --git a/src/keywords.rs b/src/keywords.rs index 5db55e9da..cbba92c5b 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -650,6 +650,7 @@ define_keywords!( SESSION_USER, SET, SETS, + SETTINGS, SHARE, SHOW, SIMILAR, @@ -850,6 +851,8 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::FOR, // for MYSQL PARTITION SELECTION Keyword::PARTITION, + // for ClickHouse SELECT * FROM t SETTINGS ... + Keyword::SETTINGS, // for Snowflake START WITH .. CONNECT BY Keyword::START, Keyword::CONNECT, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 4e9c3836b..7614307bf 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -7871,6 +7871,7 @@ impl<'a> Parser<'a> { fetch: None, locks: vec![], for_clause: None, + settings: None, }) } else if self.parse_keyword(Keyword::UPDATE) { Ok(Query { @@ -7883,6 +7884,7 @@ impl<'a> Parser<'a> { fetch: None, locks: vec![], for_clause: None, + settings: None, }) } else { let body = self.parse_boxed_query_body(0)?; @@ -7928,6 +7930,20 @@ impl<'a> Parser<'a> { vec![] }; + let settings = if dialect_of!(self is ClickHouseDialect|GenericDialect) + && self.parse_keyword(Keyword::SETTINGS) + { + let key_values = self.parse_comma_separated(|p| { + let key = p.parse_identifier(false)?; + p.expect_token(&Token::Eq)?; + let value = p.parse_value()?; + Ok(Setting { key, value }) + })?; + Some(key_values) + } else { + None + }; + let fetch = if self.parse_keyword(Keyword::FETCH) { Some(self.parse_fetch()?) 
} else { @@ -7955,6 +7971,7 @@ impl<'a> Parser<'a> { fetch, locks, for_clause, + settings, }) } } @@ -9091,6 +9108,7 @@ impl<'a> Parser<'a> { fetch: None, locks: vec![], for_clause: None, + settings: None, }), alias, }) diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 0c188a24b..b3e03c4ab 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -21,8 +21,8 @@ use test_utils::*; use sqlparser::ast::Expr::{BinaryOp, Identifier, MapAccess}; use sqlparser::ast::SelectItem::UnnamedExpr; use sqlparser::ast::TableFactor::Table; +use sqlparser::ast::Value::Number; use sqlparser::ast::*; - use sqlparser::dialect::ClickHouseDialect; use sqlparser::dialect::GenericDialect; @@ -549,6 +549,42 @@ fn parse_limit_by() { ); } +#[test] +fn parse_settings_in_query() { + match clickhouse_and_generic() + .verified_stmt(r#"SELECT * FROM t SETTINGS max_threads = 1, max_block_size = 10000"#) + { + Statement::Query(query) => { + assert_eq!( + query.settings, + Some(vec![ + Setting { + key: Ident::new("max_threads"), + value: Number("1".parse().unwrap(), false) + }, + Setting { + key: Ident::new("max_block_size"), + value: Number("10000".parse().unwrap(), false) + }, + ]) + ); + } + _ => unreachable!(), + } + + let invalid_cases = vec![ + "SELECT * FROM t SETTINGS a", + "SELECT * FROM t SETTINGS a=", + "SELECT * FROM t SETTINGS a=1, b", + "SELECT * FROM t SETTINGS a=1, b=", + "SELECT * FROM t SETTINGS a=1, b=c", + ]; + for sql in invalid_cases { + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("Expected: SETTINGS key = value, found: "); + } +} #[test] fn parse_select_star_except() { clickhouse().verified_stmt("SELECT * EXCEPT (prev_status) FROM anomalies"); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index ac2133946..609d2600d 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -413,6 +413,7 @@ fn parse_update_set_from() { fetch: None, locks: vec![], for_clause: None, + 
settings: None, }), alias: Some(TableAlias { name: Ident::new("t2"), @@ -3427,6 +3428,7 @@ fn parse_create_table_as_table() { fetch: None, locks: vec![], for_clause: None, + settings: None, }); match verified_stmt(sql1) { @@ -3452,6 +3454,7 @@ fn parse_create_table_as_table() { fetch: None, locks: vec![], for_clause: None, + settings: None, }); match verified_stmt(sql2) { @@ -4996,6 +4999,7 @@ fn parse_interval_and_or_xor() { fetch: None, locks: vec![], for_clause: None, + settings: None, }))]; assert_eq!(actual_ast, expected_ast); @@ -7649,6 +7653,7 @@ fn parse_merge() { fetch: None, locks: vec![], for_clause: None, + settings: None, }), alias: Some(TableAlias { name: Ident { @@ -9156,6 +9161,7 @@ fn parse_unload() { locks: vec![], for_clause: None, order_by: vec![], + settings: None, }), to: Ident { value: "s3://...".to_string(), diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 993850299..84ab474b0 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -103,6 +103,7 @@ fn parse_create_procedure() { locks: vec![], for_clause: None, order_by: vec![], + settings: None, body: Box::new(SetExpr::Select(Box::new(Select { distinct: None, top: None, @@ -546,6 +547,7 @@ fn parse_substring_in_select() { fetch: None, locks: vec![], for_clause: None, + settings: None, }), query ); diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 4c18d4a75..cf9b717be 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -925,6 +925,7 @@ fn parse_escaped_quote_identifiers_with_escape() { fetch: None, locks: vec![], for_clause: None, + settings: None, })) ); } @@ -972,6 +973,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() { fetch: None, locks: vec![], for_clause: None, + settings: None, })) ); } @@ -1016,6 +1018,7 @@ fn parse_escaped_backticks_with_escape() { fetch: None, locks: vec![], for_clause: None, + settings: None, })) ); } @@ -1060,6 +1063,7 @@ fn parse_escaped_backticks_with_no_escape() { fetch: 
None, locks: vec![], for_clause: None, + settings: None, })) ); } @@ -1264,6 +1268,7 @@ fn parse_simple_insert() { fetch: None, locks: vec![], for_clause: None, + settings: None, })), source ); @@ -1306,6 +1311,7 @@ fn parse_ignore_insert() { fetch: None, locks: vec![], for_clause: None, + settings: None, })), source ); @@ -1348,6 +1354,7 @@ fn parse_priority_insert() { fetch: None, locks: vec![], for_clause: None, + settings: None, })), source ); @@ -1387,6 +1394,7 @@ fn parse_priority_insert() { fetch: None, locks: vec![], for_clause: None, + settings: None, })), source ); @@ -1434,6 +1442,7 @@ fn parse_insert_as() { fetch: None, locks: vec![], for_clause: None, + settings: None, })), source ); @@ -1493,6 +1502,7 @@ fn parse_insert_as() { fetch: None, locks: vec![], for_clause: None, + settings: None, })), source ); @@ -1536,6 +1546,7 @@ fn parse_replace_insert() { fetch: None, locks: vec![], for_clause: None, + settings: None, })), source ); @@ -1573,6 +1584,7 @@ fn parse_empty_row_insert() { fetch: None, locks: vec![], for_clause: None, + settings: None, })), source ); @@ -1633,6 +1645,7 @@ fn parse_insert_with_on_duplicate_update() { fetch: None, locks: vec![], for_clause: None, + settings: None, })), source ); @@ -2273,6 +2286,7 @@ fn parse_substring_in_select() { fetch: None, locks: vec![], for_clause: None, + settings: None, }), query ); @@ -2578,6 +2592,7 @@ fn parse_hex_string_introducer() { fetch: None, locks: vec![], for_clause: None, + settings: None, })) ) } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 2606fb96e..243116a3f 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1093,6 +1093,7 @@ fn parse_copy_to() { fetch: None, locks: vec![], for_clause: None, + settings: None, })), to: true, target: CopyTarget::File { @@ -2421,6 +2422,7 @@ fn parse_array_subquery_expr() { fetch: None, locks: vec![], for_clause: None, + settings: None, })), filter: None, null_treatment: None, @@ -3941,7 +3943,8 
@@ fn test_simple_postgres_insert_with_alias() { offset: None, fetch: None, locks: vec![], - for_clause: None + for_clause: None, + settings: None, })), partitioned: None, after_columns: vec![], @@ -4008,7 +4011,8 @@ fn test_simple_postgres_insert_with_alias() { offset: None, fetch: None, locks: vec![], - for_clause: None + for_clause: None, + settings: None, })), partitioned: None, after_columns: vec![], @@ -4071,7 +4075,8 @@ fn test_simple_insert_with_quoted_alias() { offset: None, fetch: None, locks: vec![], - for_clause: None + for_clause: None, + settings: None, })), partitioned: None, after_columns: vec![], From 0884dd920d2a2bbd5c8c67cbf9ed812ce8a1dd5d Mon Sep 17 00:00:00 2001 From: hulk Date: Sun, 7 Jul 2024 20:03:23 +0800 Subject: [PATCH 015/506] Support `PREWHERE` condition for ClickHouse dialect (#1328) --- src/ast/query.rs | 8 ++++++ src/keywords.rs | 3 +++ src/parser/mod.rs | 9 +++++++ tests/sqlparser_clickhouse.rs | 51 +++++++++++++++++++++++++++++++++++ tests/sqlparser_common.rs | 8 ++++++ tests/sqlparser_duckdb.rs | 2 ++ tests/sqlparser_mssql.rs | 2 ++ tests/sqlparser_mysql.rs | 8 ++++++ tests/sqlparser_postgres.rs | 3 +++ 9 files changed, 94 insertions(+) diff --git a/src/ast/query.rs b/src/ast/query.rs index 241e45a9c..7d2626b2d 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -247,6 +247,11 @@ pub struct Select { pub from: Vec, /// LATERAL VIEWs pub lateral_views: Vec, + /// ClickHouse syntax: `PREWHERE a = 1 WHERE b = 2`, + /// and it can be used together with WHERE selection. 
+ /// + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select/prewhere) + pub prewhere: Option, /// WHERE pub selection: Option, /// GROUP BY @@ -302,6 +307,9 @@ impl fmt::Display for Select { write!(f, "{lv}")?; } } + if let Some(ref prewhere) = self.prewhere { + write!(f, " PREWHERE {prewhere}")?; + } if let Some(ref selection) = self.selection { write!(f, " WHERE {selection}")?; } diff --git a/src/keywords.rs b/src/keywords.rs index cbba92c5b..eb69a209b 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -558,6 +558,7 @@ define_keywords!( PRECISION, PREPARE, PRESERVE, + PREWHERE, PRIMARY, PRIOR, PRIVILEGES, @@ -851,6 +852,8 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::FOR, // for MYSQL PARTITION SELECTION Keyword::PARTITION, + // for Clickhouse PREWHERE + Keyword::PREWHERE, // for ClickHouse SELECT * FROM t SETTINGS ... Keyword::SETTINGS, // for Snowflake START WITH .. CONNECT BY diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 7614307bf..a81d53e7c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8329,6 +8329,14 @@ impl<'a> Parser<'a> { } } + let prewhere = if dialect_of!(self is ClickHouseDialect|GenericDialect) + && self.parse_keyword(Keyword::PREWHERE) + { + Some(self.parse_expr()?) + } else { + None + }; + let selection = if self.parse_keyword(Keyword::WHERE) { Some(self.parse_expr()?) 
} else { @@ -8440,6 +8448,7 @@ impl<'a> Parser<'a> { into, from, lateral_views, + prewhere, selection, group_by, cluster_by, diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index b3e03c4ab..29a5b15aa 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -63,6 +63,7 @@ fn parse_map_access_expr() { joins: vec![], }], lateral_views: vec![], + prewhere: None, selection: Some(BinaryOp { left: Box::new(BinaryOp { left: Box::new(Identifier(Ident::new("id"))), @@ -717,6 +718,56 @@ fn parse_group_by_with_modifier() { } } +#[test] +fn test_prewhere() { + match clickhouse_and_generic().verified_stmt("SELECT * FROM t PREWHERE x = 1 WHERE y = 2") { + Statement::Query(query) => { + let prewhere = query.body.as_select().unwrap().prewhere.as_ref(); + assert_eq!( + prewhere, + Some(&BinaryOp { + left: Box::new(Identifier(Ident::new("x"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(Value::Number("1".parse().unwrap(), false))), + }) + ); + let selection = query.as_ref().body.as_select().unwrap().selection.as_ref(); + assert_eq!( + selection, + Some(&BinaryOp { + left: Box::new(Identifier(Ident::new("y"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(Value::Number("2".parse().unwrap(), false))), + }) + ); + } + _ => unreachable!(), + } + + match clickhouse_and_generic().verified_stmt("SELECT * FROM t PREWHERE x = 1 AND y = 2") { + Statement::Query(query) => { + let prewhere = query.body.as_select().unwrap().prewhere.as_ref(); + assert_eq!( + prewhere, + Some(&BinaryOp { + left: Box::new(BinaryOp { + left: Box::new(Identifier(Ident::new("x"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(Value::Number("1".parse().unwrap(), false))), + }), + op: BinaryOperator::And, + right: Box::new(BinaryOp { + left: Box::new(Identifier(Ident::new("y"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(Value::Number("2".parse().unwrap(), false))), + }), + }) + ); + } + _ => unreachable!(), + } +} + fn 
clickhouse() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {})], diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 609d2600d..256680b3e 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -391,6 +391,7 @@ fn parse_update_set_from() { joins: vec![], }], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions( vec![Expr::Identifier(Ident::new("id"))], @@ -4551,6 +4552,7 @@ fn test_parse_named_window() { joins: vec![], }], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -4932,6 +4934,7 @@ fn parse_interval_and_or_xor() { joins: vec![], }], lateral_views: vec![], + prewhere: None, selection: Some(Expr::BinaryOp { left: Box::new(Expr::BinaryOp { left: Box::new(Expr::Identifier(Ident { @@ -6915,6 +6918,7 @@ fn lateral_function() { }], }], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -7634,6 +7638,7 @@ fn parse_merge() { joins: vec![], }], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -9141,6 +9146,7 @@ fn parse_unload() { joins: vec![], }], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -9285,6 +9291,7 @@ fn parse_connect_by() { }], into: None, lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -9369,6 +9376,7 @@ fn parse_connect_by() { }], into: None, lateral_views: vec![], + prewhere: None, selection: Some(Expr::BinaryOp { left: Box::new(Expr::Identifier(Ident::new("employee_id"))), op: BinaryOperator::NotEq, diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 948e150c9..400daa8a8 100644 --- 
a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -170,6 +170,7 @@ fn test_select_union_by_name() { joins: vec![], }], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -208,6 +209,7 @@ fn test_select_union_by_name() { joins: vec![], }], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 84ab474b0..e0e0f7c70 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -111,6 +111,7 @@ fn parse_create_procedure() { into: None, from: vec![], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -528,6 +529,7 @@ fn parse_substring_in_select() { joins: vec![] }], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index cf9b717be..a5fa75200 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -906,6 +906,7 @@ fn parse_escaped_quote_identifiers_with_escape() { into: None, from: vec![], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -954,6 +955,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() { into: None, from: vec![], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -999,6 +1001,7 @@ fn parse_escaped_backticks_with_escape() { into: None, from: vec![], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -1044,6 +1047,7 @@ fn parse_escaped_backticks_with_no_escape() { into: None, from: vec![], lateral_views: vec![], + prewhere: 
None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -1715,6 +1719,7 @@ fn parse_select_with_numeric_prefix_column_name() { joins: vec![] }], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -1768,6 +1773,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { joins: vec![] }], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -2267,6 +2273,7 @@ fn parse_substring_in_select() { joins: vec![] }], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -2572,6 +2579,7 @@ fn parse_hex_string_introducer() { })], from: vec![], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 243116a3f..2d3097cf9 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1074,6 +1074,7 @@ fn parse_copy_to() { into: None, from: vec![], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), having: None, @@ -2383,6 +2384,7 @@ fn parse_array_subquery_expr() { into: None, from: vec![], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -2402,6 +2404,7 @@ fn parse_array_subquery_expr() { into: None, from: vec![], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], From f77192d4ec19c47c90654aa6514a7e63b0d67a0b Mon Sep 17 00:00:00 2001 From: Mohamed Abdeen <83442793+MohamedAbdeen21@users.noreply.github.com> Date: Mon, 8 Jul 2024 13:31:33 +0300 Subject: [PATCH 016/506] Re-enable trailing commas in 
DCL (#1318) --- src/parser/mod.rs | 39 +++++++++++++++++++++++++++++---------- tests/sqlparser_common.rs | 12 ++++++++++++ 2 files changed, 41 insertions(+), 10 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a81d53e7c..1dc6bff5e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -46,6 +46,9 @@ pub enum ParserError { RecursionLimitExceeded, } +// avoid clippy type_complexity warnings +type ParsedAction = (Keyword, Option>); + // Use `Parser::expected` instead, if possible macro_rules! parser_err { ($MSG:expr, $loc:expr) => { @@ -3334,6 +3337,29 @@ impl<'a> Parser<'a> { ret } + pub fn parse_actions_list(&mut self) -> Result, ParserError> { + let mut values = vec![]; + loop { + values.push(self.parse_grant_permission()?); + if !self.consume_token(&Token::Comma) { + break; + } else if self.options.trailing_commas { + match self.peek_token().token { + Token::Word(kw) if kw.keyword == Keyword::ON => { + break; + } + Token::RParen + | Token::SemiColon + | Token::EOF + | Token::RBracket + | Token::RBrace => break, + _ => continue, + } + } + } + Ok(values) + } + /// Parse a comma-separated list of 1+ items accepted by `F` pub fn parse_comma_separated(&mut self, mut f: F) -> Result, ParserError> where @@ -3347,9 +3373,7 @@ impl<'a> Parser<'a> { } else if self.options.trailing_commas { match self.peek_token().token { Token::Word(kw) - if keywords::RESERVED_FOR_COLUMN_ALIAS - .iter() - .any(|d| kw.keyword == *d) => + if keywords::RESERVED_FOR_COLUMN_ALIAS.contains(&kw.keyword) => { break; } @@ -9680,11 +9704,8 @@ impl<'a> Parser<'a> { with_privileges_keyword: self.parse_keyword(Keyword::PRIVILEGES), } } else { - let old_value = self.options.trailing_commas; - self.options.trailing_commas = false; - let (actions, err): (Vec<_>, Vec<_>) = self - .parse_comma_separated(Parser::parse_grant_permission)? + .parse_actions_list()? 
.into_iter() .map(|(kw, columns)| match kw { Keyword::DELETE => Ok(Action::Delete), @@ -9706,8 +9727,6 @@ impl<'a> Parser<'a> { }) .partition(Result::is_ok); - self.options.trailing_commas = old_value; - if !err.is_empty() { let errors: Vec = err.into_iter().filter_map(|x| x.err()).collect(); return Err(ParserError::ParserError(format!( @@ -9753,7 +9772,7 @@ impl<'a> Parser<'a> { Ok((privileges, objects)) } - pub fn parse_grant_permission(&mut self) -> Result<(Keyword, Option>), ParserError> { + pub fn parse_grant_permission(&mut self) -> Result { if let Some(kw) = self.parse_one_of_keywords(&[ Keyword::CONNECT, Keyword::CREATE, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 256680b3e..132874aa9 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -8942,6 +8942,11 @@ fn parse_trailing_comma() { "CREATE TABLE employees (name TEXT, age INT)", ); + trailing_commas.one_statement_parses_to( + "GRANT USAGE, SELECT, INSERT, ON p TO u", + "GRANT USAGE, SELECT, INSERT ON p TO u", + ); + trailing_commas.verified_stmt("SELECT album_id, name FROM track"); trailing_commas.verified_stmt("SELECT * FROM track ORDER BY milliseconds"); @@ -8961,6 +8966,13 @@ fn parse_trailing_comma() { ParserError::ParserError("Expected an expression, found: from".to_string()) ); + assert_eq!( + trailing_commas + .parse_sql_statements("REVOKE USAGE, SELECT, ON p TO u") + .unwrap_err(), + ParserError::ParserError("Expected a privilege keyword, found: ON".to_string()) + ); + assert_eq!( trailing_commas .parse_sql_statements("CREATE TABLE employees (name text, age int,)") From 66b4ec8486a18d2f542d6b83450d421ceca6572c Mon Sep 17 00:00:00 2001 From: Leonardo Yvens Date: Mon, 8 Jul 2024 11:32:45 +0100 Subject: [PATCH 017/506] Fix typo in `sqlparser-derive` README (#1310) --- derive/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/derive/README.md b/derive/README.md index ad4978a89..ffb5d266e 100644 --- a/derive/README.md +++ 
b/derive/README.md @@ -97,7 +97,7 @@ impl Visit for TableFactor { match self { Self::Table { name, alias } => { visitor.pre_visit_relation(name)?; - alias.visit(name)?; + name.visit(visitor)?; visitor.post_visit_relation(name)?; alias.visit(visitor)?; } From 17e5c0c1b6c3c52e5ffd0d2caa4aad7bd7d35958 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 8 Jul 2024 07:37:00 -0400 Subject: [PATCH 018/506] Fix CI error message in CI (#1333) --- tests/sqlparser_common.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 132874aa9..2b208016a 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -8970,7 +8970,7 @@ fn parse_trailing_comma() { trailing_commas .parse_sql_statements("REVOKE USAGE, SELECT, ON p TO u") .unwrap_err(), - ParserError::ParserError("Expected a privilege keyword, found: ON".to_string()) + ParserError::ParserError("Expected: a privilege keyword, found: ON".to_string()) ); assert_eq!( From bbee052890bb3eb64fe3e9fc20ad70ca06df3c5f Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 8 Jul 2024 14:38:59 -0400 Subject: [PATCH 019/506] Add stale PR github workflow (#1331) --- .github/workflows/stale.yml | 38 +++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 .github/workflows/stale.yml diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml new file mode 100644 index 000000000..231252682 --- /dev/null +++ b/.github/workflows/stale.yml @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: "Close stale PRs" +on: + schedule: + - cron: "30 1 * * *" + +jobs: + close-stale-prs: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@v9 + with: + stale-pr-message: "Thank you for your contribution. Unfortunately, this pull request is stale because it has been open 60 days with no activity. Please remove the stale label or comment or this will be closed in 7 days." + days-before-pr-stale: 60 + days-before-pr-close: 7 + # do not close stale issues + days-before-issue-stale: -1 + days-before-issue-close: -1 + repo-token: ${{ secrets.GITHUB_TOKEN }} From 9f60eb1571c4513140cb9a95bd107e26fcf6c7be Mon Sep 17 00:00:00 2001 From: Lorrens Pantelis <100197010+LorrensP-2158466@users.noreply.github.com> Date: Tue, 9 Jul 2024 13:46:49 +0200 Subject: [PATCH 020/506] Support `DROP PROCEDURE` statement (#1324) Co-authored-by: Andrew Lamb --- src/ast/mod.rs | 26 +++++++++ src/parser/mod.rs | 24 ++++++++- tests/sqlparser_postgres.rs | 102 ++++++++++++++++++++++++++++++++++++ 3 files changed, 151 insertions(+), 1 deletion(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index c904d4bc9..beee9f4bc 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2215,6 +2215,16 @@ pub enum Statement { option: Option, }, /// ```sql + /// DROP PROCEDURE + /// ``` + DropProcedure { + if_exists: bool, + /// One or more function to drop + proc_desc: Vec, + /// `CASCADE` or `RESTRICT` + option: Option, + }, + /// ```sql /// DROP SECRET /// ``` DropSecret { @@ -3644,6 +3654,22 @@ impl fmt::Display for Statement { } Ok(()) } + 
Statement::DropProcedure { + if_exists, + proc_desc, + option, + } => { + write!( + f, + "DROP PROCEDURE{} {}", + if *if_exists { " IF EXISTS" } else { "" }, + display_comma_separated(proc_desc), + )?; + if let Some(op) = option { + write!(f, " {op}")?; + } + Ok(()) + } Statement::DropSecret { if_exists, temporary, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 1dc6bff5e..a88cfcb9c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4524,11 +4524,13 @@ impl<'a> Parser<'a> { ObjectType::Stage } else if self.parse_keyword(Keyword::FUNCTION) { return self.parse_drop_function(); + } else if self.parse_keyword(Keyword::PROCEDURE) { + return self.parse_drop_procedure(); } else if self.parse_keyword(Keyword::SECRET) { return self.parse_drop_secret(temporary, persistent); } else { return self.expected( - "TABLE, VIEW, INDEX, ROLE, SCHEMA, FUNCTION, STAGE or SEQUENCE after DROP", + "TABLE, VIEW, INDEX, ROLE, SCHEMA, FUNCTION, PROCEDURE, STAGE or SEQUENCE after DROP", self.peek_token(), ); }; @@ -4580,6 +4582,26 @@ impl<'a> Parser<'a> { }) } + /// ```sql + /// DROP PROCEDURE [ IF EXISTS ] name [ ( [ [ argmode ] [ argname ] argtype [, ...] ] ) ] [, ...] 
+ /// [ CASCADE | RESTRICT ] + /// ``` + fn parse_drop_procedure(&mut self) -> Result { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let proc_desc = self.parse_comma_separated(Parser::parse_drop_function_desc)?; + let option = match self.parse_one_of_keywords(&[Keyword::CASCADE, Keyword::RESTRICT]) { + Some(Keyword::CASCADE) => Some(ReferentialAction::Cascade), + Some(Keyword::RESTRICT) => Some(ReferentialAction::Restrict), + Some(_) => unreachable!(), // parse_one_of_keywords does not return other keywords + None => None, + }; + Ok(Statement::DropProcedure { + if_exists, + proc_desc, + option, + }) + } + fn parse_drop_function_desc(&mut self) -> Result { let name = self.parse_object_name(false)?; diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 2d3097cf9..2da82c122 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -3629,6 +3629,108 @@ fn parse_drop_function() { ); } +#[test] +fn parse_drop_procedure() { + let sql = "DROP PROCEDURE IF EXISTS test_proc"; + assert_eq!( + pg().verified_stmt(sql), + Statement::DropProcedure { + if_exists: true, + proc_desc: vec![DropFunctionDesc { + name: ObjectName(vec![Ident { + value: "test_proc".to_string(), + quote_style: None + }]), + args: None + }], + option: None + } + ); + + let sql = "DROP PROCEDURE IF EXISTS test_proc(a INTEGER, IN b INTEGER = 1)"; + assert_eq!( + pg().verified_stmt(sql), + Statement::DropProcedure { + if_exists: true, + proc_desc: vec![DropFunctionDesc { + name: ObjectName(vec![Ident { + value: "test_proc".to_string(), + quote_style: None + }]), + args: Some(vec![ + OperateFunctionArg::with_name("a", DataType::Integer(None)), + OperateFunctionArg { + mode: Some(ArgMode::In), + name: Some("b".into()), + data_type: DataType::Integer(None), + default_expr: Some(Expr::Value(Value::Number("1".parse().unwrap(), false))), + } + ]), + }], + option: None + } + ); + + let sql = "DROP PROCEDURE IF EXISTS test_proc1(a INTEGER, IN b 
INTEGER = 1), test_proc2(a VARCHAR, IN b INTEGER = 1)"; + assert_eq!( + pg().verified_stmt(sql), + Statement::DropProcedure { + if_exists: true, + proc_desc: vec![ + DropFunctionDesc { + name: ObjectName(vec![Ident { + value: "test_proc1".to_string(), + quote_style: None + }]), + args: Some(vec![ + OperateFunctionArg::with_name("a", DataType::Integer(None)), + OperateFunctionArg { + mode: Some(ArgMode::In), + name: Some("b".into()), + data_type: DataType::Integer(None), + default_expr: Some(Expr::Value(Value::Number( + "1".parse().unwrap(), + false + ))), + } + ]), + }, + DropFunctionDesc { + name: ObjectName(vec![Ident { + value: "test_proc2".to_string(), + quote_style: None + }]), + args: Some(vec![ + OperateFunctionArg::with_name("a", DataType::Varchar(None)), + OperateFunctionArg { + mode: Some(ArgMode::In), + name: Some("b".into()), + data_type: DataType::Integer(None), + default_expr: Some(Expr::Value(Value::Number( + "1".parse().unwrap(), + false + ))), + } + ]), + } + ], + option: None + } + ); + + let res = pg().parse_sql_statements("DROP PROCEDURE testproc DROP"); + assert_eq!( + ParserError::ParserError("Expected: end of statement, found: DROP".to_string()), + res.unwrap_err() + ); + + let res = pg().parse_sql_statements("DROP PROCEDURE testproc SET NULL"); + assert_eq!( + ParserError::ParserError("Expected: end of statement, found: SET".to_string()), + res.unwrap_err() + ); +} + #[test] fn parse_dollar_quoted_string() { let sql = "SELECT $$hello$$, $tag_name$world$tag_name$, $$Foo$Bar$$, $$Foo$Bar$$col_name, $$$$, $tag_name$$tag_name$"; From 07278952f9ba9c717652ae463febf14db13777ce Mon Sep 17 00:00:00 2001 From: hulk Date: Tue, 9 Jul 2024 19:49:04 +0800 Subject: [PATCH 021/506] Add support of FORMAT clause for ClickHouse parser (#1335) --- src/ast/mod.rs | 16 ++++++++-------- src/ast/query.rs | 28 ++++++++++++++++++++++++++++ src/keywords.rs | 2 ++ src/parser/mod.rs | 16 ++++++++++++++++ tests/sqlparser_clickhouse.rs | 32 
++++++++++++++++++++++++++++++++ tests/sqlparser_common.rs | 6 ++++++ tests/sqlparser_mssql.rs | 2 ++ tests/sqlparser_mysql.rs | 15 +++++++++++++++ tests/sqlparser_postgres.rs | 5 +++++ 9 files changed, 114 insertions(+), 8 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index beee9f4bc..58f094411 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -43,14 +43,14 @@ pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode, ExceptSelectItem, ExcludeSelectItem, ExprWithAlias, Fetch, ForClause, ForJson, ForXml, - GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem, Join, JoinConstraint, - JoinOperator, JsonTableColumn, JsonTableColumnErrorHandling, LateralView, LockClause, LockType, - MatchRecognizePattern, MatchRecognizeSymbol, Measure, NamedWindowDefinition, NamedWindowExpr, - NonBlock, Offset, OffsetRows, OrderByExpr, PivotValueSource, Query, RenameSelectItem, - RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, - SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Setting, SymbolDefinition, Table, - TableAlias, TableFactor, TableVersion, TableWithJoins, Top, TopQuantity, ValueTableMode, - Values, WildcardAdditionalOptions, With, + FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem, Join, + JoinConstraint, JoinOperator, JsonTableColumn, JsonTableColumnErrorHandling, LateralView, + LockClause, LockType, MatchRecognizePattern, MatchRecognizeSymbol, Measure, + NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset, OffsetRows, OrderByExpr, + PivotValueSource, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement, + ReplaceSelectItem, RowsPerMatch, Select, SelectInto, SelectItem, SetExpr, SetOperator, + SetQuantifier, Setting, SymbolDefinition, Table, TableAlias, TableFactor, TableVersion, + TableWithJoins, Top, TopQuantity, ValueTableMode, Values, 
WildcardAdditionalOptions, With, }; pub use self::value::{ escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString, diff --git a/src/ast/query.rs b/src/ast/query.rs index 7d2626b2d..70c781409 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -54,6 +54,11 @@ pub struct Query { /// /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select#settings-in-select-query) pub settings: Option>, + /// `SELECT * FROM t FORMAT JSONCompact` + /// + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select/format) + /// (ClickHouse-specific) + pub format_clause: Option, } impl fmt::Display for Query { @@ -86,6 +91,9 @@ impl fmt::Display for Query { if let Some(ref for_clause) = self.for_clause { write!(f, " {}", for_clause)?; } + if let Some(ref format) = self.format_clause { + write!(f, " {}", format)?; + } Ok(()) } } @@ -1959,6 +1967,26 @@ impl fmt::Display for GroupByExpr { } } +/// FORMAT identifier or FORMAT NULL clause, specific to ClickHouse. +/// +/// [ClickHouse]: +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum FormatClause { + Identifier(Ident), + Null, +} + +impl fmt::Display for FormatClause { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + FormatClause::Identifier(ident) => write!(f, "FORMAT {}", ident), + FormatClause::Null => write!(f, "FORMAT NULL"), + } + } +} + /// FOR XML or FOR JSON clause, specific to MSSQL /// (formats the output of a query as XML or JSON) #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] diff --git a/src/keywords.rs b/src/keywords.rs index eb69a209b..edd3271f3 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -856,6 +856,8 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::PREWHERE, // for ClickHouse SELECT * FROM t SETTINGS ... 
Keyword::SETTINGS, + // for ClickHouse SELECT * FROM t FORMAT... + Keyword::FORMAT, // for Snowflake START WITH .. CONNECT BY Keyword::START, Keyword::CONNECT, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a88cfcb9c..aada0bc56 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -7918,6 +7918,7 @@ impl<'a> Parser<'a> { locks: vec![], for_clause: None, settings: None, + format_clause: None, }) } else if self.parse_keyword(Keyword::UPDATE) { Ok(Query { @@ -7931,6 +7932,7 @@ impl<'a> Parser<'a> { locks: vec![], for_clause: None, settings: None, + format_clause: None, }) } else { let body = self.parse_boxed_query_body(0)?; @@ -8006,6 +8008,18 @@ impl<'a> Parser<'a> { locks.push(self.parse_lock()?); } } + let format_clause = if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keyword(Keyword::FORMAT) + { + if self.parse_keyword(Keyword::NULL) { + Some(FormatClause::Null) + } else { + let ident = self.parse_identifier(false)?; + Some(FormatClause::Identifier(ident)) + } + } else { + None + }; Ok(Query { with, @@ -8018,6 +8032,7 @@ impl<'a> Parser<'a> { locks, for_clause, settings, + format_clause, }) } } @@ -9164,6 +9179,7 @@ impl<'a> Parser<'a> { locks: vec![], for_clause: None, settings: None, + format_clause: None, }), alias, }) diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 29a5b15aa..f6b787f5c 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -768,6 +768,38 @@ fn test_prewhere() { } } +#[test] +fn test_query_with_format_clause() { + let format_options = vec!["TabSeparated", "JSONCompact", "NULL"]; + for format in &format_options { + let sql = format!("SELECT * FROM t FORMAT {}", format); + match clickhouse_and_generic().verified_stmt(&sql) { + Statement::Query(query) => { + if *format == "NULL" { + assert_eq!(query.format_clause, Some(FormatClause::Null)); + } else { + assert_eq!( + query.format_clause, + Some(FormatClause::Identifier(Ident::new(*format))) + ); + 
} + } + _ => unreachable!(), + } + } + + let invalid_cases = [ + "SELECT * FROM t FORMAT", + "SELECT * FROM t FORMAT TabSeparated JSONCompact", + "SELECT * FROM t FORMAT TabSeparated TabSeparated", + ]; + for sql in &invalid_cases { + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("Expected: FORMAT {identifier}, found: "); + } +} + fn clickhouse() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {})], diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 2b208016a..86357234c 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -415,6 +415,7 @@ fn parse_update_set_from() { locks: vec![], for_clause: None, settings: None, + format_clause: None, }), alias: Some(TableAlias { name: Ident::new("t2"), @@ -3430,6 +3431,7 @@ fn parse_create_table_as_table() { locks: vec![], for_clause: None, settings: None, + format_clause: None, }); match verified_stmt(sql1) { @@ -3456,6 +3458,7 @@ fn parse_create_table_as_table() { locks: vec![], for_clause: None, settings: None, + format_clause: None, }); match verified_stmt(sql2) { @@ -5003,6 +5006,7 @@ fn parse_interval_and_or_xor() { locks: vec![], for_clause: None, settings: None, + format_clause: None, }))]; assert_eq!(actual_ast, expected_ast); @@ -7659,6 +7663,7 @@ fn parse_merge() { locks: vec![], for_clause: None, settings: None, + format_clause: None, }), alias: Some(TableAlias { name: Ident { @@ -9180,6 +9185,7 @@ fn parse_unload() { for_clause: None, order_by: vec![], settings: None, + format_clause: None, }), to: Ident { value: "s3://...".to_string(), diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index e0e0f7c70..6968347ec 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -104,6 +104,7 @@ fn parse_create_procedure() { for_clause: None, order_by: vec![], settings: None, + format_clause: None, body: Box::new(SetExpr::Select(Box::new(Select { distinct: None, top: None, @@ -550,6 +551,7 @@ fn 
parse_substring_in_select() { locks: vec![], for_clause: None, settings: None, + format_clause: None, }), query ); diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index a5fa75200..74def31bf 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -927,6 +927,7 @@ fn parse_escaped_quote_identifiers_with_escape() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })) ); } @@ -976,6 +977,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })) ); } @@ -1022,6 +1024,7 @@ fn parse_escaped_backticks_with_escape() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })) ); } @@ -1068,6 +1071,7 @@ fn parse_escaped_backticks_with_no_escape() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })) ); } @@ -1273,6 +1277,7 @@ fn parse_simple_insert() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })), source ); @@ -1316,6 +1321,7 @@ fn parse_ignore_insert() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })), source ); @@ -1359,6 +1365,7 @@ fn parse_priority_insert() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })), source ); @@ -1399,6 +1406,7 @@ fn parse_priority_insert() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })), source ); @@ -1447,6 +1455,7 @@ fn parse_insert_as() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })), source ); @@ -1507,6 +1516,7 @@ fn parse_insert_as() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })), source ); @@ -1551,6 +1561,7 @@ fn parse_replace_insert() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })), source ); @@ -1589,6 +1600,7 @@ fn parse_empty_row_insert() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })), source ); @@ -1650,6 +1662,7 @@ fn 
parse_insert_with_on_duplicate_update() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })), source ); @@ -2294,6 +2307,7 @@ fn parse_substring_in_select() { locks: vec![], for_clause: None, settings: None, + format_clause: None, }), query ); @@ -2601,6 +2615,7 @@ fn parse_hex_string_introducer() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })) ) } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 2da82c122..74f70a6e5 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1095,6 +1095,7 @@ fn parse_copy_to() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })), to: true, target: CopyTarget::File { @@ -2426,6 +2427,7 @@ fn parse_array_subquery_expr() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })), filter: None, null_treatment: None, @@ -4050,6 +4052,7 @@ fn test_simple_postgres_insert_with_alias() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })), partitioned: None, after_columns: vec![], @@ -4118,6 +4121,7 @@ fn test_simple_postgres_insert_with_alias() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })), partitioned: None, after_columns: vec![], @@ -4182,6 +4186,7 @@ fn test_simple_insert_with_quoted_alias() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })), partitioned: None, after_columns: vec![], From 32b8276b328ad014cdfbeb85d1618bb0b25c7130 Mon Sep 17 00:00:00 2001 From: gai takano Date: Tue, 9 Jul 2024 20:49:48 +0900 Subject: [PATCH 022/506] Postgres: support for `OWNER TO` clause (#1314) Co-authored-by: Andrew Lamb --- src/ast/ddl.rs | 29 +++++++++++++++ src/ast/mod.rs | 4 +-- src/keywords.rs | 1 + src/parser/mod.rs | 19 ++++++++++ tests/sqlparser_postgres.rs | 72 +++++++++++++++++++++++++++++++++++++ 5 files changed, 123 insertions(+), 2 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 
9c30999ab..1ed3857d7 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -157,6 +157,32 @@ pub enum AlterTableOperation { SwapWith { table_name: ObjectName }, /// 'SET TBLPROPERTIES ( { property_key [ = ] property_val } [, ...] )' SetTblProperties { table_properties: Vec }, + + /// `OWNER TO { | CURRENT_ROLE | CURRENT_USER | SESSION_USER }` + /// + /// Note: this is PostgreSQL-specific + OwnerTo { new_owner: Owner }, +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum Owner { + Ident(Ident), + CurrentRole, + CurrentUser, + SessionUser, +} + +impl fmt::Display for Owner { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Owner::Ident(ident) => write!(f, "{}", ident), + Owner::CurrentRole => write!(f, "CURRENT_ROLE"), + Owner::CurrentUser => write!(f, "CURRENT_USER"), + Owner::SessionUser => write!(f, "SESSION_USER"), + } + } } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -322,6 +348,9 @@ impl fmt::Display for AlterTableOperation { AlterTableOperation::SwapWith { table_name } => { write!(f, "SWAP WITH {table_name}") } + AlterTableOperation::OwnerTo { new_owner } => { + write!(f, "OWNER TO {new_owner}") + } AlterTableOperation::SetTblProperties { table_properties } => { write!( f, diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 58f094411..b8d72e233 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -34,8 +34,8 @@ pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue} pub use self::ddl::{ AlterColumnOperation, AlterIndexOperation, AlterTableOperation, ColumnDef, ColumnOption, ColumnOptionDef, ConstraintCharacteristics, DeferrableInitial, GeneratedAs, - GeneratedExpressionMode, IndexOption, IndexType, KeyOrIndexDisplay, Partition, ProcedureParam, - ReferentialAction, TableConstraint, UserDefinedTypeCompositeAttributeDef, + GeneratedExpressionMode, 
IndexOption, IndexType, KeyOrIndexDisplay, Owner, Partition, + ProcedureParam, ReferentialAction, TableConstraint, UserDefinedTypeCompositeAttributeDef, UserDefinedTypeRepresentation, ViewColumnDef, }; pub use self::dml::{CreateIndex, CreateTable, Delete, Insert}; diff --git a/src/keywords.rs b/src/keywords.rs index edd3271f3..7146c4efe 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -527,6 +527,7 @@ define_keywords!( OVERLAY, OVERWRITE, OWNED, + OWNER, PARALLEL, PARAMETER, PARQUET, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index aada0bc56..87166f503 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6447,6 +6447,25 @@ impl<'a> Parser<'a> { self.expect_keyword(Keyword::WITH)?; let table_name = self.parse_object_name(false)?; AlterTableOperation::SwapWith { table_name } + } else if dialect_of!(self is PostgreSqlDialect | GenericDialect) + && self.parse_keywords(&[Keyword::OWNER, Keyword::TO]) + { + let new_owner = match self.parse_one_of_keywords( &[Keyword::CURRENT_USER, Keyword::CURRENT_ROLE, Keyword::SESSION_USER]) { + Some(Keyword::CURRENT_USER) => Owner::CurrentUser, + Some(Keyword::CURRENT_ROLE) => Owner::CurrentRole, + Some(Keyword::SESSION_USER) => Owner::SessionUser, + Some(_) => unreachable!(), + None => { + match self.parse_identifier(false) { + Ok(ident) => Owner::Ident(ident), + Err(e) => { + return Err(ParserError::ParserError(format!("Expected: CURRENT_USER, CURRENT_ROLE, SESSION_USER or identifier after OWNER TO. 
{e}"))) + } + } + }, + }; + + AlterTableOperation::OwnerTo { new_owner } } else { let options: Vec = self.parse_options_with_keywords(&[Keyword::SET, Keyword::TBLPROPERTIES])?; diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 74f70a6e5..9af4f4d6c 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -713,6 +713,78 @@ fn parse_alter_table_add_columns() { } } +#[test] +fn parse_alter_table_owner_to() { + struct TestCase { + sql: &'static str, + expected_owner: Owner, + } + + let test_cases = vec![ + TestCase { + sql: "ALTER TABLE tab OWNER TO new_owner", + expected_owner: Owner::Ident(Ident::new("new_owner".to_string())), + }, + TestCase { + sql: "ALTER TABLE tab OWNER TO postgres", + expected_owner: Owner::Ident(Ident::new("postgres".to_string())), + }, + TestCase { + sql: "ALTER TABLE tab OWNER TO CREATE", // treats CREATE as an identifier + expected_owner: Owner::Ident(Ident::new("CREATE".to_string())), + }, + TestCase { + sql: "ALTER TABLE tab OWNER TO \"new_owner\"", + expected_owner: Owner::Ident(Ident::with_quote('\"', "new_owner".to_string())), + }, + TestCase { + sql: "ALTER TABLE tab OWNER TO CURRENT_USER", + expected_owner: Owner::CurrentUser, + }, + TestCase { + sql: "ALTER TABLE tab OWNER TO CURRENT_ROLE", + expected_owner: Owner::CurrentRole, + }, + TestCase { + sql: "ALTER TABLE tab OWNER TO SESSION_USER", + expected_owner: Owner::SessionUser, + }, + ]; + + for case in test_cases { + match pg_and_generic().verified_stmt(case.sql) { + Statement::AlterTable { + name, + if_exists: _, + only: _, + operations, + location: _, + } => { + assert_eq!(name.to_string(), "tab"); + assert_eq!( + operations, + vec![AlterTableOperation::OwnerTo { + new_owner: case.expected_owner.clone() + }] + ); + } + _ => unreachable!("Expected an AlterTable statement"), + } + } + + let res = pg().parse_sql_statements("ALTER TABLE tab OWNER TO CREATE FOO"); + assert_eq!( + ParserError::ParserError("Expected: end of statement, 
found: FOO".to_string()), + res.unwrap_err() + ); + + let res = pg().parse_sql_statements("ALTER TABLE tab OWNER TO 4"); + assert_eq!( + ParserError::ParserError("Expected: CURRENT_USER, CURRENT_ROLE, SESSION_USER or identifier after OWNER TO. sql parser error: Expected: identifier, found: 4".to_string()), + res.unwrap_err() + ); +} + #[test] fn parse_create_table_if_not_exists() { let sql = "CREATE TABLE IF NOT EXISTS uk_cities ()"; From 4e956a172344952f1162405db74599391bc25860 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 9 Jul 2024 08:58:02 -0400 Subject: [PATCH 023/506] Add CHANGELOG for 0.48.0 (#1334) --- CHANGELOG.md | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 18df2e33a..ed5c9ecb4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,43 @@ changes that break via addition as "Added". ## [Unreleased] Check https://github.com/sqlparser-rs/sqlparser-rs/commits/main for undocumented changes. +## [0.48.0] 2024-07-09 + +Huge shout out to @iffyio @jmhain and @lovasoa for their help reviewing and merging PRs! + +### Fixed +* Fix CI error message in CI (#1333) - Thanks @alamb +* Fix typo in sqlparser-derive README (#1310) - Thanks @leoyvens +* Re-enable trailing commas in DCL (#1318) - Thanks @MohamedAbdeen21 +* Fix a few typos in comment lines (#1316) - Thanks @git-hulk +* Fix Snowflake `SELECT * wildcard REPLACE ... 
RENAME` order (#1321) - Thanks @alexander-beedie +* Allow semi-colon at the end of UNCACHE statement (#1320) - Thanks @LorrensP-2158466 +* Return errors, not panic, when integers fail to parse in `AUTO_INCREMENT` and `TOP` (#1305) - Thanks @eejbyfeldt + +### Added +* Support `OWNER TO` clause in Postgres (#1314) - Thanks @gainings +* Support `FORMAT` clause for ClickHouse (#1335) - Thanks @git-hulk +* Support `DROP PROCEDURE` statement (#1324) - Thanks @LorrensP-2158466 +* Support `PREWHERE` condition for ClickHouse dialect (#1328) - Thanks @git-hulk +* Support `SETTINGS` pairs for ClickHouse dialect (#1327) - Thanks @git-hulk +* Support `GROUP BY WITH MODIFIER` for ClickHouse dialect (#1323) - Thanks @git-hulk +* Support DuckDB Union datatype (#1322) - Thanks @gstvg +* Support parametric arguments to `FUNCTION` for ClickHouse dialect (#1315) - Thanks @git-hulk +* Support `TO` in `CREATE VIEW` clause for Clickhouse (#1313) - Thanks @Bidaya0 +* Support `UPDATE` statements that contain tuple assignments (#1317) - Thanks @lovasoa +* Support `BY NAME quantifier across all set ops (#1309) - Thanks @alexander-beedie +* Support SnowFlake exclusive `CREATE TABLE` options (#1233) - Thanks @balliegojr +* Support ClickHouse `CREATE TABLE` with primary key and parametrised table engine (#1289) - Thanks @7phs +* Support custom operators in Postgres (#1302) - Thanks @lovasoa +* Support ClickHouse data types (#1285) - Thanks @7phs + +### Changed +* Add stale PR github workflow (#1331) - Thanks @alamb +* Refine docs (#1326) - Thanks @emilsivervik +* Improve error messages with additional colons (#1319) - Thanks @LorrensP-2158466 +* Move Display fmt to struct for `CreateIndex` (#1307) - Thanks @philipcristiano +* Enhancing Trailing Comma Option (#1212) - Thanks @MohamedAbdeen21 +* Encapsulate `CreateTable`, `CreateIndex` into specific structs (#1291) - Thanks @philipcristiano ## [0.47.0] 2024-06-01 From 285f49258967df22a455febe22773d158dd2476f Mon Sep 17 00:00:00 2001 From: Andrew 
Lamb Date: Tue, 9 Jul 2024 08:58:59 -0400 Subject: [PATCH 024/506] chore: Release sqlparser version 0.48.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 8d015968b..b0bee003e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.47.0" +version = "0.48.0" authors = ["Andy Grove "] homepage = "https://github.com/sqlparser-rs/sqlparser-rs" documentation = "https://docs.rs/sqlparser/" From 9108bffc9a021aa1f5137381c8f3aec47e71e319 Mon Sep 17 00:00:00 2001 From: hulk Date: Wed, 10 Jul 2024 05:43:22 +0800 Subject: [PATCH 025/506] Add support of table function WITH ORDINALITY modifier for Postgre Parser (#1337) --- src/ast/query.rs | 14 ++++++++++++ src/keywords.rs | 1 + src/parser/mod.rs | 5 ++++ src/test_utils.rs | 2 ++ tests/sqlparser_bigquery.rs | 8 ++++++- tests/sqlparser_clickhouse.rs | 2 ++ tests/sqlparser_common.rs | 43 +++++++++++++++++++++++++++++++++++ tests/sqlparser_databricks.rs | 3 ++- tests/sqlparser_duckdb.rs | 2 ++ tests/sqlparser_hive.rs | 1 + tests/sqlparser_mssql.rs | 3 +++ tests/sqlparser_mysql.rs | 5 ++++ tests/sqlparser_postgres.rs | 37 +++++++++++++++++++++++++++++- tests/sqlparser_redshift.rs | 3 +++ tests/sqlparser_snowflake.rs | 1 + tests/sqlparser_sqlite.rs | 3 ++- 16 files changed, 129 insertions(+), 4 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 70c781409..608ac2e96 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -913,6 +913,10 @@ pub enum TableFactor { /// Optional version qualifier to facilitate table time-travel, as /// supported by BigQuery and MSSQL. version: Option, + // Optional table function modifier to generate the ordinality for column. 
+ /// For example, `SELECT * FROM generate_series(1, 10) WITH ORDINALITY AS t(a, b);` + /// [WITH ORDINALITY](https://www.postgresql.org/docs/current/functions-srf.html), supported by Postgres. + with_ordinality: bool, /// [Partition selection](https://dev.mysql.com/doc/refman/8.0/en/partitioning-selection.html), supported by MySQL. partitions: Vec, }, @@ -948,6 +952,7 @@ pub enum TableFactor { array_exprs: Vec, with_offset: bool, with_offset_alias: Option, + with_ordinality: bool, }, /// The `JSON_TABLE` table-valued function. /// Part of the SQL standard, but implemented only by MySQL, Oracle, and DB2. @@ -1293,6 +1298,7 @@ impl fmt::Display for TableFactor { with_hints, version, partitions, + with_ordinality, } => { write!(f, "{name}")?; if !partitions.is_empty() { @@ -1301,6 +1307,9 @@ impl fmt::Display for TableFactor { if let Some(args) = args { write!(f, "({})", display_comma_separated(args))?; } + if *with_ordinality { + write!(f, " WITH ORDINALITY")?; + } if let Some(alias) = alias { write!(f, " AS {alias}")?; } @@ -1354,9 +1363,14 @@ impl fmt::Display for TableFactor { array_exprs, with_offset, with_offset_alias, + with_ordinality, } => { write!(f, "UNNEST({})", display_comma_separated(array_exprs))?; + if *with_ordinality { + write!(f, " WITH ORDINALITY")?; + } + if let Some(alias) = alias { write!(f, " AS {alias}")?; } diff --git a/src/keywords.rs b/src/keywords.rs index 7146c4efe..a53eaccba 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -518,6 +518,7 @@ define_keywords!( OR, ORC, ORDER, + ORDINALITY, OUT, OUTER, OUTPUTFORMAT, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 87166f503..e89eba9b1 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9209,6 +9209,7 @@ impl<'a> Parser<'a> { let array_exprs = self.parse_comma_separated(Parser::parse_expr)?; self.expect_token(&Token::RParen)?; + let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); let alias = match 
self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS) { Ok(Some(alias)) => Some(alias), Ok(None) => None, @@ -9235,6 +9236,7 @@ impl<'a> Parser<'a> { array_exprs, with_offset, with_offset_alias, + with_ordinality, }) } else if self.parse_keyword_with_tokens(Keyword::JSON_TABLE, &[Token::LParen]) { let json_expr = self.parse_expr()?; @@ -9273,6 +9275,8 @@ impl<'a> Parser<'a> { None }; + let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; // MSSQL-specific table hints: @@ -9294,6 +9298,7 @@ impl<'a> Parser<'a> { with_hints, version, partitions, + with_ordinality, }; while let Some(kw) = self.parse_one_of_keywords(&[Keyword::PIVOT, Keyword::UNPIVOT]) { diff --git a/src/test_utils.rs b/src/test_utils.rs index 1a31d4611..1f5300be1 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -309,6 +309,7 @@ pub fn table(name: impl Into) -> TableFactor { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, } } @@ -323,6 +324,7 @@ pub fn table_with_alias(name: impl Into, alias: impl Into) -> Ta with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, } } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 88e2ef912..089a41889 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -224,6 +224,7 @@ fn parse_delete_statement() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[0].relation ); @@ -1353,6 +1354,7 @@ fn parse_table_identifiers() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![] },] @@ -1525,6 +1527,7 @@ fn parse_table_time_travel() { Value::SingleQuotedString(version) ))), partitions: vec![], + with_ordinality: false, }, joins: vec![] },] @@ -1551,7 +1554,8 @@ fn parse_join_constraint_unnest_alias() { Ident::new("a") ])], with_offset: false, - 
with_offset_alias: None + with_offset_alias: None, + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::Identifier("c1".into())), @@ -1620,6 +1624,7 @@ fn parse_merge() { with_hints: Default::default(), version: Default::default(), partitions: Default::default(), + with_ordinality: false, }, table ); @@ -1634,6 +1639,7 @@ fn parse_merge() { with_hints: Default::default(), version: Default::default(), partitions: Default::default(), + with_ordinality: false, }, source ); diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index f6b787f5c..99db3d10c 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -59,6 +59,7 @@ fn parse_map_access_expr() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], @@ -162,6 +163,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + with_ordinality: _, partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 86357234c..1adda149e 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -359,6 +359,7 @@ fn parse_update_set_from() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }, @@ -387,6 +388,7 @@ fn parse_update_set_from() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], @@ -463,6 +465,7 @@ fn parse_update_with_table_alias() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }, @@ -530,6 +533,7 @@ fn parse_select_with_table_alias() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }] @@ -566,6 +570,7 @@ fn parse_delete_statement() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[0].relation 
); @@ -612,6 +617,7 @@ fn parse_delete_statement_for_multi_tables() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[0].relation ); @@ -623,6 +629,7 @@ fn parse_delete_statement_for_multi_tables() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[0].joins[0].relation ); @@ -648,6 +655,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[0].relation ); @@ -659,6 +667,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[1].relation ); @@ -670,6 +679,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, using[0].relation ); @@ -681,6 +691,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, using[0].joins[0].relation ); @@ -711,6 +722,7 @@ fn parse_where_delete_statement() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[0].relation, ); @@ -755,6 +767,7 @@ fn parse_where_delete_with_alias_statement() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[0].relation, ); @@ -770,6 +783,7 @@ fn parse_where_delete_with_alias_statement() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }]), @@ -4551,6 +4565,7 @@ fn test_parse_named_window() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], @@ -4933,6 +4948,7 @@ fn parse_interval_and_or_xor() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], @@ -5286,6 +5302,7 @@ fn parse_unnest_in_from_clause() { array_exprs: 
vec![Expr::Identifier(Ident::new("expr"))], with_offset: true, with_offset_alias: None, + with_ordinality: false, }, joins: vec![], }], @@ -5303,6 +5320,7 @@ fn parse_unnest_in_from_clause() { array_exprs: vec![Expr::Identifier(Ident::new("expr"))], with_offset: false, with_offset_alias: None, + with_ordinality: false, }, joins: vec![], }], @@ -5320,6 +5338,7 @@ fn parse_unnest_in_from_clause() { array_exprs: vec![Expr::Identifier(Ident::new("expr"))], with_offset: true, with_offset_alias: None, + with_ordinality: false, }, joins: vec![], }], @@ -5340,6 +5359,7 @@ fn parse_unnest_in_from_clause() { array_exprs: vec![Expr::Identifier(Ident::new("expr"))], with_offset: false, with_offset_alias: None, + with_ordinality: false, }, joins: vec![], }], @@ -5364,6 +5384,7 @@ fn parse_unnest_in_from_clause() { )], with_offset: false, with_offset_alias: None, + with_ordinality: false, }, joins: vec![], }], @@ -5394,6 +5415,7 @@ fn parse_unnest_in_from_clause() { ], with_offset: false, with_offset_alias: None, + with_ordinality: false, }, joins: vec![], }], @@ -5503,6 +5525,7 @@ fn parse_implicit_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }, @@ -5514,6 +5537,7 @@ fn parse_implicit_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }, @@ -5533,6 +5557,7 @@ fn parse_implicit_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![Join { relation: TableFactor::Table { @@ -5542,6 +5567,7 @@ fn parse_implicit_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -5554,6 +5580,7 @@ fn parse_implicit_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![Join { relation: TableFactor::Table { @@ -5563,6 +5590,7 @@ fn parse_implicit_join() { with_hints: vec![], 
version: None, partitions: vec![], + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -5585,6 +5613,7 @@ fn parse_cross_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: JoinOperator::CrossJoin, }, @@ -5607,6 +5636,7 @@ fn parse_joins_on() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: f(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::Identifier("c1".into())), @@ -5678,6 +5708,7 @@ fn parse_joins_using() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: f(JoinConstraint::Using(vec!["c1".into()])), } @@ -5741,6 +5772,7 @@ fn parse_natural_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: f(JoinConstraint::Natural), } @@ -6008,6 +6040,7 @@ fn parse_derived_tables() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -6905,6 +6938,7 @@ fn lateral_function() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![Join { relation: TableFactor::Function { @@ -7613,6 +7647,7 @@ fn parse_merge() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, } ); assert_eq!(table, table_no_into); @@ -7638,6 +7673,7 @@ fn parse_merge() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], @@ -8700,6 +8736,7 @@ fn parse_pivot_table() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }), aggregate_functions: vec![ expected_function("a", None), @@ -8769,6 +8806,7 @@ fn parse_unpivot_table() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }), value: Ident { value: "quantity".to_string(), @@ -8835,6 +8873,7 @@ fn 
parse_pivot_unpivot_table() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }), value: Ident { value: "population".to_string(), @@ -9159,6 +9198,7 @@ fn parse_unload() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], @@ -9304,6 +9344,7 @@ fn parse_connect_by() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], @@ -9389,6 +9430,7 @@ fn parse_connect_by() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], @@ -9548,6 +9590,7 @@ fn test_match_recognize() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }; fn check(options: &str, expect: TableFactor) { diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 90056f0f7..280b97b49 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -177,7 +177,8 @@ fn test_values_clause() { args: None, with_hints: vec![], version: None, - partitions: vec![] + partitions: vec![], + with_ordinality: false, }), query .body diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 400daa8a8..0e61b86c9 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -166,6 +166,7 @@ fn test_select_union_by_name() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], @@ -205,6 +206,7 @@ fn test_select_union_by_name() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 53280d7d8..5f0b9f575 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -359,6 +359,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + with_ordinality: _, partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); diff --git 
a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 6968347ec..26bece81d 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -64,6 +64,7 @@ fn parse_table_time_travel() { Value::SingleQuotedString(version) ))), partitions: vec![], + with_ordinality: false, }, joins: vec![] },] @@ -335,6 +336,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + with_ordinality: _, partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); @@ -526,6 +528,7 @@ fn parse_substring_in_select() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![] }], diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 74def31bf..ec094bcd6 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1728,6 +1728,7 @@ fn parse_select_with_numeric_prefix_column_name() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![] }], @@ -1782,6 +1783,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![] }], @@ -1847,6 +1849,7 @@ fn parse_update_with_joins() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![Join { relation: TableFactor::Table { @@ -1859,6 +1862,7 @@ fn parse_update_with_joins() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::CompoundIdentifier(vec![ @@ -2282,6 +2286,7 @@ fn parse_substring_in_select() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![] }], diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 9af4f4d6c..164bb72c7 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -3501,6 +3501,7 @@ fn 
parse_delimited_identifiers() { args, with_hints, version, + with_ordinality: _, partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); @@ -4054,7 +4055,8 @@ fn parse_join_constraint_unnest_alias() { Ident::new("a") ])], with_offset: false, - with_offset_alias: None + with_offset_alias: None, + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::Identifier("c1".into())), @@ -4362,3 +4364,36 @@ fn parse_create_table_with_options() { _ => unreachable!(), } } + +#[test] +fn test_table_function_with_ordinality() { + let from = pg_and_generic() + .verified_only_select("SELECT * FROM generate_series(1, 10) WITH ORDINALITY AS t") + .from; + assert_eq!(1, from.len()); + match from[0].relation { + TableFactor::Table { + ref name, + with_ordinality: true, + .. + } => { + assert_eq!("generate_series", name.to_string().as_str()); + } + _ => panic!("Expecting TableFactor::Table with ordinality"), + } +} + +#[test] +fn test_table_unnest_with_ordinality() { + let from = pg_and_generic() + .verified_only_select("SELECT * FROM UNNEST([10, 20, 30]) WITH ORDINALITY AS t") + .from; + assert_eq!(1, from.len()); + match from[0].relation { + TableFactor::UNNEST { + with_ordinality: true, + .. 
+ } => {} + _ => panic!("Expecting TableFactor::UNNEST with ordinality"), + } +} diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 938e6e887..440116e02 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -48,6 +48,7 @@ fn test_square_brackets_over_db_schema_table_name() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], } @@ -94,6 +95,7 @@ fn test_double_quotes_over_db_schema_table_name() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], } @@ -114,6 +116,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + with_ordinality: _, partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 2f4ed1316..7a2288cbb 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -870,6 +870,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + with_ordinality: _, partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index dd1e77d5d..629ab5fc2 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -399,7 +399,8 @@ fn parse_update_tuple_row_values() { args: None, with_hints: vec![], version: None, - partitions: vec![] + partitions: vec![], + with_ordinality: false, }, joins: vec![], }, From 993216f3ac279e1e86a16de8696e60dc78d5a418 Mon Sep 17 00:00:00 2001 From: hulk Date: Sat, 13 Jul 2024 17:46:26 +0800 Subject: [PATCH 026/506] Enable PARTITION BY feature for PostgreSQL while parsing the create table statement (#1338) --- src/ast/helpers/stmt_create_table.rs | 4 +- src/parser/mod.rs | 59 +++++++++++++++------------- tests/sqlparser_postgres.rs | 44 +++++++++++++++++++++ 3 files changed, 77 insertions(+), 30 deletions(-) diff --git 
a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index d862a36ae..92c75e6a4 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -496,9 +496,9 @@ impl TryFrom for CreateTableBuilder { } } -/// Helper return type when parsing configuration for a BigQuery `CREATE TABLE` statement. +/// Helper return type when parsing configuration for a `CREATE TABLE` statement. #[derive(Default)] -pub(crate) struct BigQueryTableConfiguration { +pub(crate) struct CreateTableConfiguration { pub partition_by: Option>, pub cluster_by: Option>>, pub options: Option>, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e89eba9b1..4d2319a08 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -31,7 +31,7 @@ use recursion::RecursionCounter; use IsLateral::*; use IsOptional::*; -use crate::ast::helpers::stmt_create_table::{BigQueryTableConfiguration, CreateTableBuilder}; +use crate::ast::helpers::stmt_create_table::{CreateTableBuilder, CreateTableConfiguration}; use crate::ast::*; use crate::dialect::*; use crate::keywords::{Keyword, ALL_KEYWORDS}; @@ -5416,11 +5416,7 @@ impl<'a> Parser<'a> { None }; - let big_query_config = if dialect_of!(self is BigQueryDialect | GenericDialect) { - self.parse_optional_big_query_create_table_config()? 
- } else { - Default::default() - }; + let create_table_config = self.parse_optional_create_table_config()?; // Parse optional `AS ( query )` let query = if self.parse_keyword(Keyword::AS) { @@ -5505,39 +5501,46 @@ impl<'a> Parser<'a> { .collation(collation) .on_commit(on_commit) .on_cluster(on_cluster) - .partition_by(big_query_config.partition_by) - .cluster_by(big_query_config.cluster_by) - .options(big_query_config.options) + .partition_by(create_table_config.partition_by) + .cluster_by(create_table_config.cluster_by) + .options(create_table_config.options) .primary_key(primary_key) .strict(strict) .build()) } - /// Parse configuration like partitioning, clustering information during big-query table creation. - /// - fn parse_optional_big_query_create_table_config( + /// Parse configuration like partitioning, clustering information during the table creation. + /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#syntax_2) + /// [PostgreSQL](https://www.postgresql.org/docs/current/ddl-partitioning.html) + fn parse_optional_create_table_config( &mut self, - ) -> Result { - let mut partition_by = None; - if self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) { - partition_by = Some(Box::new(self.parse_expr()?)); + ) -> Result { + let partition_by = if dialect_of!(self is BigQueryDialect | PostgreSqlDialect | GenericDialect) + && self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) + { + Some(Box::new(self.parse_expr()?)) + } else { + None }; let mut cluster_by = None; - if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { - cluster_by = Some(WrappedCollection::NoWrapping( - self.parse_comma_separated(|p| p.parse_identifier(false))?, - )); - }; - let mut options = None; - if let Token::Word(word) = self.peek_token().token { - if word.keyword == Keyword::OPTIONS { - options = Some(self.parse_options(Keyword::OPTIONS)?); - } - }; + if dialect_of!(self is BigQueryDialect | GenericDialect) { + if 
self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { + cluster_by = Some(WrappedCollection::NoWrapping( + self.parse_comma_separated(|p| p.parse_identifier(false))?, + )); + }; + + if let Token::Word(word) = self.peek_token().token { + if word.keyword == Keyword::OPTIONS { + options = Some(self.parse_options(Keyword::OPTIONS)?); + } + }; + } - Ok(BigQueryTableConfiguration { + Ok(CreateTableConfiguration { partition_by, cluster_by, options, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 164bb72c7..ed17e9d8f 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -4039,6 +4039,50 @@ fn parse_create_table_with_alias() { } } +#[test] +fn parse_create_table_with_partition_by() { + let sql = "CREATE TABLE t1 (a INT, b TEXT) PARTITION BY RANGE(a)"; + match pg_and_generic().verified_stmt(sql) { + Statement::CreateTable(create_table) => { + assert_eq!("t1", create_table.name.to_string()); + assert_eq!( + vec![ + ColumnDef { + name: "a".into(), + data_type: DataType::Int(None), + collation: None, + options: vec![] + }, + ColumnDef { + name: "b".into(), + data_type: DataType::Text, + collation: None, + options: vec![] + } + ], + create_table.columns + ); + match *create_table.partition_by.unwrap() { + Expr::Function(f) => { + assert_eq!("RANGE", f.name.to_string()); + assert_eq!( + FunctionArguments::List(FunctionArgumentList { + duplicate_treatment: None, + clauses: vec![], + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( + Expr::Identifier(Ident::new("a")) + ))], + }), + f.args + ); + } + _ => unreachable!(), + } + } + _ => unreachable!(), + } +} + #[test] fn parse_join_constraint_unnest_alias() { assert_eq!( From 20f7ac59e38d52e293476b7ad844e7f744a16c43 Mon Sep 17 00:00:00 2001 From: hulk Date: Tue, 16 Jul 2024 01:54:44 +0800 Subject: [PATCH 027/506] Fix AS query clause should be after the create table options (#1339) --- src/ast/dml.rs | 6 +++--- src/parser/mod.rs | 14 +++++++------- 
tests/sqlparser_clickhouse.rs | 24 ++++++++++++++++++++++++ tests/sqlparser_mysql.rs | 27 +++++++++++++++++++++++++++ 4 files changed, 61 insertions(+), 10 deletions(-) diff --git a/src/ast/dml.rs b/src/ast/dml.rs index b35b2b970..0ebbaa3e9 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -418,9 +418,6 @@ impl Display for CreateTable { write!(f, " WITH TAG ({})", display_comma_separated(tag.as_slice()))?; } - if let Some(query) = &self.query { - write!(f, " AS {query}")?; - } if let Some(default_charset) = &self.default_charset { write!(f, " DEFAULT CHARSET={default_charset}")?; } @@ -440,6 +437,9 @@ impl Display for CreateTable { if self.strict { write!(f, " STRICT")?; } + if let Some(query) = &self.query { + write!(f, " AS {query}")?; + } Ok(()) } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 4d2319a08..d00f28a55 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5418,13 +5418,6 @@ impl<'a> Parser<'a> { let create_table_config = self.parse_optional_create_table_config()?; - // Parse optional `AS ( query )` - let query = if self.parse_keyword(Keyword::AS) { - Some(self.parse_boxed_query()?) - } else { - None - }; - let default_charset = if self.parse_keywords(&[Keyword::DEFAULT, Keyword::CHARSET]) { self.expect_token(&Token::Eq)?; let next_token = self.next_token(); @@ -5477,6 +5470,13 @@ impl<'a> Parser<'a> { None }; + // Parse optional `AS ( query )` + let query = if self.parse_keyword(Keyword::AS) { + Some(self.parse_boxed_query()?) 
+ } else { + None + }; + Ok(CreateTableBuilder::new(table_name) .temporary(temporary) .columns(columns) diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 99db3d10c..752940551 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -802,6 +802,30 @@ fn test_query_with_format_clause() { } } +#[test] +fn parse_create_table_on_commit_and_as_query() { + let sql = r#"CREATE LOCAL TEMPORARY TABLE test ON COMMIT PRESERVE ROWS AS SELECT 1"#; + match clickhouse_and_generic().verified_stmt(sql) { + Statement::CreateTable(CreateTable { + name, + on_commit, + query, + .. + }) => { + assert_eq!(name.to_string(), "test"); + assert_eq!(on_commit, Some(OnCommit::PreserveRows)); + assert_eq!( + query.unwrap().body.as_select().unwrap().projection, + vec![UnnamedExpr(Expr::Value(Value::Number( + "1".parse().unwrap(), + false + )))] + ); + } + _ => unreachable!(), + } +} + fn clickhouse() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {})], diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index ec094bcd6..c2ce407a7 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -812,6 +812,33 @@ fn parse_create_table_collate() { } } +#[test] +fn parse_create_table_both_options_and_as_query() { + let sql = "CREATE TABLE foo (id INT(11)) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb4_0900_ai_ci AS SELECT 1"; + match mysql_and_generic().verified_stmt(sql) { + Statement::CreateTable(CreateTable { + name, + collation, + query, + .. 
+ }) => { + assert_eq!(name.to_string(), "foo"); + assert_eq!(collation, Some("utf8mb4_0900_ai_ci".to_string())); + assert_eq!( + query.unwrap().body.as_select().unwrap().projection, + vec![SelectItem::UnnamedExpr(Expr::Value(number("1")))] + ); + } + _ => unreachable!(), + } + + let sql = r"CREATE TABLE foo (id INT(11)) ENGINE=InnoDB AS SELECT 1 DEFAULT CHARSET=utf8mb3"; + assert!(matches!( + mysql_and_generic().parse_sql_statements(sql), + Err(ParserError::ParserError(_)) + )); +} + #[test] fn parse_create_table_comment_character_set() { let sql = "CREATE TABLE foo (s TEXT CHARACTER SET utf8mb4 COMMENT 'comment')"; From 845a1aaddd371a586c41ab9b68ad21a4bbc3884f Mon Sep 17 00:00:00 2001 From: Nick Presta Date: Sat, 20 Jul 2024 06:51:12 -0400 Subject: [PATCH 028/506] [ClickHouse] Add support for WITH FILL to OrderByExpr (#1330) Co-authored-by: Andrew Lamb --- src/ast/mod.rs | 17 ++-- src/ast/query.rs | 91 +++++++++++++++++- src/keywords.rs | 3 + src/parser/mod.rs | 84 ++++++++++++++++- tests/sqlparser_clickhouse.rs | 169 ++++++++++++++++++++++++++++++++++ tests/sqlparser_common.rs | 35 +++++-- tests/sqlparser_mssql.rs | 4 +- tests/sqlparser_mysql.rs | 31 ++++--- tests/sqlparser_postgres.rs | 10 +- 9 files changed, 397 insertions(+), 47 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index b8d72e233..2a519fc7c 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -43,14 +43,15 @@ pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode, ExceptSelectItem, ExcludeSelectItem, ExprWithAlias, Fetch, ForClause, ForJson, ForXml, - FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem, Join, - JoinConstraint, JoinOperator, JsonTableColumn, JsonTableColumnErrorHandling, LateralView, - LockClause, LockType, MatchRecognizePattern, MatchRecognizeSymbol, Measure, - NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset, OffsetRows, OrderByExpr, 
- PivotValueSource, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement, - ReplaceSelectItem, RowsPerMatch, Select, SelectInto, SelectItem, SetExpr, SetOperator, - SetQuantifier, Setting, SymbolDefinition, Table, TableAlias, TableFactor, TableVersion, - TableWithJoins, Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With, + FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem, Interpolate, + InterpolateExpr, Join, JoinConstraint, JoinOperator, JsonTableColumn, + JsonTableColumnErrorHandling, LateralView, LockClause, LockType, MatchRecognizePattern, + MatchRecognizeSymbol, Measure, NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset, + OffsetRows, OrderBy, OrderByExpr, PivotValueSource, Query, RenameSelectItem, + RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, + SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Setting, SymbolDefinition, Table, + TableAlias, TableFactor, TableVersion, TableWithJoins, Top, TopQuantity, ValueTableMode, + Values, WildcardAdditionalOptions, With, WithFill, }; pub use self::value::{ escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString, diff --git a/src/ast/query.rs b/src/ast/query.rs index 608ac2e96..978604266 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -33,7 +33,7 @@ pub struct Query { /// SELECT or UNION / EXCEPT / INTERSECT pub body: Box, /// ORDER BY - pub order_by: Vec, + pub order_by: Option, /// `LIMIT { | ALL }` pub limit: Option, @@ -67,8 +67,17 @@ impl fmt::Display for Query { write!(f, "{with} ")?; } write!(f, "{}", self.body)?; - if !self.order_by.is_empty() { - write!(f, " ORDER BY {}", display_comma_separated(&self.order_by))?; + if let Some(ref order_by) = self.order_by { + write!(f, " ORDER BY")?; + if !order_by.exprs.is_empty() { + write!(f, " {}", display_comma_separated(&order_by.exprs))?; + } + if let Some(ref interpolate) = order_by.interpolate { + match 
&interpolate.exprs { + Some(exprs) => write!(f, " INTERPOLATE ({})", display_comma_separated(exprs))?, + None => write!(f, " INTERPOLATE")?, + } + } } if let Some(ref limit) = self.limit { write!(f, " LIMIT {limit}")?; @@ -1668,6 +1677,18 @@ pub enum JoinConstraint { None, } +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct OrderBy { + pub exprs: Vec, + /// Optional: `INTERPOLATE` + /// Supported by [ClickHouse syntax] + /// + /// [ClickHouse syntax]: + pub interpolate: Option, +} + /// An `ORDER BY` expression #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -1678,6 +1699,9 @@ pub struct OrderByExpr { pub asc: Option, /// Optional `NULLS FIRST` or `NULLS LAST` pub nulls_first: Option, + /// Optional: `WITH FILL` + /// Supported by [ClickHouse syntax]: + pub with_fill: Option, } impl fmt::Display for OrderByExpr { @@ -1693,6 +1717,67 @@ impl fmt::Display for OrderByExpr { Some(false) => write!(f, " NULLS LAST")?, None => (), } + if let Some(ref with_fill) = self.with_fill { + write!(f, " {}", with_fill)? + } + Ok(()) + } +} + +/// ClickHouse `WITH FILL` modifier for `ORDER BY` clause. 
+/// Supported by [ClickHouse syntax] +/// +/// [ClickHouse syntax]: +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct WithFill { + pub from: Option, + pub to: Option, + pub step: Option, +} + +impl fmt::Display for WithFill { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "WITH FILL")?; + if let Some(ref from) = self.from { + write!(f, " FROM {}", from)?; + } + if let Some(ref to) = self.to { + write!(f, " TO {}", to)?; + } + if let Some(ref step) = self.step { + write!(f, " STEP {}", step)?; + } + Ok(()) + } +} + +/// ClickHouse `INTERPOLATE` clause for use in `ORDER BY` clause when using `WITH FILL` modifier. +/// Supported by [ClickHouse syntax] +/// +/// [ClickHouse syntax]: +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct InterpolateExpr { + pub column: Ident, + pub expr: Option, +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Interpolate { + pub exprs: Option>, +} + +impl fmt::Display for InterpolateExpr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.column)?; + if let Some(ref expr) = self.expr { + write!(f, " AS {}", expr)?; + } Ok(()) } } diff --git a/src/keywords.rs b/src/keywords.rs index a53eaccba..2b6900fba 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -297,6 +297,7 @@ define_keywords!( FILE, FILES, FILE_FORMAT, + FILL, FILTER, FIRST, FIRST_VALUE, @@ -382,6 +383,7 @@ define_keywords!( INT64, INT8, INTEGER, + INTERPOLATE, INTERSECT, INTERSECTION, INTERVAL, @@ -682,6 +684,7 @@ define_keywords!( STDDEV_SAMP, STDIN, STDOUT, + STEP, 
STORAGE_INTEGRATION, STORED, STRICT, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d00f28a55..fb15275e9 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -7934,7 +7934,7 @@ impl<'a> Parser<'a> { body: self.parse_insert_setexpr_boxed()?, limit: None, limit_by: vec![], - order_by: vec![], + order_by: None, offset: None, fetch: None, locks: vec![], @@ -7948,7 +7948,7 @@ impl<'a> Parser<'a> { body: self.parse_update_setexpr_boxed()?, limit: None, limit_by: vec![], - order_by: vec![], + order_by: None, offset: None, fetch: None, locks: vec![], @@ -7960,9 +7960,19 @@ impl<'a> Parser<'a> { let body = self.parse_boxed_query_body(0)?; let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_order_by_expr)? + let order_by_exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?; + let interpolate = if dialect_of!(self is ClickHouseDialect | GenericDialect) { + self.parse_interpolations()? + } else { + None + }; + + Some(OrderBy { + exprs: order_by_exprs, + interpolate, + }) } else { - vec![] + None }; let mut limit = None; @@ -9193,7 +9203,7 @@ impl<'a> Parser<'a> { subquery: Box::new(Query { with: None, body: Box::new(values), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -10519,13 +10529,77 @@ impl<'a> Parser<'a> { None }; + let with_fill = if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keywords(&[Keyword::WITH, Keyword::FILL]) + { + Some(self.parse_with_fill()?) + } else { + None + }; + Ok(OrderByExpr { expr, asc, nulls_first, + with_fill, }) } + // Parse a WITH FILL clause (ClickHouse dialect) + // that follow the WITH FILL keywords in a ORDER BY clause + pub fn parse_with_fill(&mut self) -> Result { + let from = if self.parse_keyword(Keyword::FROM) { + Some(self.parse_expr()?) + } else { + None + }; + + let to = if self.parse_keyword(Keyword::TO) { + Some(self.parse_expr()?) 
+ } else { + None + }; + + let step = if self.parse_keyword(Keyword::STEP) { + Some(self.parse_expr()?) + } else { + None + }; + + Ok(WithFill { from, to, step }) + } + + // Parse a set of comma-separated INTERPOLATE expressions (ClickHouse dialect) + // that follow the INTERPOLATE keyword in an ORDER BY clause with the WITH FILL modifier + pub fn parse_interpolations(&mut self) -> Result, ParserError> { + if !self.parse_keyword(Keyword::INTERPOLATE) { + return Ok(None); + } + + if self.consume_token(&Token::LParen) { + let interpolations = self.parse_comma_separated0(|p| p.parse_interpolation())?; + self.expect_token(&Token::RParen)?; + // INTERPOLATE () and INTERPOLATE ( ... ) variants + return Ok(Some(Interpolate { + exprs: Some(interpolations), + })); + } + + // INTERPOLATE + Ok(Some(Interpolate { exprs: None })) + } + + // Parse an INTERPOLATE expression (ClickHouse dialect) + pub fn parse_interpolation(&mut self) -> Result { + let column = self.parse_identifier(false)?; + let expr = if self.parse_keyword(Keyword::AS) { + Some(self.parse_expr()?) + } else { + None + }; + Ok(InterpolateExpr { column, expr }) + } + /// Parse a TOP clause, MSSQL equivalent of LIMIT, /// that follows after `SELECT [DISTINCT]`.
pub fn parse_top(&mut self) -> Result { diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 752940551..10d7d66ff 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -720,6 +720,175 @@ fn parse_group_by_with_modifier() { } } +#[test] +fn parse_select_order_by_with_fill_interpolate() { + let sql = "SELECT id, fname, lname FROM customer WHERE id < 5 \ + ORDER BY \ + fname ASC NULLS FIRST WITH FILL FROM 10 TO 20 STEP 2, \ + lname DESC NULLS LAST WITH FILL FROM 30 TO 40 STEP 3 \ + INTERPOLATE (col1 AS col1 + 1) \ + LIMIT 2"; + let select = clickhouse().verified_query(sql); + assert_eq!( + OrderBy { + exprs: vec![ + OrderByExpr { + expr: Expr::Identifier(Ident::new("fname")), + asc: Some(true), + nulls_first: Some(true), + with_fill: Some(WithFill { + from: Some(Expr::Value(number("10"))), + to: Some(Expr::Value(number("20"))), + step: Some(Expr::Value(number("2"))), + }), + }, + OrderByExpr { + expr: Expr::Identifier(Ident::new("lname")), + asc: Some(false), + nulls_first: Some(false), + with_fill: Some(WithFill { + from: Some(Expr::Value(number("30"))), + to: Some(Expr::Value(number("40"))), + step: Some(Expr::Value(number("3"))), + }), + }, + ], + interpolate: Some(Interpolate { + exprs: Some(vec![InterpolateExpr { + column: Ident::new("col1"), + expr: Some(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("col1"))), + op: BinaryOperator::Plus, + right: Box::new(Expr::Value(number("1"))), + }), + }]) + }) + }, + select.order_by.expect("ORDER BY expected") + ); + assert_eq!(Some(Expr::Value(number("2"))), select.limit); +} + +#[test] +fn parse_select_order_by_with_fill_interpolate_multi_interpolates() { + let sql = "SELECT id, fname, lname FROM customer ORDER BY fname WITH FILL \ + INTERPOLATE (col1 AS col1 + 1) INTERPOLATE (col2 AS col2 + 2)"; + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("ORDER BY only accepts a single INTERPOLATE clause"); +} + +#[test] +fn 
parse_select_order_by_with_fill_interpolate_multi_with_fill_interpolates() { + let sql = "SELECT id, fname, lname FROM customer \ + ORDER BY \ + fname WITH FILL INTERPOLATE (col1 AS col1 + 1), \ + lname WITH FILL INTERPOLATE (col2 AS col2 + 2)"; + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("ORDER BY only accepts a single INTERPOLATE clause"); +} + +#[test] +fn parse_select_order_by_interpolate_not_last() { + let sql = "SELECT id, fname, lname FROM customer \ + ORDER BY \ + fname INTERPOLATE (col2 AS col2 + 2), + lname"; + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("ORDER BY INTERPOLATE must be in the last position"); +} + +#[test] +fn parse_with_fill() { + let sql = "SELECT fname FROM customer ORDER BY fname \ + WITH FILL FROM 10 TO 20 STEP 2"; + let select = clickhouse().verified_query(sql); + assert_eq!( + Some(WithFill { + from: Some(Expr::Value(number("10"))), + to: Some(Expr::Value(number("20"))), + step: Some(Expr::Value(number("2"))), + }), + select.order_by.expect("ORDER BY expected").exprs[0].with_fill + ); +} + +#[test] +fn parse_with_fill_missing_single_argument() { + let sql = "SELECT id, fname, lname FROM customer ORDER BY \ + fname WITH FILL FROM TO 20"; + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("WITH FILL requires expressions for all arguments"); +} + +#[test] +fn parse_with_fill_multiple_incomplete_arguments() { + let sql = "SELECT id, fname, lname FROM customer ORDER BY \ + fname WITH FILL FROM TO 20, lname WITH FILL FROM TO STEP 1"; + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("WITH FILL requires expressions for all arguments"); +} + +#[test] +fn parse_interpolate_body_with_columns() { + let sql = "SELECT fname FROM customer ORDER BY fname WITH FILL \ + INTERPOLATE (col1 AS col1 + 1, col2 AS col3, col4 AS col4 + 4)"; + let select = clickhouse().verified_query(sql); + assert_eq!( + Some(Interpolate { + exprs: Some(vec![ + InterpolateExpr { + 
column: Ident::new("col1"), + expr: Some(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("col1"))), + op: BinaryOperator::Plus, + right: Box::new(Expr::Value(number("1"))), + }), + }, + InterpolateExpr { + column: Ident::new("col2"), + expr: Some(Expr::Identifier(Ident::new("col3"))), + }, + InterpolateExpr { + column: Ident::new("col4"), + expr: Some(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("col4"))), + op: BinaryOperator::Plus, + right: Box::new(Expr::Value(number("4"))), + }), + }, + ]) + }), + select.order_by.expect("ORDER BY expected").interpolate + ); +} + +#[test] +fn parse_interpolate_without_body() { + let sql = "SELECT fname FROM customer ORDER BY fname WITH FILL INTERPOLATE"; + let select = clickhouse().verified_query(sql); + assert_eq!( + Some(Interpolate { exprs: None }), + select.order_by.expect("ORDER BY expected").interpolate + ); +} + +#[test] +fn parse_interpolate_with_empty_body() { + let sql = "SELECT fname FROM customer ORDER BY fname WITH FILL INTERPOLATE ()"; + let select = clickhouse().verified_query(sql); + assert_eq!( + Some(Interpolate { + exprs: Some(vec![]) + }), + select.order_by.expect("ORDER BY expected").interpolate + ); +} + #[test] fn test_prewhere() { match clickhouse_and_generic().verified_stmt("SELECT * FROM t PREWHERE x = 1 WHERE y = 2") { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 1adda149e..125e5f1f8 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -409,7 +409,7 @@ fn parse_update_set_from() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -2065,19 +2065,22 @@ fn parse_select_order_by() { expr: Expr::Identifier(Ident::new("lname")), asc: Some(true), nulls_first: None, + with_fill: None, }, OrderByExpr { expr: Expr::Identifier(Ident::new("fname")), asc: Some(false), nulls_first: None, + with_fill: None, }, OrderByExpr { expr: 
Expr::Identifier(Ident::new("id")), asc: None, nulls_first: None, + with_fill: None, }, ], - select.order_by + select.order_by.expect("ORDER BY expected").exprs ); } chk("SELECT id, fname, lname FROM customer WHERE id < 5 ORDER BY lname ASC, fname DESC, id"); @@ -2097,14 +2100,16 @@ fn parse_select_order_by_limit() { expr: Expr::Identifier(Ident::new("lname")), asc: Some(true), nulls_first: None, + with_fill: None, }, OrderByExpr { expr: Expr::Identifier(Ident::new("fname")), asc: Some(false), nulls_first: None, + with_fill: None, }, ], - select.order_by + select.order_by.expect("ORDER BY expected").exprs ); assert_eq!(Some(Expr::Value(number("2"))), select.limit); } @@ -2120,14 +2125,16 @@ fn parse_select_order_by_nulls_order() { expr: Expr::Identifier(Ident::new("lname")), asc: Some(true), nulls_first: Some(true), + with_fill: None, }, OrderByExpr { expr: Expr::Identifier(Ident::new("fname")), asc: Some(false), nulls_first: Some(false), + with_fill: None, }, ], - select.order_by + select.order_by.expect("ORDER BY expeccted").exprs ); assert_eq!(Some(Expr::Value(number("2"))), select.limit); } @@ -2219,6 +2226,7 @@ fn parse_select_qualify() { expr: Expr::Identifier(Ident::new("o")), asc: None, nulls_first: None, + with_fill: None, }], window_frame: None, })), @@ -2579,6 +2587,7 @@ fn parse_listagg() { }), asc: None, nulls_first: None, + with_fill: None, }, OrderByExpr { expr: Expr::Identifier(Ident { @@ -2587,6 +2596,7 @@ fn parse_listagg() { }), asc: None, nulls_first: None, + with_fill: None, }, ] }), @@ -3437,7 +3447,7 @@ fn parse_create_table_as_table() { table_name: Some("old_table".to_string()), schema_name: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -3464,7 +3474,7 @@ fn parse_create_table_as_table() { table_name: Some("old_table".to_string()), schema_name: Some("schema_name".to_string()), }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -4384,6 +4394,7 @@ fn 
parse_window_functions() { expr: Expr::Identifier(Ident::new("dt")), asc: Some(false), nulls_first: None, + with_fill: None, }], window_frame: None, })), @@ -4593,6 +4604,7 @@ fn test_parse_named_window() { }), asc: None, nulls_first: None, + with_fill: None, }], window_frame: None, }), @@ -5014,7 +5026,7 @@ fn parse_interval_and_or_xor() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -7300,11 +7312,13 @@ fn parse_create_index() { expr: Expr::Identifier(Ident::new("name")), asc: None, nulls_first: None, + with_fill: None, }, OrderByExpr { expr: Expr::Identifier(Ident::new("age")), asc: Some(false), nulls_first: None, + with_fill: None, }, ]; match verified_stmt(sql) { @@ -7334,11 +7348,13 @@ fn test_create_index_with_using_function() { expr: Expr::Identifier(Ident::new("name")), asc: None, nulls_first: None, + with_fill: None, }, OrderByExpr { expr: Expr::Identifier(Ident::new("age")), asc: Some(false), nulls_first: None, + with_fill: None, }, ]; match verified_stmt(sql) { @@ -7691,7 +7707,7 @@ fn parse_merge() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -9223,7 +9239,7 @@ fn parse_unload() { fetch: None, locks: vec![], for_clause: None, - order_by: vec![], + order_by: None, settings: None, format_clause: None, }), @@ -9622,6 +9638,7 @@ fn test_match_recognize() { expr: Expr::Identifier(Ident::new("price_date")), asc: None, nulls_first: None, + with_fill: None, }], measures: vec![ Measure { diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 26bece81d..3e8b6afbf 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -103,7 +103,7 @@ fn parse_create_procedure() { fetch: None, locks: vec![], for_clause: None, - order_by: vec![], + order_by: None, settings: None, format_clause: None, body: Box::new(SetExpr::Select(Box::new(Select { @@ -546,7 +546,7 @@ fn 
parse_substring_in_select() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index c2ce407a7..b0b29f347 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -946,7 +946,7 @@ fn parse_escaped_quote_identifiers_with_escape() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -996,7 +996,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1043,7 +1043,7 @@ fn parse_escaped_backticks_with_escape() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1090,7 +1090,7 @@ fn parse_escaped_backticks_with_no_escape() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1296,7 +1296,7 @@ fn parse_simple_insert() { ] ] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1340,7 +1340,7 @@ fn parse_ignore_insert() { Expr::Value(number("1")) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1384,7 +1384,7 @@ fn parse_priority_insert() { Expr::Value(number("1")) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1425,7 +1425,7 @@ fn parse_priority_insert() { Expr::Value(number("1")) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1474,7 +1474,7 @@ fn parse_insert_as() { "2024-01-01".to_string() ))]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1535,7 +1535,7 @@ fn parse_insert_as() { 
Expr::Value(Value::SingleQuotedString("2024-01-01".to_string())) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1580,7 +1580,7 @@ fn parse_replace_insert() { Expr::Value(number("1")) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1619,7 +1619,7 @@ fn parse_empty_row_insert() { explicit_row: false, rows: vec![vec![], vec![]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1681,7 +1681,7 @@ fn parse_insert_with_on_duplicate_update() { Expr::Value(Value::Boolean(true)), ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1946,6 +1946,7 @@ fn parse_delete_with_order_by() { }), asc: Some(false), nulls_first: None, + with_fill: None, }], order_by ); @@ -2331,7 +2332,7 @@ fn parse_substring_in_select() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -2639,7 +2640,7 @@ fn parse_hex_string_introducer() { into: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index ed17e9d8f..5ac421da0 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1159,7 +1159,7 @@ fn parse_copy_to() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -2491,7 +2491,7 @@ fn parse_array_subquery_expr() { connect_by: None, }))), }), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -4162,7 +4162,7 @@ fn test_simple_postgres_insert_with_alias() { Expr::Value(Value::Number("123".to_string(), false)) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -4231,7 +4231,7 @@ fn test_simple_postgres_insert_with_alias() { )) ]] 
})), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -4296,7 +4296,7 @@ fn test_simple_insert_with_quoted_alias() { Expr::Value(Value::SingleQuotedString("0123".to_string())) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, From 028ada8350d3b2ada4aa67f5e828b318565590f2 Mon Sep 17 00:00:00 2001 From: Ifeanyi Ubah Date: Sat, 20 Jul 2024 12:55:24 +0200 Subject: [PATCH 029/506] Support subquery expression in SET expressions (#1343) --- src/parser/mod.rs | 42 +++++++++++++++++++++++++-------------- tests/sqlparser_common.rs | 30 ++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 15 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index fb15275e9..132e4f04e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1208,20 +1208,18 @@ impl<'a> Parser<'a> { Ok(Expr::Value(self.parse_value()?)) } Token::LParen => { - let expr = - if self.parse_keyword(Keyword::SELECT) || self.parse_keyword(Keyword::WITH) { - self.prev_token(); - Expr::Subquery(self.parse_boxed_query()?) - } else if let Some(lambda) = self.try_parse_lambda() { - return Ok(lambda); - } else { - let exprs = self.parse_comma_separated(Parser::parse_expr)?; - match exprs.len() { - 0 => unreachable!(), // parse_comma_separated ensures 1 or more - 1 => Expr::Nested(Box::new(exprs.into_iter().next().unwrap())), - _ => Expr::Tuple(exprs), - } - }; + let expr = if let Some(expr) = self.try_parse_expr_sub_query()? 
{ + expr + } else if let Some(lambda) = self.try_parse_lambda() { + return Ok(lambda); + } else { + let exprs = self.parse_comma_separated(Parser::parse_expr)?; + match exprs.len() { + 0 => unreachable!(), // parse_comma_separated ensures 1 or more + 1 => Expr::Nested(Box::new(exprs.into_iter().next().unwrap())), + _ => Expr::Tuple(exprs), + } + }; self.expect_token(&Token::RParen)?; if !self.consume_token(&Token::Period) { Ok(expr) @@ -1263,6 +1261,18 @@ impl<'a> Parser<'a> { } } + fn try_parse_expr_sub_query(&mut self) -> Result, ParserError> { + if self + .parse_one_of_keywords(&[Keyword::SELECT, Keyword::WITH]) + .is_none() + { + return Ok(None); + } + self.prev_token(); + + Ok(Some(Expr::Subquery(self.parse_boxed_query()?))) + } + fn try_parse_lambda(&mut self) -> Option { if !self.dialect.supports_lambda_functions() { return None; @@ -8709,7 +8719,9 @@ impl<'a> Parser<'a> { let mut values = vec![]; loop { - let value = if let Ok(expr) = self.parse_expr() { + let value = if let Some(expr) = self.try_parse_expr_sub_query()? { + expr + } else if let Ok(expr) = self.parse_expr() { expr } else { self.expected("variable value", self.peek_token())? 
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 125e5f1f8..b1afdf28b 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -7135,9 +7135,39 @@ fn parse_set_variable() { _ => unreachable!(), } + // Subquery expression + for (sql, canonical) in [ + ( + "SET (a) = (SELECT 22 FROM tbl1)", + "SET (a) = ((SELECT 22 FROM tbl1))", + ), + ( + "SET (a) = (SELECT 22 FROM tbl1, (SELECT 1 FROM tbl2))", + "SET (a) = ((SELECT 22 FROM tbl1, (SELECT 1 FROM tbl2)))", + ), + ( + "SET (a) = ((SELECT 22 FROM tbl1, (SELECT 1 FROM tbl2)))", + "SET (a) = ((SELECT 22 FROM tbl1, (SELECT 1 FROM tbl2)))", + ), + ( + "SET (a, b) = ((SELECT 22 FROM tbl1, (SELECT 1 FROM tbl2)), SELECT 33 FROM tbl3)", + "SET (a, b) = ((SELECT 22 FROM tbl1, (SELECT 1 FROM tbl2)), (SELECT 33 FROM tbl3))", + ), + ] { + multi_variable_dialects.one_statement_parses_to(sql, canonical); + } + let error_sqls = [ ("SET (a, b, c) = (1, 2, 3", "Expected: ), found: EOF"), ("SET (a, b, c) = 1, 2, 3", "Expected: (, found: 1"), + ( + "SET (a) = ((SELECT 22 FROM tbl1)", + "Expected: ), found: EOF", + ), + ( + "SET (a) = ((SELECT 22 FROM tbl1) (SELECT 22 FROM tbl1))", + "Expected: ), found: (", + ), ]; for (sql, error) in error_sqls { assert_eq!( From 71dc96658655e25288acdb9dc1d5c9d0f245016a Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Sun, 21 Jul 2024 14:02:12 +0400 Subject: [PATCH 030/506] Fix quoted identifier regression edge-case with "from" in SELECT (#1346) --- src/parser/mod.rs | 2 +- tests/sqlparser_common.rs | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 132e4f04e..175b02765 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10331,7 +10331,7 @@ impl<'a> Parser<'a> { Expr::Wildcard => Ok(SelectItem::Wildcard( self.parse_wildcard_additional_options()?, )), - Expr::Identifier(v) if v.value.to_lowercase() == "from" => { + Expr::Identifier(v) if v.value.to_lowercase() == "from" && 
v.quote_style.is_none() => { parser_err!( format!("Expected an expression, found: {}", v), self.peek_token().location diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index b1afdf28b..dbadb4813 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -9005,7 +9005,7 @@ fn parse_non_latin_identifiers() { #[test] fn parse_trailing_comma() { - // At the moment, Duck DB is the only dialect that allows + // At the moment, DuckDB is the only dialect that allows // trailing commas anywhere in the query let trailing_commas = TestedDialects { dialects: vec![Box::new(DuckDbDialect {})], @@ -9038,11 +9038,16 @@ fn parse_trailing_comma() { ); trailing_commas.verified_stmt("SELECT album_id, name FROM track"); - trailing_commas.verified_stmt("SELECT * FROM track ORDER BY milliseconds"); - trailing_commas.verified_stmt("SELECT DISTINCT ON (album_id) name FROM track"); + // check quoted "from" identifier edge-case + trailing_commas.one_statement_parses_to( + r#"SELECT "from", FROM "from""#, + r#"SELECT "from" FROM "from""#, + ); + trailing_commas.verified_stmt(r#"SELECT "from" FROM "from""#); + // doesn't allow any trailing commas let trailing_commas = TestedDialects { dialects: vec![Box::new(GenericDialect {})], From 48ea5640a221b91a93fad769f96cd2aa37932436 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Sun, 21 Jul 2024 20:18:50 +0800 Subject: [PATCH 031/506] Support Map literal syntax for DuckDB and Generic (#1344) --- src/ast/mod.rs | 42 ++++++++++++++ src/dialect/duckdb.rs | 7 +++ src/dialect/generic.rs | 4 ++ src/dialect/mod.rs | 5 ++ src/parser/mod.rs | 44 ++++++++++++++ tests/sqlparser_common.rs | 95 +++++++++++++++++++++++++++++++ tests/sqlparser_custom_dialect.rs | 22 +++++++ 7 files changed, 219 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 2a519fc7c..cdc2e2049 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -329,6 +329,37 @@ impl fmt::Display for DictionaryField { } } +/// Represents a Map expression. 
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Map { + pub entries: Vec, +} + +impl Display for Map { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "MAP {{{}}}", display_comma_separated(&self.entries)) + } +} + +/// A map field within a map. +/// +/// [duckdb]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MapEntry { + pub key: Box, + pub value: Box, +} + +impl fmt::Display for MapEntry { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}: {}", self.key, self.value) + } +} + /// Options for `CAST` / `TRY_CAST` /// BigQuery: #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -764,6 +795,14 @@ pub enum Expr { /// ``` /// [1]: https://duckdb.org/docs/sql/data_types/struct#creating-structs Dictionary(Vec), + /// `DuckDB` specific `Map` literal expression [1] + /// + /// Syntax: + /// ```sql + /// syntax: Map {key1: value1[, ... ]} + /// ``` + /// [1]: https://duckdb.org/docs/sql/data_types/map#creating-maps + Map(Map), /// An access of nested data using subscript syntax, for example `array[2]`. Subscript { expr: Box, @@ -1331,6 +1370,9 @@ impl fmt::Display for Expr { Expr::Dictionary(fields) => { write!(f, "{{{}}}", display_comma_separated(fields)) } + Expr::Map(map) => { + write!(f, "{map}") + } Expr::Subscript { expr, subscript: key, diff --git a/src/dialect/duckdb.rs b/src/dialect/duckdb.rs index c6edeac14..1fc211685 100644 --- a/src/dialect/duckdb.rs +++ b/src/dialect/duckdb.rs @@ -48,4 +48,11 @@ impl Dialect for DuckDbDialect { fn supports_dictionary_syntax(&self) -> bool { true } + + // DuckDB uses this syntax for `MAP`s. 
+ // + // https://duckdb.org/docs/sql/data_types/map.html#creating-maps + fn support_map_literal_syntax(&self) -> bool { + true + } } diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 33391d479..8d762d780 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -70,4 +70,8 @@ impl Dialect for GenericDialect { fn supports_select_wildcard_except(&self) -> bool { true } + + fn support_map_literal_syntax(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index b223ead47..3ff7bb2a5 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -215,6 +215,11 @@ pub trait Dialect: Debug + Any { fn supports_dictionary_syntax(&self) -> bool { false } + /// Returns true if the dialect supports defining object using the + /// syntax like `Map {1: 10, 2: 20}`. + fn support_map_literal_syntax(&self) -> bool { + false + } /// Returns true if the dialect supports lambda functions, for example: /// /// ```sql diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 175b02765..878cabfcc 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1078,6 +1078,9 @@ impl<'a> Parser<'a> { let expr = self.parse_subexpr(Self::PLUS_MINUS_PREC)?; Ok(Expr::Prior(Box::new(expr))) } + Keyword::MAP if self.peek_token() == Token::LBrace && self.dialect.support_map_literal_syntax() => { + self.parse_duckdb_map_literal() + } // Here `w` is a word, check if it's a part of a multipart // identifier, a function call, or a simple identifier: _ => match self.peek_token().token { @@ -2322,6 +2325,47 @@ impl<'a> Parser<'a> { }) } + /// DuckDB specific: Parse a duckdb [map] + /// + /// Syntax: + /// + /// ```sql + /// Map {key1: value1[, ... 
]} + /// ``` + /// + /// [map]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps + fn parse_duckdb_map_literal(&mut self) -> Result { + self.expect_token(&Token::LBrace)?; + + let fields = self.parse_comma_separated(Self::parse_duckdb_map_field)?; + + self.expect_token(&Token::RBrace)?; + + Ok(Expr::Map(Map { entries: fields })) + } + + /// Parse a field for a duckdb [map] + /// + /// Syntax + /// + /// ```sql + /// key: value + /// ``` + /// + /// [map]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps + fn parse_duckdb_map_field(&mut self) -> Result { + let key = self.parse_expr()?; + + self.expect_token(&Token::Colon)?; + + let value = self.parse_expr()?; + + Ok(MapEntry { + key: Box::new(key), + value: Box::new(value), + }) + } + /// Parse clickhouse [map] /// /// Syntax diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index dbadb4813..ac5098f58 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -10077,6 +10077,101 @@ fn test_dictionary_syntax() { ) } +#[test] +fn test_map_syntax() { + fn check(sql: &str, expect: Expr) { + assert_eq!( + all_dialects_where(|d| d.support_map_literal_syntax()).verified_expr(sql), + expect + ); + } + + check( + "MAP {'Alberta': 'Edmonton', 'Manitoba': 'Winnipeg'}", + Expr::Map(Map { + entries: vec![ + MapEntry { + key: Box::new(Expr::Value(Value::SingleQuotedString("Alberta".to_owned()))), + value: Box::new(Expr::Value(Value::SingleQuotedString( + "Edmonton".to_owned(), + ))), + }, + MapEntry { + key: Box::new(Expr::Value(Value::SingleQuotedString( + "Manitoba".to_owned(), + ))), + value: Box::new(Expr::Value(Value::SingleQuotedString( + "Winnipeg".to_owned(), + ))), + }, + ], + }), + ); + + fn number_expr(s: &str) -> Expr { + Expr::Value(number(s)) + } + + check( + "MAP {1: 10.0, 2: 20.0}", + Expr::Map(Map { + entries: vec![ + MapEntry { + key: Box::new(number_expr("1")), + value: Box::new(number_expr("10.0")), + }, + MapEntry { + key: Box::new(number_expr("2")), + 
value: Box::new(number_expr("20.0")), + }, + ], + }), + ); + + check( + "MAP {[1, 2, 3]: 10.0, [4, 5, 6]: 20.0}", + Expr::Map(Map { + entries: vec![ + MapEntry { + key: Box::new(Expr::Array(Array { + elem: vec![number_expr("1"), number_expr("2"), number_expr("3")], + named: false, + })), + value: Box::new(Expr::Value(number("10.0"))), + }, + MapEntry { + key: Box::new(Expr::Array(Array { + elem: vec![number_expr("4"), number_expr("5"), number_expr("6")], + named: false, + })), + value: Box::new(Expr::Value(number("20.0"))), + }, + ], + }), + ); + + check( + "MAP {'a': 10, 'b': 20}['a']", + Expr::Subscript { + expr: Box::new(Expr::Map(Map { + entries: vec![ + MapEntry { + key: Box::new(Expr::Value(Value::SingleQuotedString("a".to_owned()))), + value: Box::new(number_expr("10")), + }, + MapEntry { + key: Box::new(Expr::Value(Value::SingleQuotedString("b".to_owned()))), + value: Box::new(number_expr("20")), + }, + ], + })), + subscript: Box::new(Subscript::Index { + index: Expr::Value(Value::SingleQuotedString("a".to_owned())), + }), + }, + ); +} + #[test] fn parse_within_group() { verified_expr("PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sales_amount)"); diff --git a/tests/sqlparser_custom_dialect.rs b/tests/sqlparser_custom_dialect.rs index 516591382..5b29047a4 100644 --- a/tests/sqlparser_custom_dialect.rs +++ b/tests/sqlparser_custom_dialect.rs @@ -125,6 +125,28 @@ fn custom_statement_parser() -> Result<(), ParserError> { Ok(()) } +#[test] +fn test_map_syntax_not_support_default() -> Result<(), ParserError> { + #[derive(Debug)] + struct MyDialect {} + + impl Dialect for MyDialect { + fn is_identifier_start(&self, ch: char) -> bool { + is_identifier_start(ch) + } + + fn is_identifier_part(&self, ch: char) -> bool { + is_identifier_part(ch) + } + } + + let dialect = MyDialect {}; + let sql = "SELECT MAP {1: 2}"; + let ast = Parser::parse_sql(&dialect, sql); + assert!(ast.is_err()); + Ok(()) +} + fn is_identifier_start(ch: char) -> bool { ch.is_ascii_lowercase() || 
ch.is_ascii_uppercase() || ch == '_' } From b27abf00e2e67b28b25afc9da7c2ddd2a104c449 Mon Sep 17 00:00:00 2001 From: hulk Date: Tue, 23 Jul 2024 03:50:24 +0800 Subject: [PATCH 032/506] Allow to use `()` as the GROUP BY nothing (#1347) --- src/parser/mod.rs | 5 +++++ tests/sqlparser_common.rs | 28 ++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 878cabfcc..11fa9e4a9 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1487,6 +1487,11 @@ impl<'a> Parser<'a> { let result = self.parse_comma_separated(|p| p.parse_tuple(true, true))?; self.expect_token(&Token::RParen)?; Ok(Expr::Rollup(result)) + } else if self.consume_tokens(&[Token::LParen, Token::RParen]) { + // PostgreSQL allow to use empty tuple as a group by expression, + // e.g. `GROUP BY (), name`. Please refer to GROUP BY Clause section in + // [PostgreSQL](https://www.postgresql.org/docs/16/sql-select.html) + Ok(Expr::Tuple(vec![])) } else { self.parse_expr() } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index ac5098f58..dd3ed0515 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -42,6 +42,7 @@ mod test_utils; #[cfg(test)] use pretty_assertions::assert_eq; +use sqlparser::ast::Expr::Identifier; use sqlparser::test_utils::all_dialects_except; #[test] @@ -10278,3 +10279,30 @@ fn parse_auto_increment_too_large() { assert!(res.is_err(), "{res:?}"); } + +#[test] +fn test_group_by_nothing() { + let Select { group_by, .. } = all_dialects_where(|d| d.supports_group_by_expr()) + .verified_only_select("SELECT count(1) FROM t GROUP BY ()"); + { + std::assert_eq!( + GroupByExpr::Expressions(vec![Expr::Tuple(vec![])], vec![]), + group_by + ); + } + + let Select { group_by, .. 
} = all_dialects_where(|d| d.supports_group_by_expr()) + .verified_only_select("SELECT name, count(1) FROM t GROUP BY name, ()"); + { + std::assert_eq!( + GroupByExpr::Expressions( + vec![ + Identifier(Ident::new("name".to_string())), + Expr::Tuple(vec![]) + ], + vec![] + ), + group_by + ); + } +} From 390d4d3554580f618c6d8edd177b875b849f326f Mon Sep 17 00:00:00 2001 From: hulk Date: Wed, 24 Jul 2024 00:41:07 +0800 Subject: [PATCH 033/506] Add support of MATERIALIZED/ALIAS/EPHERMERAL default column options for ClickHouse (#1348) --- src/ast/ddl.rs | 21 ++++++++ src/keywords.rs | 2 + src/parser/mod.rs | 18 +++++++ tests/sqlparser_clickhouse.rs | 96 +++++++++++++++++++++++++++++++++++ 4 files changed, 137 insertions(+) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 1ed3857d7..5cc671cf5 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -923,6 +923,18 @@ pub enum ColumnOption { NotNull, /// `DEFAULT ` Default(Expr), + + /// ClickHouse supports `MATERIALIZE`, `EPHEMERAL` and `ALIAS` expr to generate default values. 
+ /// Syntax: `b INT MATERIALIZE (a + 1)` + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/create/table#default_values) + + /// `MATERIALIZE ` + Materialized(Expr), + /// `EPHEMERAL []` + Ephemeral(Option), + /// `ALIAS ` + Alias(Expr), + /// `{ PRIMARY KEY | UNIQUE } []` Unique { is_primary: bool, @@ -978,6 +990,15 @@ impl fmt::Display for ColumnOption { Null => write!(f, "NULL"), NotNull => write!(f, "NOT NULL"), Default(expr) => write!(f, "DEFAULT {expr}"), + Materialized(expr) => write!(f, "MATERIALIZED {expr}"), + Ephemeral(expr) => { + if let Some(e) = expr { + write!(f, "EPHEMERAL {e}") + } else { + write!(f, "EPHEMERAL") + } + } + Alias(expr) => write!(f, "ALIAS {expr}"), Unique { is_primary, characteristics, diff --git a/src/keywords.rs b/src/keywords.rs index 2b6900fba..e59e49339 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -77,6 +77,7 @@ define_keywords!( AFTER, AGAINST, AGGREGATION, + ALIAS, ALL, ALLOCATE, ALTER, @@ -267,6 +268,7 @@ define_keywords!( ENFORCED, ENGINE, ENUM, + EPHEMERAL, EPOCH, EQUALS, ERROR, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 11fa9e4a9..f8267a7cb 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5748,6 +5748,24 @@ impl<'a> Parser<'a> { Ok(Some(ColumnOption::Null)) } else if self.parse_keyword(Keyword::DEFAULT) { Ok(Some(ColumnOption::Default(self.parse_expr()?))) + } else if dialect_of!(self is ClickHouseDialect| GenericDialect) + && self.parse_keyword(Keyword::MATERIALIZED) + { + Ok(Some(ColumnOption::Materialized(self.parse_expr()?))) + } else if dialect_of!(self is ClickHouseDialect| GenericDialect) + && self.parse_keyword(Keyword::ALIAS) + { + Ok(Some(ColumnOption::Alias(self.parse_expr()?))) + } else if dialect_of!(self is ClickHouseDialect| GenericDialect) + && self.parse_keyword(Keyword::EPHEMERAL) + { + // The expression is optional for the EPHEMERAL syntax, so we need to check + // if the column definition has remaining tokens before parsing the expression. 
+ if matches!(self.peek_token().token, Token::Comma | Token::RParen) { + Ok(Some(ColumnOption::Ephemeral(None))) + } else { + Ok(Some(ColumnOption::Ephemeral(Some(self.parse_expr()?)))) + } } else if self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) { let characteristics = self.parse_constraint_characteristics()?; Ok(Some(ColumnOption::Unique { diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 10d7d66ff..6fdadc366 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -493,6 +493,102 @@ fn parse_create_table_with_primary_key() { .expect_err("ORDER BY supports one expression with tuple"); } +#[test] +fn parse_create_table_with_variant_default_expressions() { + let sql = concat!( + "CREATE TABLE table (", + "a DATETIME MATERIALIZED now(),", + " b DATETIME EPHEMERAL now(),", + " c DATETIME EPHEMERAL,", + " d STRING ALIAS toString(c)", + ") ENGINE=MergeTree" + ); + match clickhouse_and_generic().verified_stmt(sql) { + Statement::CreateTable(CreateTable { columns, .. 
}) => { + assert_eq!( + columns, + vec![ + ColumnDef { + name: Ident::new("a"), + data_type: DataType::Datetime(None), + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::Materialized(Expr::Function(Function { + name: ObjectName(vec![Ident::new("now")]), + args: FunctionArguments::List(FunctionArgumentList { + args: vec![], + duplicate_treatment: None, + clauses: vec![], + }), + parameters: FunctionArguments::None, + null_treatment: None, + filter: None, + over: None, + within_group: vec![], + })) + }], + }, + ColumnDef { + name: Ident::new("b"), + data_type: DataType::Datetime(None), + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::Ephemeral(Some(Expr::Function(Function { + name: ObjectName(vec![Ident::new("now")]), + args: FunctionArguments::List(FunctionArgumentList { + args: vec![], + duplicate_treatment: None, + clauses: vec![], + }), + parameters: FunctionArguments::None, + null_treatment: None, + filter: None, + over: None, + within_group: vec![], + }))) + }], + }, + ColumnDef { + name: Ident::new("c"), + data_type: DataType::Datetime(None), + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::Ephemeral(None) + }], + }, + ColumnDef { + name: Ident::new("d"), + data_type: DataType::String(None), + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::Alias(Expr::Function(Function { + name: ObjectName(vec![Ident::new("toString")]), + args: FunctionArguments::List(FunctionArgumentList { + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( + Identifier(Ident::new("c")) + ))], + duplicate_treatment: None, + clauses: vec![], + }), + parameters: FunctionArguments::None, + null_treatment: None, + filter: None, + over: None, + within_group: vec![], + })) + }], + } + ] + ) + } + _ => unreachable!(), + } +} + #[test] fn parse_create_view_with_fields_data_types() { match clickhouse().verified_stmt(r#"CREATE VIEW v 
(i "int", f "String") AS SELECT * FROM t"#) { From 1e82a145adcc090b2768814f19f23fd4d80267a5 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 23 Jul 2024 12:56:55 -0400 Subject: [PATCH 034/506] Add CHANGELOG for 0.49.0 (#1350) --- CHANGELOG.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ed5c9ecb4..cf2d1321b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,27 @@ changes that break via addition as "Added". ## [Unreleased] Check https://github.com/sqlparser-rs/sqlparser-rs/commits/main for undocumented changes. +## [0.49.0] 2024-07-23 +As always, huge props to @iffyio @jmhain and @lovasoa for their help reviewing and merging PRs! + +We are in the process of moving sqlparser to governed as part of the Apache +DataFusion project: https://github.com/sqlparser-rs/sqlparser-rs/issues/1294 + +### Fixed +* Fix quoted identifier regression edge-case with "from" in SELECT (#1346) - Thanks @alexander-beedie +* Fix `AS` query clause should be after the create table options (#1339) - Thanks @git-hulk + +### Added + +* Support `MATERIALIZED`/`ALIAS`/`EPHERMERAL` default column options for ClickHouse (#1348) - Thanks @git-hulk +* Support `()` as the `GROUP BY` nothing (#1347) - Thanks @git-hulk +* Support Map literal syntax for DuckDB and Generic (#1344) - Thanks @goldmedal +* Support subquery expression in `SET` expressions (#1343) - Thanks @iffyio +* Support `WITH FILL` for ClickHouse (#1330) - Thanks @nickpresta +* Support `PARTITION BY` for PostgreSQL in `CREATE TABLE` statement (#1338) - Thanks @git-hulk +* Support of table function `WITH ORDINALITY` modifier for Postgres (#1337) - Thanks @git-hulk + + ## [0.48.0] 2024-07-09 Huge shout out to @iffyio @jmhain and @lovasoa for their help reviewing and merging PRs! 
From 6c64d43e1bbf4ebc78754c63560894f0d867bdac Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 23 Jul 2024 13:11:16 -0400 Subject: [PATCH 035/506] chore: Release sqlparser version 0.49.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index b0bee003e..4c510a8c6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.48.0" +version = "0.49.0" authors = ["Andy Grove "] homepage = "https://github.com/sqlparser-rs/sqlparser-rs" documentation = "https://docs.rs/sqlparser/" From 547d82f07de4480d236a061a41bfadac21235434 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Mon, 29 Jul 2024 14:49:05 +0200 Subject: [PATCH 036/506] fix CI clippy `1.80` warnings (#1357) --- src/keywords.rs | 2 +- src/test_utils.rs | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/keywords.rs b/src/keywords.rs index e59e49339..4b599f12a 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -20,7 +20,7 @@ //! As a matter of fact, most of these keywords are not used at all //! and could be removed. //! 3) a `RESERVED_FOR_TABLE_ALIAS` array with keywords reserved in a -//! "table alias" context. +//! "table alias" context. #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; diff --git a/src/test_utils.rs b/src/test_utils.rs index 1f5300be1..5ed6339bd 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -111,10 +111,10 @@ impl TestedDialects { /// that: /// /// 1. parsing `sql` results in the same [`Statement`] as parsing - /// `canonical`. + /// `canonical`. /// /// 2. 
re-serializing the result of parsing `sql` produces the same - /// `canonical` sql string + /// `canonical` sql string pub fn one_statement_parses_to(&self, sql: &str, canonical: &str) -> Statement { let mut statements = self.parse_sql_statements(sql).expect(sql); assert_eq!(statements.len(), 1); @@ -180,10 +180,10 @@ impl TestedDialects { /// Ensures that `sql` parses as a single [`Select`], and that additionally: /// /// 1. parsing `sql` results in the same [`Statement`] as parsing - /// `canonical`. + /// `canonical`. /// /// 2. re-serializing the result of parsing `sql` produces the same - /// `canonical` sql string + /// `canonical` sql string pub fn verified_only_select_with_canonical(&self, query: &str, canonical: &str) -> Select { let q = match self.one_statement_parses_to(query, canonical) { Statement::Query(query) => *query, From 7fdb2ec5d195bebca887a1532c49ec38741eca1b Mon Sep 17 00:00:00 2001 From: hulk Date: Tue, 30 Jul 2024 05:16:29 +0800 Subject: [PATCH 037/506] Allow to use the TABLE keyword in DESC|DESCRIBE|EXPLAIN TABLE statement (#1351) --- src/ast/mod.rs | 9 +++++++++ src/parser/mod.rs | 3 +++ tests/sqlparser_common.rs | 42 ++++++++++++++++++++++++++++----------- 3 files changed, 42 insertions(+), 12 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index cdc2e2049..d27baadc4 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2699,6 +2699,11 @@ pub enum Statement { describe_alias: DescribeAlias, /// Hive style `FORMATTED | EXTENDED` hive_format: Option, + /// Snowflake and ClickHouse support `DESC|DESCRIBE TABLE ` syntax + /// + /// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/desc-table.html) + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/describe-table) + has_table_keyword: bool, /// Table name #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] table_name: ObjectName, @@ -2872,6 +2877,7 @@ impl fmt::Display for Statement { Statement::ExplainTable { describe_alias, hive_format, 
+ has_table_keyword, table_name, } => { write!(f, "{describe_alias} ")?; @@ -2879,6 +2885,9 @@ impl fmt::Display for Statement { if let Some(format) = hive_format { write!(f, "{} ", format)?; } + if *has_table_keyword { + write!(f, "TABLE ")?; + } write!(f, "{table_name}") } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f8267a7cb..931033f7b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -7972,10 +7972,13 @@ impl<'a> Parser<'a> { _ => {} } + // only allow to use TABLE keyword for DESC|DESCRIBE statement + let has_table_keyword = self.parse_keyword(Keyword::TABLE); let table_name = self.parse_object_name(false)?; Ok(Statement::ExplainTable { describe_alias, hive_format, + has_table_keyword, table_name, }) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index dd3ed0515..e68f25eb2 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -4186,31 +4186,49 @@ fn run_explain_analyze( #[test] fn parse_explain_table() { let validate_explain = - |query: &str, expected_describe_alias: DescribeAlias| match verified_stmt(query) { - Statement::ExplainTable { - describe_alias, - hive_format, - table_name, - } => { - assert_eq!(describe_alias, expected_describe_alias); - assert_eq!(hive_format, None); - assert_eq!("test_identifier", table_name.to_string()); + |query: &str, expected_describe_alias: DescribeAlias, expected_table_keyword| { + match verified_stmt(query) { + Statement::ExplainTable { + describe_alias, + hive_format, + has_table_keyword, + table_name, + } => { + assert_eq!(describe_alias, expected_describe_alias); + assert_eq!(hive_format, None); + assert_eq!(has_table_keyword, expected_table_keyword); + assert_eq!("test_identifier", table_name.to_string()); + } + _ => panic!("Unexpected Statement, must be ExplainTable"), } - _ => panic!("Unexpected Statement, must be ExplainTable"), }; - validate_explain("EXPLAIN test_identifier", DescribeAlias::Explain); - validate_explain("DESCRIBE test_identifier", 
DescribeAlias::Describe); + validate_explain("EXPLAIN test_identifier", DescribeAlias::Explain, false); + validate_explain("DESCRIBE test_identifier", DescribeAlias::Describe, false); + validate_explain("DESC test_identifier", DescribeAlias::Desc, false); + validate_explain( + "EXPLAIN TABLE test_identifier", + DescribeAlias::Explain, + true, + ); + validate_explain( + "DESCRIBE TABLE test_identifier", + DescribeAlias::Describe, + true, + ); + validate_explain("DESC TABLE test_identifier", DescribeAlias::Desc, true); } #[test] fn explain_describe() { verified_stmt("DESCRIBE test.table"); + verified_stmt("DESCRIBE TABLE test.table"); } #[test] fn explain_desc() { verified_stmt("DESC test.table"); + verified_stmt("DESC TABLE test.table"); } #[test] From c3ba2f33c6f52ce4dfea87bae9e77460db8f917f Mon Sep 17 00:00:00 2001 From: Joey Hain Date: Mon, 29 Jul 2024 14:17:11 -0700 Subject: [PATCH 038/506] Snowflake: support position with normal function call syntax (#1341) Co-authored-by: Ifeanyi Ubah --- src/parser/mod.rs | 30 ++++++++++++++++-------------- tests/sqlparser_common.rs | 30 +++++++++++++++++------------- tests/sqlparser_snowflake.rs | 6 ++++++ 3 files changed, 39 insertions(+), 27 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 931033f7b..b3120bb30 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1038,7 +1038,7 @@ impl<'a> Parser<'a> { Keyword::CEIL => self.parse_ceil_floor_expr(true), Keyword::FLOOR => self.parse_ceil_floor_expr(false), Keyword::POSITION if self.peek_token().token == Token::LParen => { - self.parse_position_expr() + self.parse_position_expr(w.to_ident()) } Keyword::SUBSTRING => self.parse_substring_expr(), Keyword::OVERLAY => self.parse_overlay_expr(), @@ -1707,24 +1707,26 @@ impl<'a> Parser<'a> { } } - pub fn parse_position_expr(&mut self) -> Result { - // PARSE SELECT POSITION('@' in field) - self.expect_token(&Token::LParen)?; + pub fn parse_position_expr(&mut self, ident: Ident) -> Result { + let 
position_expr = self.maybe_parse(|p| { + // PARSE SELECT POSITION('@' in field) + p.expect_token(&Token::LParen)?; - // Parse the subexpr till the IN keyword - let expr = self.parse_subexpr(Self::BETWEEN_PREC)?; - if self.parse_keyword(Keyword::IN) { - let from = self.parse_expr()?; - self.expect_token(&Token::RParen)?; + // Parse the subexpr till the IN keyword + let expr = p.parse_subexpr(Self::BETWEEN_PREC)?; + p.expect_keyword(Keyword::IN)?; + let from = p.parse_expr()?; + p.expect_token(&Token::RParen)?; Ok(Expr::Position { expr: Box::new(expr), r#in: Box::new(from), }) - } else { - parser_err!( - "Position function must include IN keyword".to_string(), - self.peek_token().location - ) + }); + match position_expr { + Some(expr) => Ok(expr), + // Snowflake supports `position` as an ordinary function call + // without the special `IN` syntax. + None => self.parse_function(ObjectName(vec![ident])), } } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index e68f25eb2..5de76f78f 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -4151,7 +4151,7 @@ fn parse_scalar_function_in_projection() { for function_name in names { // like SELECT sqrt(id) FROM foo - let sql = dbg!(format!("SELECT {function_name}(id) FROM foo")); + let sql = format!("SELECT {function_name}(id) FROM foo"); let select = verified_only_select(&sql); assert_eq!( &call(function_name, [Expr::Identifier(Ident::new("id"))]), @@ -8254,30 +8254,34 @@ fn parse_time_functions() { #[test] fn parse_position() { - let sql = "SELECT POSITION('@' IN field)"; - let select = verified_only_select(sql); assert_eq!( - &Expr::Position { + Expr::Position { expr: Box::new(Expr::Value(Value::SingleQuotedString("@".to_string()))), r#in: Box::new(Expr::Identifier(Ident::new("field"))), }, - expr_from_projection(only(&select.projection)) + verified_expr("POSITION('@' IN field)"), ); -} -#[test] -fn parse_position_negative() { - let sql = "SELECT POSITION(foo) from bar"; - let res = 
parse_sql_statements(sql); + // some dialects (e.g. snowflake) support position as a function call (i.e. without IN) assert_eq!( - ParserError::ParserError("Position function must include IN keyword".to_string()), - res.unwrap_err() + call( + "position", + [ + Expr::Value(Value::SingleQuotedString("an".to_owned())), + Expr::Value(Value::SingleQuotedString("banana".to_owned())), + Expr::Value(number("1")), + ] + ), + verified_expr("position('an', 'banana', 1)") ); +} +#[test] +fn parse_position_negative() { let sql = "SELECT POSITION(foo IN) from bar"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected: an expression:, found: )".to_string()), + ParserError::ParserError("Expected: (, found: )".to_string()), res.unwrap_err() ); } diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 7a2288cbb..7abb1a947 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -2256,3 +2256,9 @@ fn asof_joins() { "ORDER BY s.observed", )); } + +#[test] +fn test_parse_position() { + snowflake().verified_query("SELECT position('an', 'banana', 1)"); + snowflake().verified_query("SELECT n, h, POSITION(n IN h) FROM pos"); +} From bc15f7b4ceab849a974e84fcd38bde353cb7c2d1 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Mon, 29 Jul 2024 23:18:16 +0200 Subject: [PATCH 039/506] Support for postgres String Constants with Unicode Escapes (#1355) --- src/ast/value.rs | 40 +++++++++++++++++++ src/dialect/generic.rs | 4 ++ src/dialect/mod.rs | 15 +++++++ src/dialect/postgresql.rs | 4 ++ src/parser/mod.rs | 7 ++++ src/tokenizer.rs | 78 +++++++++++++++++++++++++++++++++++++ tests/sqlparser_postgres.rs | 32 +++++++++++++++ 7 files changed, 180 insertions(+) diff --git a/src/ast/value.rs b/src/ast/value.rs index 4c1a56a92..17cdb839d 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -52,6 +52,10 @@ pub enum Value { /// See [Postgres 
docs](https://www.postgresql.org/docs/8.3/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS) /// for more details. EscapedStringLiteral(String), + /// u&'string value' (postgres extension) + /// See [Postgres docs](https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-UESCAPE) + /// for more details. + UnicodeStringLiteral(String), /// B'string value' SingleQuotedByteStringLiteral(String), /// B"string value" @@ -102,6 +106,7 @@ impl fmt::Display for Value { } Value::DollarQuotedString(v) => write!(f, "{v}"), Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)), + Value::UnicodeStringLiteral(v) => write!(f, "U&'{}'", escape_unicode_string(v)), Value::NationalStringLiteral(v) => write!(f, "N'{v}'"), Value::HexStringLiteral(v) => write!(f, "X'{v}'"), Value::Boolean(v) => write!(f, "{v}"), @@ -347,6 +352,41 @@ pub fn escape_escaped_string(s: &str) -> EscapeEscapedStringLiteral<'_> { EscapeEscapedStringLiteral(s) } +pub struct EscapeUnicodeStringLiteral<'a>(&'a str); + +impl<'a> fmt::Display for EscapeUnicodeStringLiteral<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + for c in self.0.chars() { + match c { + '\'' => { + write!(f, "''")?; + } + '\\' => { + write!(f, r#"\\"#)?; + } + x if x.is_ascii() => { + write!(f, "{}", c)?; + } + _ => { + let codepoint = c as u32; + // if the character fits in 32 bits, we can use the \XXXX format + // otherwise, we need to use the \+XXXXXX format + if codepoint <= 0xFFFF { + write!(f, "\\{:04X}", codepoint)?; + } else { + write!(f, "\\+{:06X}", codepoint)?; + } + } + } + } + Ok(()) + } +} + +pub fn escape_unicode_string(s: &str) -> EscapeUnicodeStringLiteral<'_> { + EscapeUnicodeStringLiteral(s) +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 
8d762d780..2777dfb02 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -35,6 +35,10 @@ impl Dialect for GenericDialect { || ch == '_' } + fn supports_unicode_string_literal(&self) -> bool { + true + } + fn supports_group_by_expr(&self) -> bool { true } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 3ff7bb2a5..22e0baeb2 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -145,6 +145,21 @@ pub trait Dialect: Debug + Any { fn supports_string_literal_backslash_escape(&self) -> bool { false } + + /// Determine if the dialect supports string literals with `U&` prefix. + /// This is used to specify Unicode code points in string literals. + /// For example, in PostgreSQL, the following is a valid string literal: + /// ```sql + /// SELECT U&'\0061\0062\0063'; + /// ``` + /// This is equivalent to the string literal `'abc'`. + /// See + /// - [Postgres docs](https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-UESCAPE) + /// - [H2 docs](http://www.h2database.com/html/grammar.html#string) + fn supports_unicode_string_literal(&self) -> bool { + false + } + /// Does the dialect support `FILTER (WHERE expr)` for aggregate queries? 
fn supports_filter_during_aggregation(&self) -> bool { false diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index 0e04bfa27..8254e807b 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -40,6 +40,10 @@ impl Dialect for PostgreSqlDialect { ch.is_alphabetic() || ch.is_ascii_digit() || ch == '$' || ch == '_' } + fn supports_unicode_string_literal(&self) -> bool { + true + } + /// See fn is_custom_operator_part(&self, ch: char) -> bool { matches!( diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b3120bb30..2b1c1ab7f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1191,6 +1191,10 @@ impl<'a> Parser<'a> { self.prev_token(); Ok(Expr::Value(self.parse_value()?)) } + Token::UnicodeStringLiteral(_) => { + self.prev_token(); + Ok(Expr::Value(self.parse_value()?)) + } Token::Number(_, _) | Token::SingleQuotedString(_) | Token::DoubleQuotedString(_) @@ -1868,6 +1872,7 @@ impl<'a> Parser<'a> { } Token::SingleQuotedString(_) | Token::EscapedStringLiteral(_) + | Token::UnicodeStringLiteral(_) | Token::NationalStringLiteral(_) | Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)), _ => self.expected( @@ -6965,6 +6970,7 @@ impl<'a> Parser<'a> { } Token::NationalStringLiteral(ref s) => Ok(Value::NationalStringLiteral(s.to_string())), Token::EscapedStringLiteral(ref s) => Ok(Value::EscapedStringLiteral(s.to_string())), + Token::UnicodeStringLiteral(ref s) => Ok(Value::UnicodeStringLiteral(s.to_string())), Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())), Token::Placeholder(ref s) => Ok(Value::Placeholder(s.to_string())), tok @ Token::Colon | tok @ Token::AtSign => { @@ -7056,6 +7062,7 @@ impl<'a> Parser<'a> { Token::EscapedStringLiteral(s) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { Ok(s) } + Token::UnicodeStringLiteral(s) => Ok(s), _ => self.expected("literal string", next_token), } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index b8336cec8..be11a3140 
100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -94,6 +94,8 @@ pub enum Token { NationalStringLiteral(String), /// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second' EscapedStringLiteral(String), + /// Unicode string literal: i.e: U&'first \000A second' + UnicodeStringLiteral(String), /// Hexadecimal string literal: i.e.: X'deadbeef' HexStringLiteral(String), /// Comma @@ -251,6 +253,7 @@ impl fmt::Display for Token { Token::DollarQuotedString(ref s) => write!(f, "{s}"), Token::NationalStringLiteral(ref s) => write!(f, "N'{s}'"), Token::EscapedStringLiteral(ref s) => write!(f, "E'{s}'"), + Token::UnicodeStringLiteral(ref s) => write!(f, "U&'{s}'"), Token::HexStringLiteral(ref s) => write!(f, "X'{s}'"), Token::SingleQuotedByteStringLiteral(ref s) => write!(f, "B'{s}'"), Token::TripleSingleQuotedByteStringLiteral(ref s) => write!(f, "B'''{s}'''"), @@ -794,6 +797,23 @@ impl<'a> Tokenizer<'a> { } } } + // Unicode string literals like U&'first \000A second' are supported in some dialects, including PostgreSQL + x @ 'u' | x @ 'U' if self.dialect.supports_unicode_string_literal() => { + chars.next(); // consume, to check the next char + if chars.peek() == Some(&'&') { + // we cannot advance the iterator here, as we need to consume the '&' later if the 'u' was an identifier + let mut chars_clone = chars.peekable.clone(); + chars_clone.next(); // consume the '&' in the clone + if chars_clone.peek() == Some(&'\'') { + chars.next(); // consume the '&' in the original iterator + let s = unescape_unicode_single_quoted_string(chars)?; + return Ok(Some(Token::UnicodeStringLiteral(s))); + } + } + // regular identifier starting with an "U" or "u" + let s = self.tokenize_word(x, chars); + Ok(Some(Token::make_word(&s, None))) + } // The spec only allows an uppercase 'X' to introduce a hex // string, but PostgreSQL, at least, allows a lowercase 'x' too. 
x @ 'x' | x @ 'X' => { @@ -1797,6 +1817,64 @@ impl<'a: 'b, 'b> Unescape<'a, 'b> { } } +fn unescape_unicode_single_quoted_string(chars: &mut State<'_>) -> Result { + let mut unescaped = String::new(); + chars.next(); // consume the opening quote + while let Some(c) = chars.next() { + match c { + '\'' => { + if chars.peek() == Some(&'\'') { + chars.next(); + unescaped.push('\''); + } else { + return Ok(unescaped); + } + } + '\\' => match chars.peek() { + Some('\\') => { + chars.next(); + unescaped.push('\\'); + } + Some('+') => { + chars.next(); + unescaped.push(take_char_from_hex_digits(chars, 6)?); + } + _ => unescaped.push(take_char_from_hex_digits(chars, 4)?), + }, + _ => { + unescaped.push(c); + } + } + } + Err(TokenizerError { + message: "Unterminated unicode encoded string literal".to_string(), + location: chars.location(), + }) +} + +fn take_char_from_hex_digits( + chars: &mut State<'_>, + max_digits: usize, +) -> Result { + let mut result = 0u32; + for _ in 0..max_digits { + let next_char = chars.next().ok_or_else(|| TokenizerError { + message: "Unexpected EOF while parsing hex digit in escaped unicode string." 
+ .to_string(), + location: chars.location(), + })?; + let digit = next_char.to_digit(16).ok_or_else(|| TokenizerError { + message: format!("Invalid hex digit in escaped unicode string: {}", next_char), + location: chars.location(), + })?; + result = result * 16 + digit; + } + char::from_u32(result).ok_or_else(|| TokenizerError { + message: format!("Invalid unicode character: {:x}", result), + location: chars.location(), + }) +} + #[cfg(test)] mod tests { use super::*; diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 5ac421da0..44231e7d3 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -4441,3 +4441,35 @@ fn test_table_unnest_with_ordinality() { _ => panic!("Expecting TableFactor::UNNEST with ordinality"), } } + +#[test] +fn test_escaped_string_literal() { + match pg().verified_expr(r#"E'\n'"#) { + Expr::Value(Value::EscapedStringLiteral(s)) => { + assert_eq!("\n", s); + } + _ => unreachable!(), + } +} + +#[test] +fn test_unicode_string_literal() { + let pairs = [ + // Example from the postgres docs + (r#"U&'\0441\043B\043E\043D'"#, "слон"), + // High unicode code point (> 0xFFFF) + (r#"U&'\+01F418'"#, "🐘"), + // Escaped backslash + (r#"U&'\\'"#, r#"\"#), + // Escaped single quote + (r#"U&''''"#, "'"), + ]; + for (input, expected) in pairs { + match pg_and_generic().verified_expr(input) { + Expr::Value(Value::UnicodeStringLiteral(s)) => { + assert_eq!(expected, s); + } + _ => unreachable!(), + } + } +} From f96658006f85b1e88cc112a36584391c01ee766d Mon Sep 17 00:00:00 2001 From: hulk Date: Wed, 31 Jul 2024 04:30:46 +0800 Subject: [PATCH 040/506] Allow to use the GLOBAL keyword before the join operator (#1353) --- src/ast/query.rs | 7 ++++ src/keywords.rs | 1 + src/parser/mod.rs | 5 +++ src/test_utils.rs | 1 + tests/sqlparser_bigquery.rs | 1 + tests/sqlparser_common.rs | 71 ++++++++++++++++++++++++++++++++---- tests/sqlparser_mysql.rs | 1 + tests/sqlparser_postgres.rs | 1 + tests/sqlparser_snowflake.rs | 1 + 9 
files changed, 81 insertions(+), 8 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 978604266..b318f686a 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -1537,6 +1537,9 @@ impl Display for TableVersion { #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Join { pub relation: TableFactor, + /// ClickHouse supports the optional `GLOBAL` keyword before the join operator. + /// See [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select/join) + pub global: bool, pub join_operator: JoinOperator, } @@ -1563,6 +1566,10 @@ impl fmt::Display for Join { } Suffix(constraint) } + if self.global { + write!(f, " GLOBAL")?; + } + match &self.join_operator { JoinOperator::Inner(constraint) => write!( f, diff --git a/src/keywords.rs b/src/keywords.rs index 4b599f12a..ee2bd6173 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -850,6 +850,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::USING, Keyword::CLUSTER, Keyword::DISTRIBUTE, + Keyword::GLOBAL, // for MSSQL-specific OUTER APPLY (seems reserved in most dialects) Keyword::OUTER, Keyword::SET, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 2b1c1ab7f..cd2cf2186 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9015,6 +9015,7 @@ impl<'a> Parser<'a> { // a table alias. 
let mut joins = vec![]; loop { + let global = self.parse_keyword(Keyword::GLOBAL); let join = if self.parse_keyword(Keyword::CROSS) { let join_operator = if self.parse_keyword(Keyword::JOIN) { JoinOperator::CrossJoin @@ -9026,6 +9027,7 @@ impl<'a> Parser<'a> { }; Join { relation: self.parse_table_factor()?, + global, join_operator, } } else if self.parse_keyword(Keyword::OUTER) { @@ -9033,6 +9035,7 @@ impl<'a> Parser<'a> { self.expect_keyword(Keyword::APPLY)?; Join { relation: self.parse_table_factor()?, + global, join_operator: JoinOperator::OuterApply, } } else if self.parse_keyword(Keyword::ASOF) { @@ -9042,6 +9045,7 @@ impl<'a> Parser<'a> { let match_condition = self.parse_parenthesized(Self::parse_expr)?; Join { relation, + global, join_operator: JoinOperator::AsOf { match_condition, constraint: self.parse_join_constraint(false)?, @@ -9127,6 +9131,7 @@ impl<'a> Parser<'a> { let join_constraint = self.parse_join_constraint(natural)?; Join { relation, + global, join_operator: join_operator_type(join_constraint), } }; diff --git a/src/test_utils.rs b/src/test_utils.rs index 5ed6339bd..b8e9ecee4 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -331,6 +331,7 @@ pub fn table_with_alias(name: impl Into, alias: impl Into) -> Ta pub fn join(relation: TableFactor) -> Join { Join { relation, + global: false, join_operator: JoinOperator::Inner(JoinConstraint::Natural), } } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 089a41889..a0dd5a662 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1557,6 +1557,7 @@ fn parse_join_constraint_unnest_alias() { with_offset_alias: None, with_ordinality: false, }, + global: false, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::Identifier("c1".into())), op: BinaryOperator::Eq, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 5de76f78f..a8f3919df 100644 --- a/tests/sqlparser_common.rs +++ 
b/tests/sqlparser_common.rs @@ -5600,6 +5600,7 @@ fn parse_implicit_join() { partitions: vec![], with_ordinality: false, }, + global: false, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], }, @@ -5623,6 +5624,7 @@ fn parse_implicit_join() { partitions: vec![], with_ordinality: false, }, + global: false, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], }, @@ -5646,6 +5648,7 @@ fn parse_cross_join() { partitions: vec![], with_ordinality: false, }, + global: false, join_operator: JoinOperator::CrossJoin, }, only(only(select.from).joins), @@ -5657,6 +5660,7 @@ fn parse_joins_on() { fn join_with_constraint( relation: impl Into, alias: Option, + global: bool, f: impl Fn(JoinConstraint) -> JoinOperator, ) -> Join { Join { @@ -5669,6 +5673,7 @@ fn parse_joins_on() { partitions: vec![], with_ordinality: false, }, + global, join_operator: f(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::Identifier("c1".into())), op: BinaryOperator::Eq, @@ -5682,6 +5687,7 @@ fn parse_joins_on() { vec![join_with_constraint( "t2", table_alias("foo"), + false, JoinOperator::Inner, )] ); @@ -5692,35 +5698,80 @@ fn parse_joins_on() { // Test parsing of different join operators assert_eq!( only(&verified_only_select("SELECT * FROM t1 JOIN t2 ON c1 = c2").from).joins, - vec![join_with_constraint("t2", None, JoinOperator::Inner)] + vec![join_with_constraint("t2", None, false, JoinOperator::Inner)] ); assert_eq!( only(&verified_only_select("SELECT * FROM t1 LEFT JOIN t2 ON c1 = c2").from).joins, - vec![join_with_constraint("t2", None, JoinOperator::LeftOuter)] + vec![join_with_constraint( + "t2", + None, + false, + JoinOperator::LeftOuter + )] ); assert_eq!( only(&verified_only_select("SELECT * FROM t1 RIGHT JOIN t2 ON c1 = c2").from).joins, - vec![join_with_constraint("t2", None, JoinOperator::RightOuter)] + vec![join_with_constraint( + "t2", + None, + false, + JoinOperator::RightOuter + )] ); assert_eq!( only(&verified_only_select("SELECT * FROM t1 LEFT 
SEMI JOIN t2 ON c1 = c2").from).joins, - vec![join_with_constraint("t2", None, JoinOperator::LeftSemi)] + vec![join_with_constraint( + "t2", + None, + false, + JoinOperator::LeftSemi + )] ); assert_eq!( only(&verified_only_select("SELECT * FROM t1 RIGHT SEMI JOIN t2 ON c1 = c2").from).joins, - vec![join_with_constraint("t2", None, JoinOperator::RightSemi)] + vec![join_with_constraint( + "t2", + None, + false, + JoinOperator::RightSemi + )] ); assert_eq!( only(&verified_only_select("SELECT * FROM t1 LEFT ANTI JOIN t2 ON c1 = c2").from).joins, - vec![join_with_constraint("t2", None, JoinOperator::LeftAnti)] + vec![join_with_constraint( + "t2", + None, + false, + JoinOperator::LeftAnti + )] ); assert_eq!( only(&verified_only_select("SELECT * FROM t1 RIGHT ANTI JOIN t2 ON c1 = c2").from).joins, - vec![join_with_constraint("t2", None, JoinOperator::RightAnti)] + vec![join_with_constraint( + "t2", + None, + false, + JoinOperator::RightAnti + )] ); assert_eq!( only(&verified_only_select("SELECT * FROM t1 FULL JOIN t2 ON c1 = c2").from).joins, - vec![join_with_constraint("t2", None, JoinOperator::FullOuter)] + vec![join_with_constraint( + "t2", + None, + false, + JoinOperator::FullOuter + )] + ); + + assert_eq!( + only(&verified_only_select("SELECT * FROM t1 GLOBAL FULL JOIN t2 ON c1 = c2").from).joins, + vec![join_with_constraint( + "t2", + None, + true, + JoinOperator::FullOuter + )] ); } @@ -5741,6 +5792,7 @@ fn parse_joins_using() { partitions: vec![], with_ordinality: false, }, + global: false, join_operator: f(JoinConstraint::Using(vec!["c1".into()])), } } @@ -5805,6 +5857,7 @@ fn parse_natural_join() { partitions: vec![], with_ordinality: false, }, + global: false, join_operator: f(JoinConstraint::Natural), } } @@ -6073,6 +6126,7 @@ fn parse_derived_tables() { partitions: vec![], with_ordinality: false, }, + global: false, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], }), @@ -6983,6 +7037,7 @@ fn lateral_function() { ], alias: None, }, + global: 
false, join_operator: JoinOperator::LeftOuter(JoinConstraint::None), }], }], diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index b0b29f347..1c9c009d9 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1891,6 +1891,7 @@ fn parse_update_with_joins() { partitions: vec![], with_ordinality: false, }, + global: false, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::CompoundIdentifier(vec![ Ident::new("o"), diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 44231e7d3..6410199ab 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -4102,6 +4102,7 @@ fn parse_join_constraint_unnest_alias() { with_offset_alias: None, with_ordinality: false, }, + global: false, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::Identifier("c1".into())), op: BinaryOperator::Eq, diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 7abb1a947..eaf8c1d14 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -2206,6 +2206,7 @@ fn asof_joins() { relation: table_with_alias("trades_unixtime", "tu"), joins: vec![Join { relation: table_with_alias("quotes_unixtime", "qu"), + global: false, join_operator: JoinOperator::AsOf { match_condition: Expr::BinaryOp { left: Box::new(Expr::CompoundIdentifier(vec![ From cc13841a370190df00cfc623593453054ac187e9 Mon Sep 17 00:00:00 2001 From: hulk Date: Wed, 31 Jul 2024 04:31:42 +0800 Subject: [PATCH 041/506] Add support of parsing ON CLUSTER in ALTER TABLE for ClickHouse (#1342) --- src/ast/dml.rs | 8 ++---- src/ast/helpers/stmt_create_table.rs | 4 +-- src/ast/mod.rs | 11 +++++++- src/parser/mod.rs | 21 +++++++-------- src/test_utils.rs | 1 + tests/sqlparser_common.rs | 38 ++++++++++++++++++++++++++-- tests/sqlparser_mysql.rs | 3 +++ tests/sqlparser_postgres.rs | 2 ++ 8 files changed, 67 insertions(+), 21 deletions(-) diff --git 
a/src/ast/dml.rs b/src/ast/dml.rs index 0ebbaa3e9..aad7d2e22 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -126,7 +126,7 @@ pub struct CreateTable { pub on_commit: Option, /// ClickHouse "ON CLUSTER" clause: /// - pub on_cluster: Option, + pub on_cluster: Option, /// ClickHouse "PRIMARY KEY " clause. /// pub primary_key: Option>, @@ -206,11 +206,7 @@ impl Display for CreateTable { name = self.name, )?; if let Some(on_cluster) = &self.on_cluster { - write!( - f, - " ON CLUSTER {}", - on_cluster.replace('{', "'{").replace('}', "}'") - )?; + write!(f, " ON CLUSTER {}", on_cluster)?; } if !self.columns.is_empty() || !self.constraints.is_empty() { write!(f, " ({}", display_comma_separated(&self.columns))?; diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index 92c75e6a4..19efaeece 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -73,7 +73,7 @@ pub struct CreateTableBuilder { pub default_charset: Option, pub collation: Option, pub on_commit: Option, - pub on_cluster: Option, + pub on_cluster: Option, pub primary_key: Option>, pub order_by: Option>, pub partition_by: Option>, @@ -261,7 +261,7 @@ impl CreateTableBuilder { self } - pub fn on_cluster(mut self, on_cluster: Option) -> Self { + pub fn on_cluster(mut self, on_cluster: Option) -> Self { self.on_cluster = on_cluster; self } diff --git a/src/ast/mod.rs b/src/ast/mod.rs index d27baadc4..70f96c5c5 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2162,6 +2162,10 @@ pub enum Statement { only: bool, operations: Vec, location: Option, + /// ClickHouse dialect supports `ON CLUSTER` clause for ALTER TABLE + /// For example: `ALTER TABLE table_name ON CLUSTER cluster_name ADD COLUMN c UInt32` + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/update) + on_cluster: Option, }, /// ```sql /// ALTER INDEX @@ -3632,6 +3636,7 @@ impl fmt::Display for Statement { only, operations, location, + on_cluster, } 
=> { write!(f, "ALTER TABLE ")?; if *if_exists { @@ -3640,9 +3645,13 @@ impl fmt::Display for Statement { if *only { write!(f, "ONLY ")?; } + write!(f, "{name} ", name = name)?; + if let Some(cluster) = on_cluster { + write!(f, "ON CLUSTER {cluster} ")?; + } write!( f, - "{name} {operations}", + "{operations}", operations = display_comma_separated(operations) )?; if let Some(loc) = location { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index cd2cf2186..725e24bfb 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5379,6 +5379,14 @@ impl<'a> Parser<'a> { } } + fn parse_optional_on_cluster(&mut self) -> Result, ParserError> { + if self.parse_keywords(&[Keyword::ON, Keyword::CLUSTER]) { + Ok(Some(self.parse_identifier(false)?)) + } else { + Ok(None) + } + } + pub fn parse_create_table( &mut self, or_replace: bool, @@ -5391,16 +5399,7 @@ impl<'a> Parser<'a> { let table_name = self.parse_object_name(allow_unquoted_hyphen)?; // Clickhouse has `ON CLUSTER 'cluster'` syntax for DDLs - let on_cluster = if self.parse_keywords(&[Keyword::ON, Keyword::CLUSTER]) { - let next_token = self.next_token(); - match next_token.token { - Token::SingleQuotedString(s) => Some(s), - Token::Word(s) => Some(s.to_string()), - _ => self.expected("identifier or cluster literal", next_token)?, - } - } else { - None - }; + let on_cluster = self.parse_optional_on_cluster()?; let like = if self.parse_keyword(Keyword::LIKE) || self.parse_keyword(Keyword::ILIKE) { self.parse_object_name(allow_unquoted_hyphen).ok() @@ -6583,6 +6582,7 @@ impl<'a> Parser<'a> { let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); let only = self.parse_keyword(Keyword::ONLY); // [ ONLY ] let table_name = self.parse_object_name(false)?; + let on_cluster = self.parse_optional_on_cluster()?; let operations = self.parse_comma_separated(Parser::parse_alter_table_operation)?; let mut location = None; @@ -6604,6 +6604,7 @@ impl<'a> Parser<'a> { only, operations, location, + on_cluster, }) } 
Keyword::INDEX => { diff --git a/src/test_utils.rs b/src/test_utils.rs index b8e9ecee4..d9100d351 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -274,6 +274,7 @@ pub fn alter_table_op_with_name(stmt: Statement, expected_name: &str) -> AlterTa if_exists, only: is_only, operations, + on_cluster: _, location: _, } => { assert_eq!(name.to_string(), expected_name); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index a8f3919df..44e245254 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -3506,7 +3506,7 @@ fn parse_create_table_on_cluster() { let sql = "CREATE TABLE t ON CLUSTER '{cluster}' (a INT, b INT)"; match generic.verified_stmt(sql) { Statement::CreateTable(CreateTable { on_cluster, .. }) => { - assert_eq!(on_cluster.unwrap(), "{cluster}".to_string()); + assert_eq!(on_cluster.unwrap().to_string(), "'{cluster}'".to_string()); } _ => unreachable!(), } @@ -3515,7 +3515,7 @@ fn parse_create_table_on_cluster() { let sql = "CREATE TABLE t ON CLUSTER my_cluster (a INT, b INT)"; match generic.verified_stmt(sql) { Statement::CreateTable(CreateTable { on_cluster, .. }) => { - assert_eq!(on_cluster.unwrap(), "my_cluster".to_string()); + assert_eq!(on_cluster.unwrap().to_string(), "my_cluster".to_string()); } _ => unreachable!(), } @@ -3822,6 +3822,40 @@ fn parse_alter_table() { } } +#[test] +fn test_alter_table_with_on_cluster() { + match all_dialects() + .verified_stmt("ALTER TABLE t ON CLUSTER 'cluster' ADD CONSTRAINT bar PRIMARY KEY (baz)") + { + Statement::AlterTable { + name, on_cluster, .. + } => { + std::assert_eq!(name.to_string(), "t"); + std::assert_eq!(on_cluster, Some(Ident::with_quote('\'', "cluster"))); + } + _ => unreachable!(), + } + + match all_dialects() + .verified_stmt("ALTER TABLE t ON CLUSTER cluster_name ADD CONSTRAINT bar PRIMARY KEY (baz)") + { + Statement::AlterTable { + name, on_cluster, .. 
+ } => { + std::assert_eq!(name.to_string(), "t"); + std::assert_eq!(on_cluster, Some(Ident::new("cluster_name"))); + } + _ => unreachable!(), + } + + let res = all_dialects() + .parse_sql_statements("ALTER TABLE t ON CLUSTER 123 ADD CONSTRAINT bar PRIMARY KEY (baz)"); + std::assert_eq!( + res.unwrap_err(), + ParserError::ParserError("Expected: identifier, found: 123".to_string()) + ) +} + #[test] fn parse_alter_index() { let rename_index = "ALTER INDEX idx RENAME TO new_idx"; diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 1c9c009d9..397a722b5 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1976,6 +1976,7 @@ fn parse_alter_table_add_column() { only, operations, location: _, + on_cluster: _, } => { assert_eq!(name.to_string(), "tab"); assert!(!if_exists); @@ -2005,6 +2006,7 @@ fn parse_alter_table_add_column() { only, operations, location: _, + on_cluster: _, } => { assert_eq!(name.to_string(), "tab"); assert!(!if_exists); @@ -2042,6 +2044,7 @@ fn parse_alter_table_add_columns() { only, operations, location: _, + on_cluster: _, } => { assert_eq!(name.to_string(), "tab"); assert!(!if_exists); diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 6410199ab..7406bdd74 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -677,6 +677,7 @@ fn parse_alter_table_add_columns() { only, operations, location: _, + on_cluster: _, } => { assert_eq!(name.to_string(), "tab"); assert!(if_exists); @@ -759,6 +760,7 @@ fn parse_alter_table_owner_to() { only: _, operations, location: _, + on_cluster: _, } => { assert_eq!(name.to_string(), "tab"); assert_eq!( From a692ba5fd1902e0c40dc5714304594aee642a899 Mon Sep 17 00:00:00 2001 From: hulk Date: Fri, 2 Aug 2024 05:20:56 +0800 Subject: [PATCH 042/506] Add support of parsing OPTIMIZE TABLE statement for ClickHouse (#1359) --- src/ast/ddl.rs | 41 +++++++++++++++++++----- src/ast/mod.rs | 36 ++++++++++++++++++++- src/keywords.rs | 3 ++ 
src/parser/mod.rs | 45 +++++++++++++++++++++++++- tests/sqlparser_clickhouse.rs | 60 +++++++++++++++++++++++++++++++++++ 5 files changed, 175 insertions(+), 10 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 5cc671cf5..af679d469 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -1296,20 +1296,45 @@ impl fmt::Display for UserDefinedTypeCompositeAttributeDef { } } -/// PARTITION statement used in ALTER TABLE et al. such as in Hive SQL +/// PARTITION statement used in ALTER TABLE et al. such as in Hive and ClickHouse SQL. +/// For example, ClickHouse's OPTIMIZE TABLE supports syntax like PARTITION ID 'partition_id' and PARTITION expr. +/// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize) #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub struct Partition { - pub partitions: Vec, +pub enum Partition { + Identifier(Ident), + Expr(Expr), + Partitions(Vec), } impl fmt::Display for Partition { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "PARTITION ({})", - display_comma_separated(&self.partitions) - ) + match self { + Partition::Identifier(id) => write!(f, "PARTITION ID {id}"), + Partition::Expr(expr) => write!(f, "PARTITION {expr}"), + Partition::Partitions(partitions) => { + write!(f, "PARTITION ({})", display_comma_separated(partitions)) + } + } + } +} + +/// DEDUPLICATE statement used in OPTIMIZE TABLE et al. 
such as in ClickHouse SQL +/// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum Deduplicate { + All, + ByExpression(Expr), +} + +impl fmt::Display for Deduplicate { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Deduplicate::All => write!(f, "DEDUPLICATE"), + Deduplicate::ByExpression(expr) => write!(f, "DEDUPLICATE BY {expr}"), + } } } diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 70f96c5c5..6444556ef 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -33,7 +33,7 @@ pub use self::data_type::{ pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue}; pub use self::ddl::{ AlterColumnOperation, AlterIndexOperation, AlterTableOperation, ColumnDef, ColumnOption, - ColumnOptionDef, ConstraintCharacteristics, DeferrableInitial, GeneratedAs, + ColumnOptionDef, ConstraintCharacteristics, Deduplicate, DeferrableInitial, GeneratedAs, GeneratedExpressionMode, IndexOption, IndexType, KeyOrIndexDisplay, Owner, Partition, ProcedureParam, ReferentialAction, TableConstraint, UserDefinedTypeCompositeAttributeDef, UserDefinedTypeRepresentation, ViewColumnDef, @@ -2831,6 +2831,18 @@ pub enum Statement { to: Ident, with: Vec, }, + /// ```sql + /// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]] + /// ``` + /// + /// See ClickHouse + OptimizeTable { + name: ObjectName, + on_cluster: Option, + partition: Option, + include_final: bool, + deduplicate: Option, + }, } impl fmt::Display for Statement { @@ -4283,6 +4295,28 @@ impl fmt::Display for Statement { Ok(()) } + Statement::OptimizeTable { + name, + on_cluster, + partition, + include_final, + deduplicate, + } => { + write!(f, "OPTIMIZE TABLE {name}")?; + if let 
Some(on_cluster) = on_cluster { + write!(f, " ON CLUSTER {on_cluster}", on_cluster = on_cluster)?; + } + if let Some(partition) = partition { + write!(f, " {partition}", partition = partition)?; + } + if *include_final { + write!(f, " FINAL")?; + } + if let Some(deduplicate) = deduplicate { + write!(f, " {deduplicate}")?; + } + Ok(()) + } } } } diff --git a/src/keywords.rs b/src/keywords.rs index ee2bd6173..49bd969af 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -218,6 +218,7 @@ define_keywords!( DECADE, DECIMAL, DECLARE, + DEDUPLICATE, DEFAULT, DEFAULT_DDL_COLLATION, DEFERRABLE, @@ -301,6 +302,7 @@ define_keywords!( FILE_FORMAT, FILL, FILTER, + FINAL, FIRST, FIRST_VALUE, FIXEDSTRING, @@ -354,6 +356,7 @@ define_keywords!( HOSTS, HOUR, HOURS, + ID, IDENTITY, IF, IGNORE, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 725e24bfb..67d58ea75 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -551,6 +551,10 @@ impl<'a> Parser<'a> { Keyword::LOAD if dialect_of!(self is DuckDbDialect | GenericDialect) => { Ok(self.parse_load()?) } + // `OPTIMIZE` is clickhouse specific https://clickhouse.tech/docs/en/sql-reference/statements/optimize/ + Keyword::OPTIMIZE if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + Ok(self.parse_optimize_table()?) 
+ } _ => self.expected("an SQL statement", next_token), }, Token::LParen => { @@ -6270,7 +6274,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::LParen)?; let partitions = self.parse_comma_separated(Parser::parse_expr)?; self.expect_token(&Token::RParen)?; - Ok(Partition { partitions }) + Ok(Partition::Partitions(partitions)) } pub fn parse_alter_table_operation(&mut self) -> Result { @@ -11165,6 +11169,45 @@ impl<'a> Parser<'a> { Ok(Statement::Load { extension_name }) } + /// ```sql + /// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]] + /// ``` + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize) + pub fn parse_optimize_table(&mut self) -> Result { + self.expect_keyword(Keyword::TABLE)?; + let name = self.parse_object_name(false)?; + let on_cluster = self.parse_optional_on_cluster()?; + + let partition = if self.parse_keyword(Keyword::PARTITION) { + if self.parse_keyword(Keyword::ID) { + Some(Partition::Identifier(self.parse_identifier(false)?)) + } else { + Some(Partition::Expr(self.parse_expr()?)) + } + } else { + None + }; + + let include_final = self.parse_keyword(Keyword::FINAL); + let deduplicate = if self.parse_keyword(Keyword::DEDUPLICATE) { + if self.parse_keyword(Keyword::BY) { + Some(Deduplicate::ByExpression(self.parse_expr()?)) + } else { + Some(Deduplicate::All) + } + } else { + None + }; + + Ok(Statement::OptimizeTable { + name, + on_cluster, + partition, + include_final, + deduplicate, + }) + } + /// ```sql /// CREATE [ { TEMPORARY | TEMP } ] SEQUENCE [ IF NOT EXISTS ] /// ``` diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 6fdadc366..5263be29e 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -25,6 +25,7 @@ use sqlparser::ast::Value::Number; use sqlparser::ast::*; use sqlparser::dialect::ClickHouseDialect; use sqlparser::dialect::GenericDialect; +use 
sqlparser::parser::ParserError::ParserError; #[test] fn parse_map_access_expr() { @@ -221,6 +222,65 @@ fn parse_create_table() { ); } +#[test] +fn parse_optimize_table() { + clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0"); + clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE db.t0"); + clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 ON CLUSTER 'cluster'"); + clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 ON CLUSTER 'cluster' FINAL"); + clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 FINAL DEDUPLICATE"); + clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 DEDUPLICATE"); + clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 DEDUPLICATE BY id"); + clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 FINAL DEDUPLICATE BY id"); + clickhouse_and_generic() + .verified_stmt("OPTIMIZE TABLE t0 PARTITION tuple('2023-04-22') DEDUPLICATE BY id"); + match clickhouse_and_generic().verified_stmt( + "OPTIMIZE TABLE t0 ON CLUSTER cluster PARTITION ID '2024-07' FINAL DEDUPLICATE BY id", + ) { + Statement::OptimizeTable { + name, + on_cluster, + partition, + include_final, + deduplicate, + .. 
+ } => { + assert_eq!(name.to_string(), "t0"); + assert_eq!(on_cluster, Some(Ident::new("cluster"))); + assert_eq!( + partition, + Some(Partition::Identifier(Ident::with_quote('\'', "2024-07"))) + ); + assert!(include_final); + assert_eq!( + deduplicate, + Some(Deduplicate::ByExpression(Identifier(Ident::new("id")))) + ); + } + _ => unreachable!(), + } + + // negative cases + assert_eq!( + clickhouse_and_generic() + .parse_sql_statements("OPTIMIZE TABLE t0 DEDUPLICATE BY") + .unwrap_err(), + ParserError("Expected: an expression:, found: EOF".to_string()) + ); + assert_eq!( + clickhouse_and_generic() + .parse_sql_statements("OPTIMIZE TABLE t0 PARTITION") + .unwrap_err(), + ParserError("Expected: an expression:, found: EOF".to_string()) + ); + assert_eq!( + clickhouse_and_generic() + .parse_sql_statements("OPTIMIZE TABLE t0 PARTITION ID") + .unwrap_err(), + ParserError("Expected: identifier, found: EOF".to_string()) + ); +} + fn column_def(name: Ident, data_type: DataType) -> ColumnDef { ColumnDef { name, From d49acc67b13e1d68f2e6a25546161a68e165da4f Mon Sep 17 00:00:00 2001 From: Jesse Date: Thu, 1 Aug 2024 23:28:15 +0200 Subject: [PATCH 043/506] Parse SETTINGS clause for ClickHouse table-valued functions (#1358) --- src/ast/mod.rs | 4 +- src/ast/query.rs | 25 ++++++++- src/parser/mod.rs | 95 ++++++++++++++++++++++++----------- tests/sqlparser_clickhouse.rs | 77 ++++++++++++++++++++++++++++ 4 files changed, 167 insertions(+), 34 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 6444556ef..e0c929a9d 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -50,8 +50,8 @@ pub use self::query::{ OffsetRows, OrderBy, OrderByExpr, PivotValueSource, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Setting, SymbolDefinition, Table, - TableAlias, TableFactor, TableVersion, TableWithJoins, Top, TopQuantity, ValueTableMode, - Values, 
WildcardAdditionalOptions, With, WithFill, + TableAlias, TableFactor, TableFunctionArgs, TableVersion, TableWithJoins, Top, TopQuantity, + ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill, }; pub use self::value::{ escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString, diff --git a/src/ast/query.rs b/src/ast/query.rs index b318f686a..cda7430be 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -899,6 +899,19 @@ impl fmt::Display for ExprWithAlias { } } +/// Arguments to a table-valued function +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct TableFunctionArgs { + pub args: Vec, + /// ClickHouse-specific SETTINGS clause. + /// For example, + /// `SELECT * FROM executable('generate_random.py', TabSeparated, 'id UInt32, random String', SETTINGS send_chunk_header = false, pool_size = 16)` + /// [`executable` table function](https://clickhouse.com/docs/en/engines/table-functions/executable) + pub settings: Option>, +} + /// A table name or a parenthesized subquery with an optional alias #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -916,7 +929,7 @@ pub enum TableFactor { /// This field's value is `Some(v)`, where `v` is a (possibly empty) /// vector of arguments, in the case of a table-valued function call, /// whereas it's `None` in the case of a regular table name. - args: Option>, + args: Option, /// MSSQL-specific `WITH (...)` hints such as NOLOCK. 
with_hints: Vec, /// Optional version qualifier to facilitate table time-travel, as @@ -1314,7 +1327,15 @@ impl fmt::Display for TableFactor { write!(f, "PARTITION ({})", display_comma_separated(partitions))?; } if let Some(args) = args { - write!(f, "({})", display_comma_separated(args))?; + write!(f, "(")?; + write!(f, "{}", display_comma_separated(&args.args))?; + if let Some(ref settings) = args.settings { + if !args.args.is_empty() { + write!(f, ", ")?; + } + write!(f, "SETTINGS {}", display_comma_separated(settings))?; + } + write!(f, ")")?; } if *with_ordinality { write!(f, " WITH ORDINALITY")?; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 67d58ea75..da9ca2672 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3430,6 +3430,29 @@ impl<'a> Parser<'a> { Ok(values) } + /// Parse the comma of a comma-separated syntax element. + /// Returns true if there is a next element + fn is_parse_comma_separated_end(&mut self) -> bool { + if !self.consume_token(&Token::Comma) { + true + } else if self.options.trailing_commas { + let token = self.peek_token().token; + match token { + Token::Word(ref kw) + if keywords::RESERVED_FOR_COLUMN_ALIAS.contains(&kw.keyword) => + { + true + } + Token::RParen | Token::SemiColon | Token::EOF | Token::RBracket | Token::RBrace => { + true + } + _ => false, + } + } else { + false + } + } + /// Parse a comma-separated list of 1+ items accepted by `F` pub fn parse_comma_separated(&mut self, mut f: F) -> Result, ParserError> where @@ -3438,22 +3461,8 @@ impl<'a> Parser<'a> { let mut values = vec![]; loop { values.push(f(self)?); - if !self.consume_token(&Token::Comma) { + if self.is_parse_comma_separated_end() { break; - } else if self.options.trailing_commas { - match self.peek_token().token { - Token::Word(kw) - if keywords::RESERVED_FOR_COLUMN_ALIAS.contains(&kw.keyword) => - { - break; - } - Token::RParen - | Token::SemiColon - | Token::EOF - | Token::RBracket - | Token::RBrace => break, - _ => continue, - } } } 
Ok(values) @@ -8104,19 +8113,7 @@ impl<'a> Parser<'a> { vec![] }; - let settings = if dialect_of!(self is ClickHouseDialect|GenericDialect) - && self.parse_keyword(Keyword::SETTINGS) - { - let key_values = self.parse_comma_separated(|p| { - let key = p.parse_identifier(false)?; - p.expect_token(&Token::Eq)?; - let value = p.parse_value()?; - Ok(Setting { key, value }) - })?; - Some(key_values) - } else { - None - }; + let settings = self.parse_settings()?; let fetch = if self.parse_keyword(Keyword::FETCH) { Some(self.parse_fetch()?) @@ -8163,6 +8160,23 @@ impl<'a> Parser<'a> { } } + fn parse_settings(&mut self) -> Result>, ParserError> { + let settings = if dialect_of!(self is ClickHouseDialect|GenericDialect) + && self.parse_keyword(Keyword::SETTINGS) + { + let key_values = self.parse_comma_separated(|p| { + let key = p.parse_identifier(false)?; + p.expect_token(&Token::Eq)?; + let value = p.parse_value()?; + Ok(Setting { key, value }) + })?; + Some(key_values) + } else { + None + }; + Ok(settings) + } + /// Parse a mssql `FOR [XML | JSON | BROWSE]` clause pub fn parse_for_clause(&mut self) -> Result, ParserError> { if self.parse_keyword(Keyword::XML) { @@ -9382,9 +9396,9 @@ impl<'a> Parser<'a> { // Parse potential version qualifier let version = self.parse_table_version()?; - // Postgres, MSSQL: table-valued functions: + // Postgres, MSSQL, ClickHouse: table-valued functions: let args = if self.consume_token(&Token::LParen) { - Some(self.parse_optional_args()?) + Some(self.parse_table_function_args()?) } else { None }; @@ -10327,6 +10341,27 @@ impl<'a> Parser<'a> { } } + fn parse_table_function_args(&mut self) -> Result { + if self.consume_token(&Token::RParen) { + return Ok(TableFunctionArgs { + args: vec![], + settings: None, + }); + } + let mut args = vec![]; + let settings = loop { + if let Some(settings) = self.parse_settings()? 
{ + break Some(settings); + } + args.push(self.parse_function_args()?); + if self.is_parse_comma_separated_end() { + break None; + } + }; + self.expect_token(&Token::RParen)?; + Ok(TableFunctionArgs { args, settings }) + } + /// Parses a potentially empty list of arguments to a window function /// (including the closing parenthesis). /// diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 5263be29e..4108958fb 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -1151,6 +1151,83 @@ fn parse_create_table_on_commit_and_as_query() { } } +#[test] +fn parse_select_table_function_settings() { + fn check_settings(sql: &str, expected: &TableFunctionArgs) { + match clickhouse_and_generic().verified_stmt(sql) { + Statement::Query(q) => { + let from = &q.body.as_select().unwrap().from; + assert_eq!(from.len(), 1); + assert_eq!(from[0].joins, vec![]); + match &from[0].relation { + Table { args, .. } => { + let args = args.as_ref().unwrap(); + assert_eq!(args, expected); + } + _ => unreachable!(), + } + } + _ => unreachable!(), + } + } + check_settings( + "SELECT * FROM table_function(arg, SETTINGS s0 = 3, s1 = 's')", + &TableFunctionArgs { + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( + Expr::Identifier("arg".into()), + ))], + + settings: Some(vec![ + Setting { + key: "s0".into(), + value: Value::Number("3".parse().unwrap(), false), + }, + Setting { + key: "s1".into(), + value: Value::SingleQuotedString("s".into()), + }, + ]), + }, + ); + check_settings( + r#"SELECT * FROM table_function(arg)"#, + &TableFunctionArgs { + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( + Expr::Identifier("arg".into()), + ))], + settings: None, + }, + ); + check_settings( + "SELECT * FROM table_function(SETTINGS s0 = 3, s1 = 's')", + &TableFunctionArgs { + args: vec![], + settings: Some(vec![ + Setting { + key: "s0".into(), + value: Value::Number("3".parse().unwrap(), false), + }, + Setting { + key: "s1".into(), + value: 
Value::SingleQuotedString("s".into()), + }, + ]), + }, + ); + let invalid_cases = vec![ + "SELECT * FROM t(SETTINGS a)", + "SELECT * FROM t(SETTINGS a=)", + "SELECT * FROM t(SETTINGS a=1, b)", + "SELECT * FROM t(SETTINGS a=1, b=)", + "SELECT * FROM t(SETTINGS a=1, b=c)", + ]; + for sql in invalid_cases { + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("Expected: SETTINGS key = value, found: "); + } +} + fn clickhouse() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {})], From 8f8c96f87ffe58945a0875c9c897f36c989b0095 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Sun, 4 Aug 2024 19:47:32 +0800 Subject: [PATCH 044/506] Support parsing empty map literal syntax for DuckDB and Genric (#1361) --- src/parser/mod.rs | 42 ++++++++++++++++----------------------- tests/sqlparser_common.rs | 2 ++ 2 files changed, 19 insertions(+), 25 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index da9ca2672..fe8acb4f2 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1849,17 +1849,9 @@ impl<'a> Parser<'a> { /// Parses an array expression `[ex1, ex2, ..]` /// if `named` is `true`, came from an expression like `ARRAY[ex1, ex2]` pub fn parse_array_expr(&mut self, named: bool) -> Result { - if self.peek_token().token == Token::RBracket { - let _ = self.next_token(); // consume ] - Ok(Expr::Array(Array { - elem: vec![], - named, - })) - } else { - let exprs = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RBracket)?; - Ok(Expr::Array(Array { elem: exprs, named })) - } + let exprs = self.parse_comma_separated0(Parser::parse_expr, Token::RBracket)?; + self.expect_token(&Token::RBracket)?; + Ok(Expr::Array(Array { elem: exprs, named })) } pub fn parse_listagg_on_overflow(&mut self) -> Result, ParserError> { @@ -2352,11 +2344,8 @@ impl<'a> Parser<'a> { /// [map]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps fn parse_duckdb_map_literal(&mut self) -> Result { 
self.expect_token(&Token::LBrace)?; - - let fields = self.parse_comma_separated(Self::parse_duckdb_map_field)?; - + let fields = self.parse_comma_separated0(Self::parse_duckdb_map_field, Token::RBrace)?; self.expect_token(&Token::RBrace)?; - Ok(Expr::Map(Map { entries: fields })) } @@ -2937,7 +2926,7 @@ impl<'a> Parser<'a> { Expr::InList { expr: Box::new(expr), list: if self.dialect.supports_in_empty_list() { - self.parse_comma_separated0(Parser::parse_expr)? + self.parse_comma_separated0(Parser::parse_expr, Token::RParen)? } else { self.parse_comma_separated(Parser::parse_expr)? }, @@ -3479,18 +3468,20 @@ impl<'a> Parser<'a> { } /// Parse a comma-separated list of 0+ items accepted by `F` - pub fn parse_comma_separated0(&mut self, f: F) -> Result, ParserError> + /// * `end_token` - expected end token for the closure (e.g. [Token::RParen], [Token::RBrace] ...) + pub fn parse_comma_separated0( + &mut self, + f: F, + end_token: Token, + ) -> Result, ParserError> where F: FnMut(&mut Parser<'a>) -> Result, { - // () - if matches!(self.peek_token().token, Token::RParen) { + if self.peek_token().token == end_token { return Ok(vec![]); } - // (,) - if self.options.trailing_commas - && matches!(self.peek_tokens(), [Token::Comma, Token::RParen]) - { + + if self.options.trailing_commas && self.peek_tokens() == [Token::Comma, end_token] { let _ = self.consume_token(&Token::Comma); return Ok(vec![]); } @@ -4059,7 +4050,7 @@ impl<'a> Parser<'a> { }) }; self.expect_token(&Token::LParen)?; - let args = self.parse_comma_separated0(parse_function_param)?; + let args = self.parse_comma_separated0(parse_function_param, Token::RParen)?; self.expect_token(&Token::RParen)?; let return_type = if self.parse_keyword(Keyword::RETURNS) { @@ -10713,7 +10704,8 @@ impl<'a> Parser<'a> { } if self.consume_token(&Token::LParen) { - let interpolations = self.parse_comma_separated0(|p| p.parse_interpolation())?; + let interpolations = + self.parse_comma_separated0(|p| p.parse_interpolation(), 
Token::RParen)?; self.expect_token(&Token::RParen)?; // INTERPOLATE () and INTERPOLATE ( ... ) variants return Ok(Some(Interpolate { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 44e245254..7ec017269 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -10282,6 +10282,8 @@ fn test_map_syntax() { }), }, ); + + check("MAP {}", Expr::Map(Map { entries: vec![] })); } #[test] From a5480ae4982d84d37c6294b3e70ca24fb72d6a4d Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Tue, 6 Aug 2024 12:49:37 +0100 Subject: [PATCH 045/506] Support `Dialect` level precedence, update Postgres `Dialect` to match Postgres (#1360) --- src/ast/operator.rs | 2 +- src/dialect/mod.rs | 165 +++++++++++++++++++++++++++++++++++- src/dialect/postgresql.rs | 134 +++++++++++++++++++++++++++++ src/dialect/snowflake.rs | 9 ++ src/parser/mod.rs | 148 +++++--------------------------- tests/sqlparser_postgres.rs | 112 ++++++++++++++++++++++++ 6 files changed, 440 insertions(+), 130 deletions(-) diff --git a/src/ast/operator.rs b/src/ast/operator.rs index e70df344a..db6ed0564 100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -151,7 +151,7 @@ pub enum BinaryOperator { Arrow, /// The `->>` operator. /// - /// On PostgreSQL, this operator that extracts a JSON object field or JSON + /// On PostgreSQL, this operator extracts a JSON object field or JSON /// array element and converts it to text, for example `'{"a":"b"}'::json /// ->> 'a'` or `[1, 2, 3]'::json ->> 2`. 
/// diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 22e0baeb2..fc45545d4 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -24,12 +24,13 @@ mod redshift; mod snowflake; mod sqlite; -use crate::ast::{Expr, Statement}; use core::any::{Any, TypeId}; use core::fmt::Debug; use core::iter::Peekable; use core::str::Chars; +use log::debug; + pub use self::ansi::AnsiDialect; pub use self::bigquery::BigQueryDialect; pub use self::clickhouse::ClickHouseDialect; @@ -43,8 +44,11 @@ pub use self::postgresql::PostgreSqlDialect; pub use self::redshift::RedshiftSqlDialect; pub use self::snowflake::SnowflakeDialect; pub use self::sqlite::SQLiteDialect; +use crate::ast::{Expr, Statement}; pub use crate::keywords; +use crate::keywords::Keyword; use crate::parser::{Parser, ParserError}; +use crate::tokenizer::Token; #[cfg(not(feature = "std"))] use alloc::boxed::Box; @@ -300,13 +304,172 @@ pub trait Dialect: Debug + Any { // return None to fall back to the default behavior None } + + /// Get the precedence of the next token. This "full" method means all precedence logic and remain + /// in the dialect. while still allowing overriding the `get_next_precedence` method with the option to + /// fallback to the default behavior. 
+ /// + /// Higher number => higher precedence + fn get_next_precedence_full(&self, parser: &Parser) -> Result { + if let Some(precedence) = self.get_next_precedence(parser) { + return precedence; + } + + let token = parser.peek_token(); + debug!("get_next_precedence() {:?}", token); + match token.token { + Token::Word(w) if w.keyword == Keyword::OR => Ok(OR_PREC), + Token::Word(w) if w.keyword == Keyword::AND => Ok(AND_PREC), + Token::Word(w) if w.keyword == Keyword::XOR => Ok(XOR_PREC), + + Token::Word(w) if w.keyword == Keyword::AT => { + match ( + parser.peek_nth_token(1).token, + parser.peek_nth_token(2).token, + ) { + (Token::Word(w), Token::Word(w2)) + if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE => + { + Ok(AT_TZ_PREC) + } + _ => Ok(UNKNOWN_PREC), + } + } + + Token::Word(w) if w.keyword == Keyword::NOT => match parser.peek_nth_token(1).token { + // The precedence of NOT varies depending on keyword that + // follows it. If it is followed by IN, BETWEEN, or LIKE, + // it takes on the precedence of those tokens. Otherwise, it + // is not an infix operator, and therefore has zero + // precedence. 
+ Token::Word(w) if w.keyword == Keyword::IN => Ok(BETWEEN_PREC), + Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(BETWEEN_PREC), + Token::Word(w) if w.keyword == Keyword::LIKE => Ok(LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(LIKE_PREC), + _ => Ok(UNKNOWN_PREC), + }, + Token::Word(w) if w.keyword == Keyword::IS => Ok(IS_PREC), + Token::Word(w) if w.keyword == Keyword::IN => Ok(BETWEEN_PREC), + Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(BETWEEN_PREC), + Token::Word(w) if w.keyword == Keyword::LIKE => Ok(LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(BETWEEN_PREC), + Token::Word(w) if w.keyword == Keyword::DIV => Ok(MUL_DIV_MOD_OP_PREC), + Token::Eq + | Token::Lt + | Token::LtEq + | Token::Neq + | Token::Gt + | Token::GtEq + | Token::DoubleEq + | Token::Tilde + | Token::TildeAsterisk + | Token::ExclamationMarkTilde + | Token::ExclamationMarkTildeAsterisk + | Token::DoubleTilde + | Token::DoubleTildeAsterisk + | Token::ExclamationMarkDoubleTilde + | Token::ExclamationMarkDoubleTildeAsterisk + | Token::Spaceship => Ok(EQ_PREC), + Token::Pipe => Ok(PIPE_PREC), + Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(CARET_PREC), + Token::Ampersand => Ok(AMPERSAND_PREC), + Token::Plus | Token::Minus => Ok(PLUS_MINUS_PREC), + Token::Mul | Token::Div | Token::DuckIntDiv | Token::Mod | Token::StringConcat => { + Ok(MUL_DIV_MOD_OP_PREC) + } + Token::DoubleColon + | Token::ExclamationMark + | Token::LBracket + | Token::Overlap + | 
Token::CaretAt => Ok(DOUBLE_COLON_PREC), + // Token::Colon if (self as dyn Dialect).is::() => Ok(DOUBLE_COLON_PREC), + Token::Arrow + | Token::LongArrow + | Token::HashArrow + | Token::HashLongArrow + | Token::AtArrow + | Token::ArrowAt + | Token::HashMinus + | Token::AtQuestion + | Token::AtAt + | Token::Question + | Token::QuestionAnd + | Token::QuestionPipe + | Token::CustomBinaryOperator(_) => Ok(PG_OTHER_PREC), + _ => Ok(UNKNOWN_PREC), + } + } + /// Dialect-specific statement parser override fn parse_statement(&self, _parser: &mut Parser) -> Option> { // return None to fall back to the default behavior None } + + /// The following precedence values are used directly by `Parse` or in dialects, + /// so have to be made public by the dialect. + fn prec_double_colon(&self) -> u8 { + DOUBLE_COLON_PREC + } + + fn prec_mul_div_mod_op(&self) -> u8 { + MUL_DIV_MOD_OP_PREC + } + + fn prec_plus_minus(&self) -> u8 { + PLUS_MINUS_PREC + } + + fn prec_between(&self) -> u8 { + BETWEEN_PREC + } + + fn prec_like(&self) -> u8 { + LIKE_PREC + } + + fn prec_unary_not(&self) -> u8 { + UNARY_NOT_PREC + } + + fn prec_unknown(&self) -> u8 { + UNKNOWN_PREC + } } +// Define the lexical Precedence of operators. +// +// Uses (APPROXIMATELY) as a reference +// higher number = higher precedence +// +// NOTE: The pg documentation is incomplete, e.g. the AT TIME ZONE operator +// actually has higher precedence than addition. +// See . 
+const DOUBLE_COLON_PREC: u8 = 50; +const AT_TZ_PREC: u8 = 41; +const MUL_DIV_MOD_OP_PREC: u8 = 40; +const PLUS_MINUS_PREC: u8 = 30; +const XOR_PREC: u8 = 24; +const AMPERSAND_PREC: u8 = 23; +const CARET_PREC: u8 = 22; +const PIPE_PREC: u8 = 21; +const BETWEEN_PREC: u8 = 20; +const EQ_PREC: u8 = 20; +const LIKE_PREC: u8 = 19; +const IS_PREC: u8 = 17; +const PG_OTHER_PREC: u8 = 16; +const UNARY_NOT_PREC: u8 = 15; +const AND_PREC: u8 = 10; +const OR_PREC: u8 = 5; +const UNKNOWN_PREC: u8 = 0; + impl dyn Dialect { #[inline] pub fn is(&self) -> bool { diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index 8254e807b..293fb9e7d 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -9,6 +9,7 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. +use log::debug; use crate::ast::{CommentObject, Statement}; use crate::dialect::Dialect; @@ -20,6 +21,23 @@ use crate::tokenizer::Token; #[derive(Debug)] pub struct PostgreSqlDialect {} +const DOUBLE_COLON_PREC: u8 = 140; +const BRACKET_PREC: u8 = 130; +const COLLATE_PREC: u8 = 120; +const AT_TZ_PREC: u8 = 110; +const CARET_PREC: u8 = 100; +const MUL_DIV_MOD_OP_PREC: u8 = 90; +const PLUS_MINUS_PREC: u8 = 80; +// there's no XOR operator in PostgreSQL, but support it here to avoid breaking tests +const XOR_PREC: u8 = 75; +const PG_OTHER_PREC: u8 = 70; +const BETWEEN_LIKE_PREC: u8 = 60; +const EQ_PREC: u8 = 50; +const IS_PREC: u8 = 40; +const NOT_PREC: u8 = 30; +const AND_PREC: u8 = 20; +const OR_PREC: u8 = 10; + impl Dialect for PostgreSqlDialect { fn identifier_quote_style(&self, _identifier: &str) -> Option { Some('"') @@ -67,6 +85,102 @@ impl Dialect for PostgreSqlDialect { ) } + fn get_next_precedence(&self, parser: &Parser) -> Option> { + let token = parser.peek_token(); + debug!("get_next_precedence() {:?}", token); + + let precedence = match token.token { + 
Token::Word(w) if w.keyword == Keyword::OR => OR_PREC, + Token::Word(w) if w.keyword == Keyword::XOR => XOR_PREC, + Token::Word(w) if w.keyword == Keyword::AND => AND_PREC, + Token::Word(w) if w.keyword == Keyword::AT => { + match ( + parser.peek_nth_token(1).token, + parser.peek_nth_token(2).token, + ) { + (Token::Word(w), Token::Word(w2)) + if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE => + { + AT_TZ_PREC + } + _ => self.prec_unknown(), + } + } + + Token::Word(w) if w.keyword == Keyword::NOT => match parser.peek_nth_token(1).token { + // The precedence of NOT varies depending on keyword that + // follows it. If it is followed by IN, BETWEEN, or LIKE, + // it takes on the precedence of those tokens. Otherwise, it + // is not an infix operator, and therefore has zero + // precedence. + Token::Word(w) if w.keyword == Keyword::IN => BETWEEN_LIKE_PREC, + Token::Word(w) if w.keyword == Keyword::BETWEEN => BETWEEN_LIKE_PREC, + Token::Word(w) if w.keyword == Keyword::LIKE => BETWEEN_LIKE_PREC, + Token::Word(w) if w.keyword == Keyword::ILIKE => BETWEEN_LIKE_PREC, + Token::Word(w) if w.keyword == Keyword::RLIKE => BETWEEN_LIKE_PREC, + Token::Word(w) if w.keyword == Keyword::REGEXP => BETWEEN_LIKE_PREC, + Token::Word(w) if w.keyword == Keyword::SIMILAR => BETWEEN_LIKE_PREC, + _ => self.prec_unknown(), + }, + Token::Word(w) if w.keyword == Keyword::IS => IS_PREC, + Token::Word(w) if w.keyword == Keyword::IN => BETWEEN_LIKE_PREC, + Token::Word(w) if w.keyword == Keyword::BETWEEN => BETWEEN_LIKE_PREC, + Token::Word(w) if w.keyword == Keyword::LIKE => BETWEEN_LIKE_PREC, + Token::Word(w) if w.keyword == Keyword::ILIKE => BETWEEN_LIKE_PREC, + Token::Word(w) if w.keyword == Keyword::RLIKE => BETWEEN_LIKE_PREC, + Token::Word(w) if w.keyword == Keyword::REGEXP => BETWEEN_LIKE_PREC, + Token::Word(w) if w.keyword == Keyword::SIMILAR => BETWEEN_LIKE_PREC, + Token::Word(w) if w.keyword == Keyword::OPERATOR => BETWEEN_LIKE_PREC, + Token::Word(w) if w.keyword == 
Keyword::DIV => MUL_DIV_MOD_OP_PREC, + Token::Word(w) if w.keyword == Keyword::COLLATE => COLLATE_PREC, + Token::Eq + | Token::Lt + | Token::LtEq + | Token::Neq + | Token::Gt + | Token::GtEq + | Token::DoubleEq + | Token::Tilde + | Token::TildeAsterisk + | Token::ExclamationMarkTilde + | Token::ExclamationMarkTildeAsterisk + | Token::DoubleTilde + | Token::DoubleTildeAsterisk + | Token::ExclamationMarkDoubleTilde + | Token::ExclamationMarkDoubleTildeAsterisk + | Token::Spaceship => EQ_PREC, + Token::Caret => CARET_PREC, + Token::Plus | Token::Minus => PLUS_MINUS_PREC, + Token::Mul | Token::Div | Token::Mod => MUL_DIV_MOD_OP_PREC, + Token::DoubleColon => DOUBLE_COLON_PREC, + Token::LBracket => BRACKET_PREC, + Token::Arrow + | Token::LongArrow + | Token::HashArrow + | Token::HashLongArrow + | Token::AtArrow + | Token::ArrowAt + | Token::HashMinus + | Token::AtQuestion + | Token::AtAt + | Token::Question + | Token::QuestionAnd + | Token::QuestionPipe + | Token::ExclamationMark + | Token::Overlap + | Token::CaretAt + | Token::StringConcat + | Token::Sharp + | Token::ShiftRight + | Token::ShiftLeft + | Token::Pipe + | Token::Ampersand + | Token::CustomBinaryOperator(_) => PG_OTHER_PREC, + _ => self.prec_unknown(), + }; + Some(Ok(precedence)) + } + fn parse_statement(&self, parser: &mut Parser) -> Option> { if parser.parse_keyword(Keyword::COMMENT) { Some(parse_comment(parser)) @@ -82,6 +196,26 @@ impl Dialect for PostgreSqlDialect { fn supports_group_by_expr(&self) -> bool { true } + + fn prec_mul_div_mod_op(&self) -> u8 { + MUL_DIV_MOD_OP_PREC + } + + fn prec_plus_minus(&self) -> u8 { + PLUS_MINUS_PREC + } + + fn prec_between(&self) -> u8 { + BETWEEN_LIKE_PREC + } + + fn prec_like(&self) -> u8 { + BETWEEN_LIKE_PREC + } + + fn prec_unary_not(&self) -> u8 { + NOT_PREC + } } pub fn parse_comment(parser: &mut Parser) -> Result { diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 9f1d7f27b..fe35d8da3 100644 --- a/src/dialect/snowflake.rs +++ 
b/src/dialect/snowflake.rs @@ -145,6 +145,15 @@ impl Dialect for SnowflakeDialect { None } + + fn get_next_precedence(&self, parser: &Parser) -> Option> { + let token = parser.peek_token(); + // Snowflake supports the `:` cast operator unlike other dialects + match token.token { + Token::Colon => Some(Ok(self.prec_double_colon())), + _ => None, + } + } } /// Parse snowflake create table statement. diff --git a/src/parser/mod.rs b/src/parser/mod.rs index fe8acb4f2..1fdba5ecf 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -875,7 +875,7 @@ impl<'a> Parser<'a> { /// Parse a new expression. pub fn parse_expr(&mut self) -> Result { let _guard = self.recursion_counter.try_decrease()?; - self.parse_subexpr(0) + self.parse_subexpr(self.dialect.prec_unknown()) } /// Parse tokens until the precedence changes. @@ -897,7 +897,7 @@ impl<'a> Parser<'a> { } pub fn parse_interval_expr(&mut self) -> Result { - let precedence = 0; + let precedence = self.dialect.prec_unknown(); let mut expr = self.parse_prefix()?; loop { @@ -918,9 +918,9 @@ impl<'a> Parser<'a> { let token = self.peek_token(); match token.token { - Token::Word(w) if w.keyword == Keyword::AND => Ok(0), - Token::Word(w) if w.keyword == Keyword::OR => Ok(0), - Token::Word(w) if w.keyword == Keyword::XOR => Ok(0), + Token::Word(w) if w.keyword == Keyword::AND => Ok(self.dialect.prec_unknown()), + Token::Word(w) if w.keyword == Keyword::OR => Ok(self.dialect.prec_unknown()), + Token::Word(w) if w.keyword == Keyword::XOR => Ok(self.dialect.prec_unknown()), _ => self.get_next_precedence(), } } @@ -1079,7 +1079,7 @@ impl<'a> Parser<'a> { self.parse_bigquery_struct_literal() } Keyword::PRIOR if matches!(self.state, ParserState::ConnectBy) => { - let expr = self.parse_subexpr(Self::PLUS_MINUS_PREC)?; + let expr = self.parse_subexpr(self.dialect.prec_plus_minus())?; Ok(Expr::Prior(Box::new(expr))) } Keyword::MAP if self.peek_token() == Token::LBrace && self.dialect.support_map_literal_syntax() => { @@ -1167,7 +1167,7 
@@ impl<'a> Parser<'a> { }; Ok(Expr::UnaryOp { op, - expr: Box::new(self.parse_subexpr(Self::MUL_DIV_MOD_OP_PREC)?), + expr: Box::new(self.parse_subexpr(self.dialect.prec_mul_div_mod_op())?), }) } tok @ Token::DoubleExclamationMark @@ -1187,7 +1187,7 @@ impl<'a> Parser<'a> { }; Ok(Expr::UnaryOp { op, - expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?), + expr: Box::new(self.parse_subexpr(self.dialect.prec_plus_minus())?), }) } Token::EscapedStringLiteral(_) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => @@ -1716,12 +1716,13 @@ impl<'a> Parser<'a> { } pub fn parse_position_expr(&mut self, ident: Ident) -> Result { + let between_prec = self.dialect.prec_between(); let position_expr = self.maybe_parse(|p| { // PARSE SELECT POSITION('@' in field) p.expect_token(&Token::LParen)?; // Parse the subexpr till the IN keyword - let expr = p.parse_subexpr(Self::BETWEEN_PREC)?; + let expr = p.parse_subexpr(between_prec)?; p.expect_keyword(Keyword::IN)?; let from = p.parse_expr()?; p.expect_token(&Token::RParen)?; @@ -1963,12 +1964,12 @@ impl<'a> Parser<'a> { } _ => Ok(Expr::UnaryOp { op: UnaryOperator::Not, - expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?), + expr: Box::new(self.parse_subexpr(self.dialect.prec_unary_not())?), }), }, _ => Ok(Expr::UnaryOp { op: UnaryOperator::Not, - expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?), + expr: Box::new(self.parse_subexpr(self.dialect.prec_unary_not())?), }), } } @@ -2641,7 +2642,7 @@ impl<'a> Parser<'a> { Ok(Expr::RLike { negated, expr: Box::new(expr), - pattern: Box::new(self.parse_subexpr(Self::LIKE_PREC)?), + pattern: Box::new(self.parse_subexpr(self.dialect.prec_like())?), regexp, }) } else if self.parse_keyword(Keyword::IN) { @@ -2652,21 +2653,21 @@ impl<'a> Parser<'a> { Ok(Expr::Like { negated, expr: Box::new(expr), - pattern: Box::new(self.parse_subexpr(Self::LIKE_PREC)?), + pattern: Box::new(self.parse_subexpr(self.dialect.prec_like())?), escape_char: self.parse_escape_char()?, }) } 
else if self.parse_keyword(Keyword::ILIKE) { Ok(Expr::ILike { negated, expr: Box::new(expr), - pattern: Box::new(self.parse_subexpr(Self::LIKE_PREC)?), + pattern: Box::new(self.parse_subexpr(self.dialect.prec_like())?), escape_char: self.parse_escape_char()?, }) } else if self.parse_keywords(&[Keyword::SIMILAR, Keyword::TO]) { Ok(Expr::SimilarTo { negated, expr: Box::new(expr), - pattern: Box::new(self.parse_subexpr(Self::LIKE_PREC)?), + pattern: Box::new(self.parse_subexpr(self.dialect.prec_like())?), escape_char: self.parse_escape_char()?, }) } else { @@ -2941,9 +2942,9 @@ impl<'a> Parser<'a> { pub fn parse_between(&mut self, expr: Expr, negated: bool) -> Result { // Stop parsing subexpressions for and on tokens with // precedence lower than that of `BETWEEN`, such as `AND`, `IS`, etc. - let low = self.parse_subexpr(Self::BETWEEN_PREC)?; + let low = self.parse_subexpr(self.dialect.prec_between())?; self.expect_keyword(Keyword::AND)?; - let high = self.parse_subexpr(Self::BETWEEN_PREC)?; + let high = self.parse_subexpr(self.dialect.prec_between())?; Ok(Expr::Between { expr: Box::new(expr), negated, @@ -2962,118 +2963,9 @@ impl<'a> Parser<'a> { }) } - // Use https://www.postgresql.org/docs/7.0/operators.htm#AEN2026 as a reference - // higher number = higher precedence - // - // NOTE: The pg documentation is incomplete, e.g. the AT TIME ZONE operator - // actually has higher precedence than addition. - // See https://postgrespro.com/list/thread-id/2673331. 
- const AT_TZ_PREC: u8 = 41; - const MUL_DIV_MOD_OP_PREC: u8 = 40; - const PLUS_MINUS_PREC: u8 = 30; - const XOR_PREC: u8 = 24; - const BETWEEN_PREC: u8 = 20; - const LIKE_PREC: u8 = 19; - const IS_PREC: u8 = 17; - const PG_OTHER_PREC: u8 = 16; - const UNARY_NOT_PREC: u8 = 15; - const AND_PREC: u8 = 10; - const OR_PREC: u8 = 5; - /// Get the precedence of the next token pub fn get_next_precedence(&self) -> Result { - // allow the dialect to override precedence logic - if let Some(precedence) = self.dialect.get_next_precedence(self) { - return precedence; - } - - let token = self.peek_token(); - debug!("get_next_precedence() {:?}", token); - let [token_0, token_1, token_2] = self.peek_tokens_with_location(); - debug!("0: {token_0} 1: {token_1} 2: {token_2}"); - match token.token { - Token::Word(w) if w.keyword == Keyword::OR => Ok(Self::OR_PREC), - Token::Word(w) if w.keyword == Keyword::AND => Ok(Self::AND_PREC), - Token::Word(w) if w.keyword == Keyword::XOR => Ok(Self::XOR_PREC), - - Token::Word(w) if w.keyword == Keyword::AT => { - match (self.peek_nth_token(1).token, self.peek_nth_token(2).token) { - (Token::Word(w), Token::Word(w2)) - if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE => - { - Ok(Self::AT_TZ_PREC) - } - _ => Ok(0), - } - } - - Token::Word(w) if w.keyword == Keyword::NOT => match self.peek_nth_token(1).token { - // The precedence of NOT varies depending on keyword that - // follows it. If it is followed by IN, BETWEEN, or LIKE, - // it takes on the precedence of those tokens. Otherwise, it - // is not an infix operator, and therefore has zero - // precedence. 
- Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC), - Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC), - Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::LIKE_PREC), - Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::LIKE_PREC), - Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(Self::LIKE_PREC), - Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::LIKE_PREC), - Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(Self::LIKE_PREC), - _ => Ok(0), - }, - Token::Word(w) if w.keyword == Keyword::IS => Ok(Self::IS_PREC), - Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC), - Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC), - Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::LIKE_PREC), - Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::LIKE_PREC), - Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(Self::LIKE_PREC), - Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::LIKE_PREC), - Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(Self::LIKE_PREC), - Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(Self::BETWEEN_PREC), - Token::Word(w) if w.keyword == Keyword::DIV => Ok(Self::MUL_DIV_MOD_OP_PREC), - Token::Eq - | Token::Lt - | Token::LtEq - | Token::Neq - | Token::Gt - | Token::GtEq - | Token::DoubleEq - | Token::Tilde - | Token::TildeAsterisk - | Token::ExclamationMarkTilde - | Token::ExclamationMarkTildeAsterisk - | Token::DoubleTilde - | Token::DoubleTildeAsterisk - | Token::ExclamationMarkDoubleTilde - | Token::ExclamationMarkDoubleTildeAsterisk - | Token::Spaceship => Ok(20), - Token::Pipe => Ok(21), - Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22), - Token::Ampersand => Ok(23), - Token::Plus | Token::Minus => Ok(Self::PLUS_MINUS_PREC), - Token::Mul | Token::Div | Token::DuckIntDiv | Token::Mod | Token::StringConcat => { - Ok(Self::MUL_DIV_MOD_OP_PREC) - } - Token::DoubleColon => 
Ok(50), - Token::Colon if dialect_of!(self is SnowflakeDialect) => Ok(50), - Token::ExclamationMark => Ok(50), - Token::LBracket | Token::Overlap | Token::CaretAt => Ok(50), - Token::Arrow - | Token::LongArrow - | Token::HashArrow - | Token::HashLongArrow - | Token::AtArrow - | Token::ArrowAt - | Token::HashMinus - | Token::AtQuestion - | Token::AtAt - | Token::Question - | Token::QuestionAnd - | Token::QuestionPipe - | Token::CustomBinaryOperator(_) => Ok(Self::PG_OTHER_PREC), - _ => Ok(0), - } + self.dialect.get_next_precedence_full(self) } /// Return the first non-whitespace token that has not yet been processed @@ -8051,7 +7943,7 @@ impl<'a> Parser<'a> { format_clause: None, }) } else { - let body = self.parse_boxed_query_body(0)?; + let body = self.parse_boxed_query_body(self.dialect.prec_unknown())?; let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { let order_by_exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?; diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 7406bdd74..150f06913 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -4476,3 +4476,115 @@ fn test_unicode_string_literal() { } } } + +fn check_arrow_precedence(sql: &str, arrow_operator: BinaryOperator) { + assert_eq!( + pg().verified_stmt(sql), + Statement::Query(Box::new(Query { + with: None, + body: Box::new(SetExpr::Select(Box::new(Select { + distinct: None, + top: None, + projection: vec![SelectItem::UnnamedExpr(Expr::BinaryOp { + left: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident { + value: "foo".to_string(), + quote_style: None, + })), + op: arrow_operator, + right: Box::new(Expr::Value(Value::SingleQuotedString("bar".to_string()))), + }), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(Value::SingleQuotedString("spam".to_string()))), + })], + into: None, + from: vec![], + lateral_views: vec![], + prewhere: None, + selection: None, + group_by: 
GroupByExpr::Expressions(vec![], vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + window_before_qualify: false, + value_table_mode: None, + connect_by: None, + }))), + order_by: None, + limit: None, + limit_by: vec![], + offset: None, + fetch: None, + locks: vec![], + for_clause: None, + settings: None, + format_clause: None, + })) + ) +} + +#[test] +fn arrow_precedence() { + check_arrow_precedence("SELECT foo -> 'bar' = 'spam'", BinaryOperator::Arrow); +} + +#[test] +fn long_arrow_precedence() { + check_arrow_precedence("SELECT foo ->> 'bar' = 'spam'", BinaryOperator::LongArrow); +} + +#[test] +fn arrow_cast_precedence() { + // check this matches postgres where you would need `(foo -> 'bar')::TEXT` + let stmt = pg().verified_stmt("SELECT foo -> 'bar'::TEXT"); + assert_eq!( + stmt, + Statement::Query(Box::new(Query { + with: None, + body: Box::new(SetExpr::Select(Box::new(Select { + distinct: None, + top: None, + projection: vec![SelectItem::UnnamedExpr(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident { + value: "foo".to_string(), + quote_style: None, + })), + op: BinaryOperator::Arrow, + right: Box::new(Expr::Cast { + kind: CastKind::DoubleColon, + expr: Box::new(Expr::Value(Value::SingleQuotedString("bar".to_string()))), + data_type: DataType::Text, + format: None, + }), + })], + into: None, + from: vec![], + lateral_views: vec![], + prewhere: None, + selection: None, + group_by: GroupByExpr::Expressions(vec![], vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + window_before_qualify: false, + value_table_mode: None, + connect_by: None, + }))), + order_by: None, + limit: None, + limit_by: vec![], + offset: None, + fetch: None, + locks: vec![], + for_clause: None, + settings: None, + format_clause: None, + })) + ) +} From da484c57c4a5682da24c070d76c872148e54bbfe Mon Sep 17 00:00:00 2001 
From: Andrew Lamb Date: Tue, 6 Aug 2024 08:23:07 -0400 Subject: [PATCH 046/506] Improve comments on `Dialect` (#1366) --- src/dialect/mod.rs | 90 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 80 insertions(+), 10 deletions(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index fc45545d4..9033ecc78 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -66,7 +66,8 @@ macro_rules! dialect_of { /// Encapsulates the differences between SQL implementations. /// /// # SQL Dialects -/// SQL implementations deviatiate from one another, either due to +/// +/// SQL implementations deviate from one another, either due to /// custom extensions or various historical reasons. This trait /// encapsulates the parsing differences between dialects. /// @@ -114,16 +115,20 @@ pub trait Dialect: Debug + Any { fn is_delimited_identifier_start(&self, ch: char) -> bool { ch == '"' || ch == '`' } + /// Return the character used to quote identifiers. fn identifier_quote_style(&self, _identifier: &str) -> Option { None } + /// Determine if quoted characters are proper for identifier fn is_proper_identifier_inside_quotes(&self, mut _chars: Peekable>) -> bool { true } + /// Determine if a character is a valid start character for an unquoted identifier fn is_identifier_start(&self, ch: char) -> bool; + /// Determine if a character is a valid unquoted identifier character fn is_identifier_part(&self, ch: char) -> bool; @@ -168,6 +173,7 @@ pub trait Dialect: Debug + Any { fn supports_filter_during_aggregation(&self) -> bool { false } + /// Returns true if the dialect supports referencing another named window /// within a window clause declaration. /// @@ -179,6 +185,7 @@ pub trait Dialect: Debug + Any { fn supports_window_clause_named_window_reference(&self) -> bool { false } + /// Returns true if the dialect supports `ARRAY_AGG() [WITHIN GROUP (ORDER BY)]` expressions. /// Otherwise, the dialect should expect an `ORDER BY` without the `WITHIN GROUP` clause, e.g. 
[`ANSI`] /// @@ -186,38 +193,47 @@ pub trait Dialect: Debug + Any { fn supports_within_after_array_aggregation(&self) -> bool { false } + /// Returns true if the dialects supports `group sets, roll up, or cube` expressions. fn supports_group_by_expr(&self) -> bool { false } + /// Returns true if the dialect supports CONNECT BY. fn supports_connect_by(&self) -> bool { false } + /// Returns true if the dialect supports the MATCH_RECOGNIZE operation. fn supports_match_recognize(&self) -> bool { false } + /// Returns true if the dialect supports `(NOT) IN ()` expressions fn supports_in_empty_list(&self) -> bool { false } + /// Returns true if the dialect supports `BEGIN {DEFERRED | IMMEDIATE | EXCLUSIVE} [TRANSACTION]` statements fn supports_start_transaction_modifier(&self) -> bool { false } + /// Returns true if the dialect supports named arguments of the form FUN(a = '1', b = '2'). fn supports_named_fn_args_with_eq_operator(&self) -> bool { false } + /// Returns true if the dialect supports identifiers starting with a numeric - /// prefix such as tables named: `59901_user_login` + /// prefix such as tables named `59901_user_login` fn supports_numeric_prefix(&self) -> bool { false } + /// Returns true if the dialects supports specifying null treatment - /// as part of a window function's parameter list. As opposed + /// as part of a window function's parameter list as opposed /// to after the parameter list. + /// /// i.e The following syntax returns true /// ```sql /// FIRST_VALUE(a IGNORE NULLS) OVER () @@ -229,16 +245,19 @@ pub trait Dialect: Debug + Any { fn supports_window_function_null_treatment_arg(&self) -> bool { false } + /// Returns true if the dialect supports defining structs or objects using a /// syntax like `{'x': 1, 'y': 2, 'z': 3}`. fn supports_dictionary_syntax(&self) -> bool { false } + /// Returns true if the dialect supports defining object using the /// syntax like `Map {1: 10, 2: 20}`. 
fn support_map_literal_syntax(&self) -> bool { false } + /// Returns true if the dialect supports lambda functions, for example: /// /// ```sql @@ -247,6 +266,7 @@ pub trait Dialect: Debug + Any { fn supports_lambda_functions(&self) -> bool { false } + /// Returns true if the dialect supports multiple variable assignment /// using parentheses in a `SET` variable declaration. /// @@ -256,6 +276,7 @@ pub trait Dialect: Debug + Any { fn supports_parenthesized_set_variables(&self) -> bool { false } + /// Returns true if the dialect supports an `EXCEPT` clause following a /// wildcard in a select list. /// @@ -266,30 +287,40 @@ pub trait Dialect: Debug + Any { fn supports_select_wildcard_except(&self) -> bool { false } + /// Returns true if the dialect has a CONVERT function which accepts a type first /// and an expression second, e.g. `CONVERT(varchar, 1)` fn convert_type_before_value(&self) -> bool { false } + /// Returns true if the dialect supports triple quoted string /// e.g. `"""abc"""` fn supports_triple_quoted_string(&self) -> bool { false } + /// Dialect-specific prefix parser override fn parse_prefix(&self, _parser: &mut Parser) -> Option> { // return None to fall back to the default behavior None } + /// Does the dialect support trailing commas around the query? fn supports_trailing_commas(&self) -> bool { false } + /// Does the dialect support trailing commas in the projection list? fn supports_projection_trailing_commas(&self) -> bool { self.supports_trailing_commas() } + /// Dialect-specific infix parser override + /// + /// This method is called to parse the next infix expression. + /// + /// If `None` is returned, falls back to the default behavior. fn parse_infix( &self, _parser: &mut Parser, @@ -299,24 +330,33 @@ pub trait Dialect: Debug + Any { // return None to fall back to the default behavior None } + /// Dialect-specific precedence override + /// + /// This method is called to get the precedence of the next token. 
+ /// + /// If `None` is returned, falls back to the default behavior. fn get_next_precedence(&self, _parser: &Parser) -> Option> { // return None to fall back to the default behavior None } - /// Get the precedence of the next token. This "full" method means all precedence logic and remain - /// in the dialect. while still allowing overriding the `get_next_precedence` method with the option to - /// fallback to the default behavior. + /// Get the precedence of the next token, looking at the full token stream. /// - /// Higher number => higher precedence + /// A higher number => higher precedence + /// + /// See [`Self::get_next_precedence`] to override the behavior for just the + /// next token. + /// + /// The default implementation is used for many dialects, but can be + /// overridden to provide dialect-specific behavior. fn get_next_precedence_full(&self, parser: &Parser) -> Result { if let Some(precedence) = self.get_next_precedence(parser) { return precedence; } let token = parser.peek_token(); - debug!("get_next_precedence() {:?}", token); + debug!("get_next_precedence_full() {:?}", token); match token.token { Token::Word(w) if w.keyword == Keyword::OR => Ok(OR_PREC), Token::Word(w) if w.keyword == Keyword::AND => Ok(AND_PREC), @@ -408,37 +448,67 @@ pub trait Dialect: Debug + Any { } /// Dialect-specific statement parser override + /// + /// This method is called to parse the next statement. + /// + /// If `None` is returned, falls back to the default behavior. fn parse_statement(&self, _parser: &mut Parser) -> Option> { // return None to fall back to the default behavior None } - /// The following precedence values are used directly by `Parse` or in dialects, - /// so have to be made public by the dialect. + // The following precedence values are used directly by `Parse` or in dialects, + // so have to be made public by the dialect. + + /// Return the precedence of the `::` operator. + /// + /// Default is 50. 
fn prec_double_colon(&self) -> u8 { DOUBLE_COLON_PREC } + /// Return the precedence of `*`, `/`, and `%` operators. + /// + /// Default is 40. fn prec_mul_div_mod_op(&self) -> u8 { MUL_DIV_MOD_OP_PREC } + /// Return the precedence of the `+` and `-` operators. + /// + /// Default is 30. fn prec_plus_minus(&self) -> u8 { PLUS_MINUS_PREC } + /// Return the precedence of the `BETWEEN` operator. + /// + /// For example `BETWEEN AND ` + /// + /// Default is 22. fn prec_between(&self) -> u8 { BETWEEN_PREC } + /// Return the precedence of the `LIKE` operator. + /// + /// Default is 19. fn prec_like(&self) -> u8 { LIKE_PREC } + /// Return the precedence of the unary `NOT` operator. + /// + /// For example `NOT (a OR b)` + /// + /// Default is 15. fn prec_unary_not(&self) -> u8 { UNARY_NOT_PREC } + /// Return the default (unknown) precedence. + /// + /// Default is 0. fn prec_unknown(&self) -> u8 { UNKNOWN_PREC } From dfb8b81630ec7285c7ffc9e9113105ef1af56023 Mon Sep 17 00:00:00 2001 From: hulk Date: Thu, 8 Aug 2024 02:02:11 +0800 Subject: [PATCH 047/506] Add support of ATTACH/DETACH PARTITION for ClickHouse (#1362) --- src/ast/ddl.rs | 25 ++++++++++++++ src/keywords.rs | 1 + src/parser/mod.rs | 24 ++++++++++++- tests/sqlparser_clickhouse.rs | 65 +++++++++++++++++++++++++++++++++++ 4 files changed, 114 insertions(+), 1 deletion(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index af679d469..d207f5766 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -72,6 +72,21 @@ pub enum AlterTableOperation { if_exists: bool, cascade: bool, }, + /// `ATTACH PART|PARTITION ` + /// Note: this is a ClickHouse-specific operation, please refer to + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#attach-partitionpart) + AttachPartition { + // PART is not a short form of PARTITION, it's a separate keyword + // which represents a physical file on disk and partition is a logical entity.
+ partition: Partition, + }, + /// `DETACH PART|PARTITION ` + /// Note: this is a ClickHouse-specific operation, please refer to + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#detach-partitionpart) + DetachPartition { + // See `AttachPartition` for more details + partition: Partition, + }, /// `DROP PRIMARY KEY` /// /// Note: this is a MySQL-specific operation. @@ -272,6 +287,12 @@ impl fmt::Display for AlterTableOperation { column_name, if *cascade { " CASCADE" } else { "" } ), + AlterTableOperation::AttachPartition { partition } => { + write!(f, "ATTACH {partition}") + } + AlterTableOperation::DetachPartition { partition } => { + write!(f, "DETACH {partition}") + } AlterTableOperation::EnableAlwaysRule { name } => { write!(f, "ENABLE ALWAYS RULE {name}") } @@ -1305,6 +1326,9 @@ impl fmt::Display for UserDefinedTypeCompositeAttributeDef { pub enum Partition { Identifier(Ident), Expr(Expr), + /// ClickHouse supports PART expr which represents physical partition in disk. 
+ /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#attach-partitionpart) + Part(Expr), Partitions(Vec), } @@ -1313,6 +1337,7 @@ impl fmt::Display for Partition { match self { Partition::Identifier(id) => write!(f, "PARTITION ID {id}"), Partition::Expr(expr) => write!(f, "PARTITION {expr}"), + Partition::Part(expr) => write!(f, "PART {expr}"), Partition::Partitions(partitions) => { write!(f, "PARTITION ({})", display_comma_separated(partitions)) } diff --git a/src/keywords.rs b/src/keywords.rs index 49bd969af..c175da874 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -539,6 +539,7 @@ define_keywords!( PARALLEL, PARAMETER, PARQUET, + PART, PARTITION, PARTITIONED, PARTITIONS, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 1fdba5ecf..b6d4c307f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6432,7 +6432,7 @@ impl<'a> Parser<'a> { } else if dialect_of!(self is PostgreSqlDialect | GenericDialect) && self.parse_keywords(&[Keyword::OWNER, Keyword::TO]) { - let new_owner = match self.parse_one_of_keywords( &[Keyword::CURRENT_USER, Keyword::CURRENT_ROLE, Keyword::SESSION_USER]) { + let new_owner = match self.parse_one_of_keywords(&[Keyword::CURRENT_USER, Keyword::CURRENT_ROLE, Keyword::SESSION_USER]) { Some(Keyword::CURRENT_USER) => Owner::CurrentUser, Some(Keyword::CURRENT_ROLE) => Owner::CurrentRole, Some(Keyword::SESSION_USER) => Owner::SessionUser, @@ -6448,6 +6448,18 @@ impl<'a> Parser<'a> { }; AlterTableOperation::OwnerTo { new_owner } + } else if dialect_of!(self is ClickHouseDialect|GenericDialect) + && self.parse_keyword(Keyword::ATTACH) + { + AlterTableOperation::AttachPartition { + partition: self.parse_part_or_partition()?, + } + } else if dialect_of!(self is ClickHouseDialect|GenericDialect) + && self.parse_keyword(Keyword::DETACH) + { + AlterTableOperation::DetachPartition { + partition: self.parse_part_or_partition()?, + } } else { let options: Vec = 
self.parse_options_with_keywords(&[Keyword::SET, Keyword::TBLPROPERTIES])?; @@ -6465,6 +6477,16 @@ impl<'a> Parser<'a> { Ok(operation) } + fn parse_part_or_partition(&mut self) -> Result { + let keyword = self.expect_one_of_keywords(&[Keyword::PART, Keyword::PARTITION])?; + match keyword { + Keyword::PART => Ok(Partition::Part(self.parse_expr()?)), + Keyword::PARTITION => Ok(Partition::Expr(self.parse_expr()?)), + // unreachable because expect_one_of_keywords used above + _ => unreachable!(), + } + } + pub fn parse_alter(&mut self) -> Result { let object_type = self.expect_one_of_keywords(&[ Keyword::VIEW, diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 4108958fb..4676e6e50 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -222,6 +222,71 @@ fn parse_create_table() { ); } +#[test] +fn parse_alter_table_attach_and_detach_partition() { + for operation in &["ATTACH", "DETACH"] { + match clickhouse_and_generic() + .verified_stmt(format!("ALTER TABLE t0 {operation} PARTITION part").as_str()) + { + Statement::AlterTable { + name, operations, .. + } => { + pretty_assertions::assert_eq!("t0", name.to_string()); + pretty_assertions::assert_eq!( + operations[0], + if operation == &"ATTACH" { + AlterTableOperation::AttachPartition { + partition: Partition::Expr(Identifier(Ident::new("part"))), + } + } else { + AlterTableOperation::DetachPartition { + partition: Partition::Expr(Identifier(Ident::new("part"))), + } + } + ); + } + _ => unreachable!(), + } + + match clickhouse_and_generic() + .verified_stmt(format!("ALTER TABLE t1 {operation} PART part").as_str()) + { + Statement::AlterTable { + name, operations, .. 
+ } => { + pretty_assertions::assert_eq!("t1", name.to_string()); + pretty_assertions::assert_eq!( + operations[0], + if operation == &"ATTACH" { + AlterTableOperation::AttachPartition { + partition: Partition::Part(Identifier(Ident::new("part"))), + } + } else { + AlterTableOperation::DetachPartition { + partition: Partition::Part(Identifier(Ident::new("part"))), + } + } + ); + } + _ => unreachable!(), + } + + // negative cases + assert_eq!( + clickhouse_and_generic() + .parse_sql_statements(format!("ALTER TABLE t0 {operation} PARTITION").as_str()) + .unwrap_err(), + ParserError("Expected: an expression:, found: EOF".to_string()) + ); + assert_eq!( + clickhouse_and_generic() + .parse_sql_statements(format!("ALTER TABLE t0 {operation} PART").as_str()) + .unwrap_err(), + ParserError("Expected: an expression:, found: EOF".to_string()) + ); + } +} + #[test] fn parse_optimize_table() { clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0"); From 2d4b9b3e5683c8f415ecd35a9b78a3a0f8b2fcb8 Mon Sep 17 00:00:00 2001 From: Jesse Date: Wed, 7 Aug 2024 20:30:01 +0200 Subject: [PATCH 048/506] Make `Parser::maybe_parse` pub (#1364) --- src/parser/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b6d4c307f..9b252ce29 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3383,7 +3383,7 @@ impl<'a> Parser<'a> { /// Run a parser method `f`, reverting back to the current position if unsuccessful. 
#[must_use] - fn maybe_parse(&mut self, mut f: F) -> Option + pub fn maybe_parse(&mut self, mut f: F) -> Option where F: FnMut(&mut Parser) -> Result, { From 68a04cd40218bf5a3244c6574f091bde344f6d12 Mon Sep 17 00:00:00 2001 From: hulk Date: Fri, 9 Aug 2024 04:57:21 +0800 Subject: [PATCH 049/506] Update version of GitHub Actions (#1363) --- .github/workflows/rust.yml | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 64c4d114a..1d2c34276 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -8,31 +8,31 @@ jobs: runs-on: ubuntu-latest steps: - name: Set up Rust - uses: hecrj/setup-rust-action@v1 + uses: hecrj/setup-rust-action@v2 with: components: rustfmt # Note that `nightly` is required for `license_template_path`, as # it's an unstable feature. rust-version: nightly - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - run: cargo +nightly fmt -- --check --config-path <(echo 'license_template_path = "HEADER"') lint: runs-on: ubuntu-latest steps: - name: Set up Rust - uses: hecrj/setup-rust-action@v1 + uses: hecrj/setup-rust-action@v2 with: components: clippy - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - run: cargo clippy --all-targets --all-features -- -D warnings compile: runs-on: ubuntu-latest steps: - name: Set up Rust - uses: hecrj/setup-rust-action@v1 - - uses: actions/checkout@master + uses: hecrj/setup-rust-action@v2 + - uses: actions/checkout@v4 - run: cargo check --all-targets --all-features docs: @@ -41,18 +41,18 @@ jobs: RUSTDOCFLAGS: "-Dwarnings" steps: - name: Set up Rust - uses: hecrj/setup-rust-action@v1 - - uses: actions/checkout@master + uses: hecrj/setup-rust-action@v2 + - uses: actions/checkout@v4 - run: cargo doc --document-private-items --no-deps --workspace --all-features compile-no-std: runs-on: ubuntu-latest steps: - name: Set up Rust - uses: hecrj/setup-rust-action@v1 + uses: 
hecrj/setup-rust-action@v2 with: targets: 'thumbv6m-none-eabi' - - uses: actions/checkout@master + - uses: actions/checkout@v4 - run: cargo check --no-default-features --target thumbv6m-none-eabi test: @@ -62,7 +62,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Setup Rust - uses: hecrj/setup-rust-action@v1 + uses: hecrj/setup-rust-action@v2 with: rust-version: ${{ matrix.rust }} - name: Install Tarpaulin @@ -72,7 +72,7 @@ jobs: version: 0.14.2 use-tool-cache: true - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Test run: cargo test --all-features @@ -80,7 +80,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Setup Rust - uses: hecrj/setup-rust-action@v1 + uses: hecrj/setup-rust-action@v2 with: rust-version: stable - name: Install Tarpaulin @@ -90,7 +90,7 @@ jobs: version: 0.14.2 use-tool-cache: true - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Coverage run: cargo tarpaulin -o Lcov --output-dir ./coverage - name: Coveralls @@ -104,8 +104,8 @@ jobs: needs: [test] steps: - name: Set up Rust - uses: hecrj/setup-rust-action@v1 - - uses: actions/checkout@v2 + uses: hecrj/setup-rust-action@v2 + - uses: actions/checkout@v4 - name: Publish shell: bash run: | From 1e209d87415a5adfedccac8cee3e2860122e4acb Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 8 Aug 2024 16:58:31 -0400 Subject: [PATCH 050/506] Simplify arrow_cast tests (#1367) --- tests/sqlparser_postgres.rs | 120 +++++++++--------------------------- 1 file changed, 29 insertions(+), 91 deletions(-) diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 150f06913..f370748d2 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -4479,112 +4479,50 @@ fn test_unicode_string_literal() { fn check_arrow_precedence(sql: &str, arrow_operator: BinaryOperator) { assert_eq!( - pg().verified_stmt(sql), - Statement::Query(Box::new(Query { - with: None, - body: Box::new(SetExpr::Select(Box::new(Select { - 
distinct: None, - top: None, - projection: vec![SelectItem::UnnamedExpr(Expr::BinaryOp { - left: Box::new(Expr::BinaryOp { - left: Box::new(Expr::Identifier(Ident { - value: "foo".to_string(), - quote_style: None, - })), - op: arrow_operator, - right: Box::new(Expr::Value(Value::SingleQuotedString("bar".to_string()))), - }), - op: BinaryOperator::Eq, - right: Box::new(Expr::Value(Value::SingleQuotedString("spam".to_string()))), - })], - into: None, - from: vec![], - lateral_views: vec![], - prewhere: None, - selection: None, - group_by: GroupByExpr::Expressions(vec![], vec![]), - cluster_by: vec![], - distribute_by: vec![], - sort_by: vec![], - having: None, - named_window: vec![], - qualify: None, - window_before_qualify: false, - value_table_mode: None, - connect_by: None, - }))), - order_by: None, - limit: None, - limit_by: vec![], - offset: None, - fetch: None, - locks: vec![], - for_clause: None, - settings: None, - format_clause: None, - })) + pg().verified_expr(sql), + Expr::BinaryOp { + left: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident { + value: "foo".to_string(), + quote_style: None, + })), + op: arrow_operator, + right: Box::new(Expr::Value(Value::SingleQuotedString("bar".to_string()))), + }), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(Value::SingleQuotedString("spam".to_string()))), + } ) } #[test] fn arrow_precedence() { - check_arrow_precedence("SELECT foo -> 'bar' = 'spam'", BinaryOperator::Arrow); + check_arrow_precedence("foo -> 'bar' = 'spam'", BinaryOperator::Arrow); } #[test] fn long_arrow_precedence() { - check_arrow_precedence("SELECT foo ->> 'bar' = 'spam'", BinaryOperator::LongArrow); + check_arrow_precedence("foo ->> 'bar' = 'spam'", BinaryOperator::LongArrow); } #[test] fn arrow_cast_precedence() { // check this matches postgres where you would need `(foo -> 'bar')::TEXT` - let stmt = pg().verified_stmt("SELECT foo -> 'bar'::TEXT"); + let stmt = pg().verified_expr("foo -> 'bar'::TEXT"); assert_eq!( stmt, 
- Statement::Query(Box::new(Query { - with: None, - body: Box::new(SetExpr::Select(Box::new(Select { - distinct: None, - top: None, - projection: vec![SelectItem::UnnamedExpr(Expr::BinaryOp { - left: Box::new(Expr::Identifier(Ident { - value: "foo".to_string(), - quote_style: None, - })), - op: BinaryOperator::Arrow, - right: Box::new(Expr::Cast { - kind: CastKind::DoubleColon, - expr: Box::new(Expr::Value(Value::SingleQuotedString("bar".to_string()))), - data_type: DataType::Text, - format: None, - }), - })], - into: None, - from: vec![], - lateral_views: vec![], - prewhere: None, - selection: None, - group_by: GroupByExpr::Expressions(vec![], vec![]), - cluster_by: vec![], - distribute_by: vec![], - sort_by: vec![], - having: None, - named_window: vec![], - qualify: None, - window_before_qualify: false, - value_table_mode: None, - connect_by: None, - }))), - order_by: None, - limit: None, - limit_by: vec![], - offset: None, - fetch: None, - locks: vec![], - for_clause: None, - settings: None, - format_clause: None, - })) + Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident { + value: "foo".to_string(), + quote_style: None, + })), + op: BinaryOperator::Arrow, + right: Box::new(Expr::Cast { + kind: CastKind::DoubleColon, + expr: Box::new(Expr::Value(Value::SingleQuotedString("bar".to_string()))), + data_type: DataType::Text, + format: None, + }), + } ) } From ca5262c13f5b7587c1700f26c97e147676981f6e Mon Sep 17 00:00:00 2001 From: hulk Date: Tue, 13 Aug 2024 18:59:19 +0800 Subject: [PATCH 051/506] Use the local GitHub Action to replace setup-rust-action (#1371) --- .github/actions/setup-builder/action.yaml | 42 ++++++++++++++++++++ .github/workflows/rust.yml | 47 +++++++++++------------ 2 files changed, 64 insertions(+), 25 deletions(-) create mode 100644 .github/actions/setup-builder/action.yaml diff --git a/.github/actions/setup-builder/action.yaml b/.github/actions/setup-builder/action.yaml new file mode 100644 index 000000000..61faa055b --- /dev/null +++ 
b/.github/actions/setup-builder/action.yaml @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Prepare Rust Builder +description: 'Prepare Rust Build Environment' +inputs: + rust-version: + description: 'version of rust to install (e.g. 
stable)' + required: true + default: 'stable' + targets: + description: 'The toolchain targets to add, comma-separated' + default: '' + +runs: + using: "composite" + steps: + - name: Setup Rust Toolchain + shell: bash + run: | + echo "Installing ${{ inputs.rust-version }}" + if [ -n "${{ inputs.targets}}" ]; then + rustup toolchain install ${{ inputs.rust-version }} -t ${{ inputs.targets }} + else + rustup toolchain install ${{ inputs.rust-version }} + fi + rustup default ${{ inputs.rust-version }} + rustup component add rustfmt clippy diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 1d2c34276..146ea3120 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -7,32 +7,29 @@ jobs: codestyle: runs-on: ubuntu-latest steps: - - name: Set up Rust - uses: hecrj/setup-rust-action@v2 + - uses: actions/checkout@v4 + - name: Setup Rust Toolchain + uses: ./.github/actions/setup-builder with: - components: rustfmt # Note that `nightly` is required for `license_template_path`, as # it's an unstable feature. 
rust-version: nightly - - uses: actions/checkout@v4 - run: cargo +nightly fmt -- --check --config-path <(echo 'license_template_path = "HEADER"') lint: runs-on: ubuntu-latest steps: - - name: Set up Rust - uses: hecrj/setup-rust-action@v2 - with: - components: clippy - uses: actions/checkout@v4 + - name: Setup Rust Toolchain + uses: ./.github/actions/setup-builder - run: cargo clippy --all-targets --all-features -- -D warnings compile: runs-on: ubuntu-latest steps: - - name: Set up Rust - uses: hecrj/setup-rust-action@v2 - uses: actions/checkout@v4 + - name: Setup Rust Toolchain + uses: ./.github/actions/setup-builder - run: cargo check --all-targets --all-features docs: @@ -40,19 +37,19 @@ jobs: env: RUSTDOCFLAGS: "-Dwarnings" steps: - - name: Set up Rust - uses: hecrj/setup-rust-action@v2 - uses: actions/checkout@v4 + - name: Setup Rust Toolchain + uses: ./.github/actions/setup-builder - run: cargo doc --document-private-items --no-deps --workspace --all-features compile-no-std: runs-on: ubuntu-latest steps: - - name: Set up Rust - uses: hecrj/setup-rust-action@v2 + - uses: actions/checkout@v4 + - name: Setup Rust Toolchain + uses: ./.github/actions/setup-builder with: targets: 'thumbv6m-none-eabi' - - uses: actions/checkout@v4 - run: cargo check --no-default-features --target thumbv6m-none-eabi test: @@ -61,8 +58,10 @@ jobs: rust: [stable, beta, nightly] runs-on: ubuntu-latest steps: - - name: Setup Rust - uses: hecrj/setup-rust-action@v2 + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Rust Toolchain + uses: ./.github/actions/setup-builder with: rust-version: ${{ matrix.rust }} - name: Install Tarpaulin @@ -71,16 +70,16 @@ jobs: crate: cargo-tarpaulin version: 0.14.2 use-tool-cache: true - - name: Checkout - uses: actions/checkout@v4 - name: Test run: cargo test --all-features test-coverage: runs-on: ubuntu-latest steps: - - name: Setup Rust - uses: hecrj/setup-rust-action@v2 + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Rust 
Toolchain + uses: ./.github/actions/setup-builder with: rust-version: stable - name: Install Tarpaulin @@ -89,8 +88,6 @@ jobs: crate: cargo-tarpaulin version: 0.14.2 use-tool-cache: true - - name: Checkout - uses: actions/checkout@v4 - name: Coverage run: cargo tarpaulin -o Lcov --output-dir ./coverage - name: Coveralls @@ -103,9 +100,9 @@ jobs: runs-on: ubuntu-latest needs: [test] steps: - - name: Set up Rust - uses: hecrj/setup-rust-action@v2 - uses: actions/checkout@v4 + - name: Setup Rust Toolchain + uses: ./.github/actions/setup-builder - name: Publish shell: bash run: | From f5b818e74b8364fe2ffac70e3b3d13167b808215 Mon Sep 17 00:00:00 2001 From: Seve Martinez <20816697+seve-martinez@users.noreply.github.com> Date: Tue, 13 Aug 2024 05:56:18 -0700 Subject: [PATCH 052/506] supporting snowflake extract syntax (#1374) Co-authored-by: Andrew Lamb --- src/ast/mod.rs | 29 +++++++++++++++++++++++++++-- src/parser/mod.rs | 21 ++++++++++++++++++++- tests/sqlparser_bigquery.rs | 1 + tests/sqlparser_common.rs | 1 + tests/sqlparser_snowflake.rs | 29 +++++++++++++++++++++++++++++ 5 files changed, 78 insertions(+), 3 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index e0c929a9d..86e2592a3 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -477,6 +477,22 @@ pub enum CastKind { DoubleColon, } +/// `EXTRACT` syntax variants. +/// +/// In Snowflake dialect, the `EXTRACT` expression can support either the `from` syntax +/// or the comma syntax. +/// +/// See +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum ExtractSyntax { + /// `EXTRACT( FROM )` + From, + /// `EXTRACT( , )` + Comma, +} + /// An SQL expression of any type. /// /// The parser does not distinguish between expressions of different types @@ -637,13 +653,15 @@ pub enum Expr { time_zone: Box, }, /// Extract a field from a timestamp e.g. 
`EXTRACT(MONTH FROM foo)` + /// Or `EXTRACT(MONTH, foo)` /// /// Syntax: /// ```sql - /// EXTRACT(DateTimeField FROM ) + /// EXTRACT(DateTimeField FROM ) | EXTRACT(DateTimeField, ) /// ``` Extract { field: DateTimeField, + syntax: ExtractSyntax, expr: Box, }, /// ```sql @@ -1197,7 +1215,14 @@ impl fmt::Display for Expr { write!(f, "{expr}::{data_type}") } }, - Expr::Extract { field, expr } => write!(f, "EXTRACT({field} FROM {expr})"), + Expr::Extract { + field, + syntax, + expr, + } => match syntax { + ExtractSyntax::From => write!(f, "EXTRACT({field} FROM {expr})"), + ExtractSyntax::Comma => write!(f, "EXTRACT({field}, {expr})"), + }, Expr::Ceil { expr, field } => { if field == &DateTimeField::NoDateTime { write!(f, "CEIL({expr})") diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 9b252ce29..60a7b4d0b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1682,12 +1682,25 @@ impl<'a> Parser<'a> { pub fn parse_extract_expr(&mut self) -> Result { self.expect_token(&Token::LParen)?; let field = self.parse_date_time_field()?; - self.expect_keyword(Keyword::FROM)?; + + let syntax = if self.parse_keyword(Keyword::FROM) { + ExtractSyntax::From + } else if self.consume_token(&Token::Comma) + && dialect_of!(self is SnowflakeDialect | GenericDialect) + { + ExtractSyntax::Comma + } else { + return Err(ParserError::ParserError( + "Expected 'FROM' or ','".to_string(), + )); + }; + let expr = self.parse_expr()?; self.expect_token(&Token::RParen)?; Ok(Expr::Extract { field, expr: Box::new(expr), + syntax, }) } @@ -1950,6 +1963,12 @@ impl<'a> Parser<'a> { } _ => self.expected("date/time field", next_token), }, + Token::SingleQuotedString(_) if dialect_of!(self is SnowflakeDialect | GenericDialect) => + { + self.prev_token(); + let custom = self.parse_identifier(false)?; + Ok(DateTimeField::Custom(custom)) + } _ => self.expected("date/time field", next_token), } } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index a0dd5a662..134c8ddad 100644 
--- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -2136,6 +2136,7 @@ fn parse_extract_weekday() { assert_eq!( &Expr::Extract { field: DateTimeField::Week(Some(Ident::new("MONDAY"))), + syntax: ExtractSyntax::From, expr: Box::new(Expr::Identifier(Ident::new("d"))), }, expr_from_projection(only(&select.projection)), diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 7ec017269..293269cdd 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2430,6 +2430,7 @@ fn parse_extract() { assert_eq!( &Expr::Extract { field: DateTimeField::Year, + syntax: ExtractSyntax::From, expr: Box::new(Expr::Identifier(Ident::new("d"))), }, expr_from_projection(only(&select.projection)), diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index eaf8c1d14..a331c7df9 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -2019,6 +2019,35 @@ fn parse_extract_custom_part() { assert_eq!( &Expr::Extract { field: DateTimeField::Custom(Ident::new("eod")), + syntax: ExtractSyntax::From, + expr: Box::new(Expr::Identifier(Ident::new("d"))), + }, + expr_from_projection(only(&select.projection)), + ); +} + +#[test] +fn parse_extract_comma() { + let sql = "SELECT EXTRACT(HOUR, d)"; + let select = snowflake_and_generic().verified_only_select(sql); + assert_eq!( + &Expr::Extract { + field: DateTimeField::Hour, + syntax: ExtractSyntax::Comma, + expr: Box::new(Expr::Identifier(Ident::new("d"))), + }, + expr_from_projection(only(&select.projection)), + ); +} + +#[test] +fn parse_extract_comma_quoted() { + let sql = "SELECT EXTRACT('hour', d)"; + let select = snowflake_and_generic().verified_only_select(sql); + assert_eq!( + &Expr::Extract { + field: DateTimeField::Custom(Ident::with_quote('\'', "hour")), + syntax: ExtractSyntax::Comma, expr: Box::new(Expr::Identifier(Ident::new("d"))), }, expr_from_projection(only(&select.projection)), From b072ce2589a16a850b456223979f75b799aaf7aa Mon Sep 17 00:00:00 
2001 From: Luca Cappelletti Date: Wed, 14 Aug 2024 15:11:16 +0200 Subject: [PATCH 053/506] Adding support for parsing CREATE TRIGGER and DROP TRIGGER statements (#1352) Co-authored-by: hulk Co-authored-by: Ifeanyi Ubah Co-authored-by: Andrew Lamb --- src/ast/data_type.rs | 5 + src/ast/ddl.rs | 2 +- src/ast/mod.rs | 172 ++++++++++++- src/ast/trigger.rs | 158 ++++++++++++ src/keywords.rs | 5 +- src/parser/mod.rs | 216 +++++++++++++++- src/test_utils.rs | 1 + tests/sqlparser_postgres.rs | 488 +++++++++++++++++++++++++++++++++++- 8 files changed, 1022 insertions(+), 25 deletions(-) create mode 100644 src/ast/trigger.rs diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index e6477f56b..ff2a3ad04 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -319,6 +319,10 @@ pub enum DataType { /// [`SQLiteDialect`](crate::dialect::SQLiteDialect), from statements such /// as `CREATE TABLE t1 (a)`. Unspecified, + /// Trigger data type, returned by functions associated with triggers + /// + /// [postgresql]: https://www.postgresql.org/docs/current/plpgsql-trigger.html + Trigger, } impl fmt::Display for DataType { @@ -543,6 +547,7 @@ impl fmt::Display for DataType { write!(f, "Nested({})", display_comma_separated(fields)) } DataType::Unspecified => Ok(()), + DataType::Trigger => write!(f, "TRIGGER"), } } } diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index d207f5766..bebd98604 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -1175,7 +1175,7 @@ fn display_option_spaced(option: &Option) -> impl fmt::Displ /// ` = [ DEFERRABLE | NOT DEFERRABLE ] [ INITIALLY DEFERRED | INITIALLY IMMEDIATE ] [ ENFORCED | NOT ENFORCED ]` /// /// Used in UNIQUE and foreign key constraints. The individual settings may occur in any order. 
-#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Default, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ConstraintCharacteristics { diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 86e2592a3..ae0522ccc 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -53,6 +53,12 @@ pub use self::query::{ TableAlias, TableFactor, TableFunctionArgs, TableVersion, TableWithJoins, Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill, }; + +pub use self::trigger::{ + TriggerEvent, TriggerExecBody, TriggerExecBodyType, TriggerObject, TriggerPeriod, + TriggerReferencing, TriggerReferencingType, +}; + pub use self::value::{ escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString, TrimWhereField, Value, @@ -71,6 +77,7 @@ mod dml; pub mod helpers; mod operator; mod query; +mod trigger; mod value; #[cfg(feature = "visitor")] @@ -2282,7 +2289,7 @@ pub enum Statement { DropFunction { if_exists: bool, /// One or more function to drop - func_desc: Vec, + func_desc: Vec, /// `CASCADE` or `RESTRICT` option: Option, }, @@ -2292,7 +2299,7 @@ pub enum Statement { DropProcedure { if_exists: bool, /// One or more function to drop - proc_desc: Vec, + proc_desc: Vec, /// `CASCADE` or `RESTRICT` option: Option, }, @@ -2618,6 +2625,96 @@ pub enum Statement { /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_a_remote_function) remote_connection: Option, }, + /// CREATE TRIGGER + /// + /// Examples: + /// + /// ```sql + /// CREATE TRIGGER trigger_name + /// BEFORE INSERT ON table_name + /// FOR EACH ROW + /// EXECUTE FUNCTION trigger_function(); + /// ``` + /// + /// Postgres: + CreateTrigger { + /// The `OR REPLACE` clause is used to re-create the trigger if it already exists. 
+ /// + /// Example: + /// ```sql + /// CREATE OR REPLACE TRIGGER trigger_name + /// AFTER INSERT ON table_name + /// FOR EACH ROW + /// EXECUTE FUNCTION trigger_function(); + /// ``` + or_replace: bool, + /// The `CONSTRAINT` keyword is used to create a trigger as a constraint. + is_constraint: bool, + /// The name of the trigger to be created. + name: ObjectName, + /// Determines whether the function is called before, after, or instead of the event. + /// + /// Example of BEFORE: + /// + /// ```sql + /// CREATE TRIGGER trigger_name + /// BEFORE INSERT ON table_name + /// FOR EACH ROW + /// EXECUTE FUNCTION trigger_function(); + /// ``` + /// + /// Example of AFTER: + /// + /// ```sql + /// CREATE TRIGGER trigger_name + /// AFTER INSERT ON table_name + /// FOR EACH ROW + /// EXECUTE FUNCTION trigger_function(); + /// ``` + /// + /// Example of INSTEAD OF: + /// + /// ```sql + /// CREATE TRIGGER trigger_name + /// INSTEAD OF INSERT ON table_name + /// FOR EACH ROW + /// EXECUTE FUNCTION trigger_function(); + /// ``` + period: TriggerPeriod, + /// Multiple events can be specified using OR, such as `INSERT`, `UPDATE`, `DELETE`, or `TRUNCATE`. + events: Vec, + /// The table on which the trigger is to be created. + table_name: ObjectName, + /// The optional referenced table name that can be referenced via + /// the `FROM` keyword. + referenced_table_name: Option, + /// This keyword immediately precedes the declaration of one or two relation names that provide access to the transition relations of the triggering statement. + referencing: Vec, + /// This specifies whether the trigger function should be fired once for + /// every row affected by the trigger event, or just once per SQL statement. + trigger_object: TriggerObject, + /// Whether to include the `EACH` term of the `FOR EACH`, as it is optional syntax. 
+ include_each: bool, + /// Triggering conditions + condition: Option, + /// Execute logic block + exec_body: TriggerExecBody, + /// The characteristic of the trigger, which include whether the trigger is `DEFERRABLE`, `INITIALLY DEFERRED`, or `INITIALLY IMMEDIATE`, + characteristics: Option, + }, + /// DROP TRIGGER + /// + /// ```sql + /// DROP TRIGGER [ IF EXISTS ] name ON table_name [ CASCADE | RESTRICT ] + /// ``` + /// + DropTrigger { + if_exists: bool, + trigger_name: ObjectName, + table_name: ObjectName, + /// `CASCADE` or `RESTRICT` + option: Option, + }, /// ```sql /// CREATE PROCEDURE /// ``` @@ -3394,6 +3491,71 @@ impl fmt::Display for Statement { } Ok(()) } + Statement::CreateTrigger { + or_replace, + is_constraint, + name, + period, + events, + table_name, + referenced_table_name, + referencing, + trigger_object, + condition, + include_each, + exec_body, + characteristics, + } => { + write!( + f, + "CREATE {or_replace}{is_constraint}TRIGGER {name} {period}", + or_replace = if *or_replace { "OR REPLACE " } else { "" }, + is_constraint = if *is_constraint { "CONSTRAINT " } else { "" }, + )?; + + if !events.is_empty() { + write!(f, " {}", display_separated(events, " OR "))?; + } + write!(f, " ON {table_name}")?; + + if let Some(referenced_table_name) = referenced_table_name { + write!(f, " FROM {referenced_table_name}")?; + } + + if let Some(characteristics) = characteristics { + write!(f, " {characteristics}")?; + } + + if !referencing.is_empty() { + write!(f, " REFERENCING {}", display_separated(referencing, " "))?; + } + + if *include_each { + write!(f, " FOR EACH {trigger_object}")?; + } else { + write!(f, " FOR {trigger_object}")?; + } + if let Some(condition) = condition { + write!(f, " WHEN {condition}")?; + } + write!(f, " EXECUTE {exec_body}") + } + Statement::DropTrigger { + if_exists, + trigger_name, + table_name, + option, + } => { + write!(f, "DROP TRIGGER")?; + if *if_exists { + write!(f, " IF EXISTS")?; + } + write!(f, " {trigger_name} ON 
{table_name}")?; + if let Some(option) = option { + write!(f, " {option}")?; + } + Ok(()) + } Statement::CreateProcedure { name, or_alter, @@ -6026,16 +6188,16 @@ impl fmt::Display for DropFunctionOption { } } -/// Function describe in DROP FUNCTION. +/// Generic function description for DROP FUNCTION and CREATE TRIGGER. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub struct DropFunctionDesc { +pub struct FunctionDesc { pub name: ObjectName, pub args: Option>, } -impl fmt::Display for DropFunctionDesc { +impl fmt::Display for FunctionDesc { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{}", self.name)?; if let Some(args) = &self.args { diff --git a/src/ast/trigger.rs b/src/ast/trigger.rs new file mode 100644 index 000000000..a0913db94 --- /dev/null +++ b/src/ast/trigger.rs @@ -0,0 +1,158 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! SQL Abstract Syntax Tree (AST) for triggers. +use super::*; + +/// This specifies whether the trigger function should be fired once for every row affected by the trigger event, or just once per SQL statement. 
+#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TriggerObject { + Row, + Statement, +} + +impl fmt::Display for TriggerObject { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + TriggerObject::Row => write!(f, "ROW"), + TriggerObject::Statement => write!(f, "STATEMENT"), + } + } +} + +/// This clause indicates whether the following relation name is for the before-image transition relation or the after-image transition relation +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TriggerReferencingType { + OldTable, + NewTable, +} + +impl fmt::Display for TriggerReferencingType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + TriggerReferencingType::OldTable => write!(f, "OLD TABLE"), + TriggerReferencingType::NewTable => write!(f, "NEW TABLE"), + } + } +} + +/// This keyword immediately precedes the declaration of one or two relation names that provide access to the transition relations of the triggering statement +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct TriggerReferencing { + pub refer_type: TriggerReferencingType, + pub is_as: bool, + pub transition_relation_name: ObjectName, +} + +impl fmt::Display for TriggerReferencing { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{refer_type}{is_as} {relation_name}", + refer_type = self.refer_type, + is_as = if self.is_as { " AS" } else { "" }, + relation_name = self.transition_relation_name + ) + } +} + +/// Used to describe trigger events +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, 
Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TriggerEvent { + Insert, + Update(Vec), + Delete, + Truncate, +} + +impl fmt::Display for TriggerEvent { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + TriggerEvent::Insert => write!(f, "INSERT"), + TriggerEvent::Update(columns) => { + write!(f, "UPDATE")?; + if !columns.is_empty() { + write!(f, " OF")?; + write!(f, " {}", display_comma_separated(columns))?; + } + Ok(()) + } + TriggerEvent::Delete => write!(f, "DELETE"), + TriggerEvent::Truncate => write!(f, "TRUNCATE"), + } + } +} + +/// Trigger period +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TriggerPeriod { + After, + Before, + InsteadOf, +} + +impl fmt::Display for TriggerPeriod { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + TriggerPeriod::After => write!(f, "AFTER"), + TriggerPeriod::Before => write!(f, "BEFORE"), + TriggerPeriod::InsteadOf => write!(f, "INSTEAD OF"), + } + } +} + +/// Types of trigger body execution body. 
+#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TriggerExecBodyType { + Function, + Procedure, +} + +impl fmt::Display for TriggerExecBodyType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + TriggerExecBodyType::Function => write!(f, "FUNCTION"), + TriggerExecBodyType::Procedure => write!(f, "PROCEDURE"), + } + } +} +/// This keyword immediately precedes the declaration of one or two relation names that provide access to the transition relations of the triggering statement +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct TriggerExecBody { + pub exec_type: TriggerExecBodyType, + pub func_desc: FunctionDesc, +} + +impl fmt::Display for TriggerExecBody { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{exec_type} {func_desc}", + exec_type = self.exec_type, + func_desc = self.func_desc + ) + } +} diff --git a/src/keywords.rs b/src/keywords.rs index c175da874..0c9d3dd6c 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -20,7 +20,7 @@ //! As a matter of fact, most of these keywords are not used at all //! and could be removed. //! 3) a `RESERVED_FOR_TABLE_ALIAS` array with keywords reserved in a -//! "table alias" context. +//! "table alias" context. 
#[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; @@ -108,6 +108,7 @@ define_keywords!( AVRO, BACKWARD, BASE64, + BEFORE, BEGIN, BEGIN_FRAME, BEGIN_PARTITION, @@ -378,6 +379,7 @@ define_keywords!( INSENSITIVE, INSERT, INSTALL, + INSTEAD, INT, INT128, INT16, @@ -683,6 +685,7 @@ define_keywords!( STABLE, STAGE, START, + STATEMENT, STATIC, STATISTICS, STATUS, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 60a7b4d0b..5706df56c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3368,6 +3368,25 @@ impl<'a> Parser<'a> { Ok(values) } + /// Parse a keyword-separated list of 1+ items accepted by `F` + pub fn parse_keyword_separated( + &mut self, + keyword: Keyword, + mut f: F, + ) -> Result, ParserError> + where + F: FnMut(&mut Parser<'a>) -> Result, + { + let mut values = vec![]; + loop { + values.push(f(self)?); + if !self.parse_keyword(keyword) { + break; + } + } + Ok(values) + } + pub fn parse_parenthesized(&mut self, mut f: F) -> Result where F: FnMut(&mut Parser<'a>) -> Result, @@ -3471,6 +3490,10 @@ impl<'a> Parser<'a> { self.parse_create_external_table(or_replace) } else if self.parse_keyword(Keyword::FUNCTION) { self.parse_create_function(or_replace, temporary) + } else if self.parse_keyword(Keyword::TRIGGER) { + self.parse_create_trigger(or_replace, false) + } else if self.parse_keywords(&[Keyword::CONSTRAINT, Keyword::TRIGGER]) { + self.parse_create_trigger(or_replace, true) } else if self.parse_keyword(Keyword::MACRO) { self.parse_create_macro(or_replace, temporary) } else if self.parse_keyword(Keyword::SECRET) { @@ -4061,6 +4084,180 @@ impl<'a> Parser<'a> { }) } + /// Parse statements of the DropTrigger type such as: + /// + /// ```sql + /// DROP TRIGGER [ IF EXISTS ] name ON table_name [ CASCADE | RESTRICT ] + /// ``` + pub fn parse_drop_trigger(&mut self) -> Result { + if !dialect_of!(self is PostgreSqlDialect | GenericDialect) { + self.prev_token(); + return self.expected("an object type after DROP", self.peek_token()); + } + 
let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let trigger_name = self.parse_object_name(false)?; + self.expect_keyword(Keyword::ON)?; + let table_name = self.parse_object_name(false)?; + let option = self + .parse_one_of_keywords(&[Keyword::CASCADE, Keyword::RESTRICT]) + .map(|keyword| match keyword { + Keyword::CASCADE => ReferentialAction::Cascade, + Keyword::RESTRICT => ReferentialAction::Restrict, + _ => unreachable!(), + }); + Ok(Statement::DropTrigger { + if_exists, + trigger_name, + table_name, + option, + }) + } + + pub fn parse_create_trigger( + &mut self, + or_replace: bool, + is_constraint: bool, + ) -> Result { + if !dialect_of!(self is PostgreSqlDialect | GenericDialect) { + self.prev_token(); + return self.expected("an object type after CREATE", self.peek_token()); + } + + let name = self.parse_object_name(false)?; + let period = self.parse_trigger_period()?; + + let events = self.parse_keyword_separated(Keyword::OR, Parser::parse_trigger_event)?; + self.expect_keyword(Keyword::ON)?; + let table_name = self.parse_object_name(false)?; + + let referenced_table_name = if self.parse_keyword(Keyword::FROM) { + self.parse_object_name(true).ok() + } else { + None + }; + + let characteristics = self.parse_constraint_characteristics()?; + + let mut referencing = vec![]; + if self.parse_keyword(Keyword::REFERENCING) { + while let Some(refer) = self.parse_trigger_referencing()? { + referencing.push(refer); + } + } + + self.expect_keyword(Keyword::FOR)?; + let include_each = self.parse_keyword(Keyword::EACH); + let trigger_object = + match self.expect_one_of_keywords(&[Keyword::ROW, Keyword::STATEMENT])? 
{ + Keyword::ROW => TriggerObject::Row, + Keyword::STATEMENT => TriggerObject::Statement, + _ => unreachable!(), + }; + + let condition = self + .parse_keyword(Keyword::WHEN) + .then(|| self.parse_expr()) + .transpose()?; + + self.expect_keyword(Keyword::EXECUTE)?; + + let exec_body = self.parse_trigger_exec_body()?; + + Ok(Statement::CreateTrigger { + or_replace, + is_constraint, + name, + period, + events, + table_name, + referenced_table_name, + referencing, + trigger_object, + include_each, + condition, + exec_body, + characteristics, + }) + } + + pub fn parse_trigger_period(&mut self) -> Result { + Ok( + match self.expect_one_of_keywords(&[ + Keyword::BEFORE, + Keyword::AFTER, + Keyword::INSTEAD, + ])? { + Keyword::BEFORE => TriggerPeriod::Before, + Keyword::AFTER => TriggerPeriod::After, + Keyword::INSTEAD => self + .expect_keyword(Keyword::OF) + .map(|_| TriggerPeriod::InsteadOf)?, + _ => unreachable!(), + }, + ) + } + + pub fn parse_trigger_event(&mut self) -> Result { + Ok( + match self.expect_one_of_keywords(&[ + Keyword::INSERT, + Keyword::UPDATE, + Keyword::DELETE, + Keyword::TRUNCATE, + ])? 
{ + Keyword::INSERT => TriggerEvent::Insert, + Keyword::UPDATE => { + if self.parse_keyword(Keyword::OF) { + let cols = self.parse_comma_separated(|ident| { + Parser::parse_identifier(ident, false) + })?; + TriggerEvent::Update(cols) + } else { + TriggerEvent::Update(vec![]) + } + } + Keyword::DELETE => TriggerEvent::Delete, + Keyword::TRUNCATE => TriggerEvent::Truncate, + _ => unreachable!(), + }, + ) + } + + pub fn parse_trigger_referencing(&mut self) -> Result, ParserError> { + let refer_type = match self.parse_one_of_keywords(&[Keyword::OLD, Keyword::NEW]) { + Some(Keyword::OLD) if self.parse_keyword(Keyword::TABLE) => { + TriggerReferencingType::OldTable + } + Some(Keyword::NEW) if self.parse_keyword(Keyword::TABLE) => { + TriggerReferencingType::NewTable + } + _ => { + return Ok(None); + } + }; + + let is_as = self.parse_keyword(Keyword::AS); + let transition_relation_name = self.parse_object_name(false)?; + Ok(Some(TriggerReferencing { + refer_type, + is_as, + transition_relation_name, + })) + } + + pub fn parse_trigger_exec_body(&mut self) -> Result { + Ok(TriggerExecBody { + exec_type: match self + .expect_one_of_keywords(&[Keyword::FUNCTION, Keyword::PROCEDURE])? 
+ { + Keyword::FUNCTION => TriggerExecBodyType::Function, + Keyword::PROCEDURE => TriggerExecBodyType::Procedure, + _ => unreachable!(), + }, + func_desc: self.parse_function_desc()?, + }) + } + pub fn parse_create_macro( &mut self, or_replace: bool, @@ -4509,9 +4706,11 @@ impl<'a> Parser<'a> { return self.parse_drop_procedure(); } else if self.parse_keyword(Keyword::SECRET) { return self.parse_drop_secret(temporary, persistent); + } else if self.parse_keyword(Keyword::TRIGGER) { + return self.parse_drop_trigger(); } else { return self.expected( - "TABLE, VIEW, INDEX, ROLE, SCHEMA, FUNCTION, PROCEDURE, STAGE or SEQUENCE after DROP", + "TABLE, VIEW, INDEX, ROLE, SCHEMA, FUNCTION, PROCEDURE, STAGE, TRIGGER, SECRET or SEQUENCE after DROP", self.peek_token(), ); }; @@ -4550,7 +4749,7 @@ impl<'a> Parser<'a> { /// ``` fn parse_drop_function(&mut self) -> Result { let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let func_desc = self.parse_comma_separated(Parser::parse_drop_function_desc)?; + let func_desc = self.parse_comma_separated(Parser::parse_function_desc)?; let option = match self.parse_one_of_keywords(&[Keyword::CASCADE, Keyword::RESTRICT]) { Some(Keyword::CASCADE) => Some(ReferentialAction::Cascade), Some(Keyword::RESTRICT) => Some(ReferentialAction::Restrict), @@ -4569,7 +4768,7 @@ impl<'a> Parser<'a> { /// ``` fn parse_drop_procedure(&mut self) -> Result { let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let proc_desc = self.parse_comma_separated(Parser::parse_drop_function_desc)?; + let proc_desc = self.parse_comma_separated(Parser::parse_function_desc)?; let option = match self.parse_one_of_keywords(&[Keyword::CASCADE, Keyword::RESTRICT]) { Some(Keyword::CASCADE) => Some(ReferentialAction::Cascade), Some(Keyword::RESTRICT) => Some(ReferentialAction::Restrict), @@ -4583,7 +4782,7 @@ impl<'a> Parser<'a> { }) } - fn parse_drop_function_desc(&mut self) -> Result { + fn parse_function_desc(&mut self) -> Result { let 
name = self.parse_object_name(false)?; let args = if self.consume_token(&Token::LParen) { @@ -4598,7 +4797,7 @@ impl<'a> Parser<'a> { None }; - Ok(DropFunctionDesc { name, args }) + Ok(FunctionDesc { name, args }) } /// See [DuckDB Docs](https://duckdb.org/docs/sql/statements/create_secret.html) for more details. @@ -5882,11 +6081,7 @@ impl<'a> Parser<'a> { pub fn parse_constraint_characteristics( &mut self, ) -> Result, ParserError> { - let mut cc = ConstraintCharacteristics { - deferrable: None, - initially: None, - enforced: None, - }; + let mut cc = ConstraintCharacteristics::default(); loop { if cc.deferrable.is_none() && self.parse_keywords(&[Keyword::NOT, Keyword::DEFERRABLE]) @@ -7285,6 +7480,7 @@ impl<'a> Parser<'a> { let field_defs = self.parse_click_house_tuple_def()?; Ok(DataType::Tuple(field_defs)) } + Keyword::TRIGGER => Ok(DataType::Trigger), _ => { self.prev_token(); let type_name = self.parse_object_name(false)?; diff --git a/src/test_utils.rs b/src/test_utils.rs index d9100d351..5c05ec996 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -124,6 +124,7 @@ impl TestedDialects { } let only_statement = statements.pop().unwrap(); + if !canonical.is_empty() { assert_eq!(canonical, only_statement.to_string()) } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index f370748d2..2f9fe86c9 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -3623,7 +3623,7 @@ fn parse_drop_function() { pg().verified_stmt(sql), Statement::DropFunction { if_exists: true, - func_desc: vec![DropFunctionDesc { + func_desc: vec![FunctionDesc { name: ObjectName(vec![Ident { value: "test_func".to_string(), quote_style: None @@ -3639,7 +3639,7 @@ fn parse_drop_function() { pg().verified_stmt(sql), Statement::DropFunction { if_exists: true, - func_desc: vec![DropFunctionDesc { + func_desc: vec![FunctionDesc { name: ObjectName(vec![Ident { value: "test_func".to_string(), quote_style: None @@ -3664,7 +3664,7 @@ fn 
parse_drop_function() { Statement::DropFunction { if_exists: true, func_desc: vec![ - DropFunctionDesc { + FunctionDesc { name: ObjectName(vec![Ident { value: "test_func1".to_string(), quote_style: None @@ -3682,7 +3682,7 @@ fn parse_drop_function() { } ]), }, - DropFunctionDesc { + FunctionDesc { name: ObjectName(vec![Ident { value: "test_func2".to_string(), quote_style: None @@ -3713,7 +3713,7 @@ fn parse_drop_procedure() { pg().verified_stmt(sql), Statement::DropProcedure { if_exists: true, - proc_desc: vec![DropFunctionDesc { + proc_desc: vec![FunctionDesc { name: ObjectName(vec![Ident { value: "test_proc".to_string(), quote_style: None @@ -3729,7 +3729,7 @@ fn parse_drop_procedure() { pg().verified_stmt(sql), Statement::DropProcedure { if_exists: true, - proc_desc: vec![DropFunctionDesc { + proc_desc: vec![FunctionDesc { name: ObjectName(vec![Ident { value: "test_proc".to_string(), quote_style: None @@ -3754,7 +3754,7 @@ fn parse_drop_procedure() { Statement::DropProcedure { if_exists: true, proc_desc: vec![ - DropFunctionDesc { + FunctionDesc { name: ObjectName(vec![Ident { value: "test_proc1".to_string(), quote_style: None @@ -3772,7 +3772,7 @@ fn parse_drop_procedure() { } ]), }, - DropFunctionDesc { + FunctionDesc { name: ObjectName(vec![Ident { value: "test_proc2".to_string(), quote_style: None @@ -4455,6 +4455,478 @@ fn test_escaped_string_literal() { } } +#[test] +fn parse_create_simple_before_insert_trigger() { + let sql = "CREATE TRIGGER check_insert BEFORE INSERT ON accounts FOR EACH ROW EXECUTE FUNCTION check_account_insert"; + let expected = Statement::CreateTrigger { + or_replace: false, + is_constraint: false, + name: ObjectName(vec![Ident::new("check_insert")]), + period: TriggerPeriod::Before, + events: vec![TriggerEvent::Insert], + table_name: ObjectName(vec![Ident::new("accounts")]), + referenced_table_name: None, + referencing: vec![], + trigger_object: TriggerObject::Row, + include_each: true, + condition: None, + exec_body: TriggerExecBody 
{ + exec_type: TriggerExecBodyType::Function, + func_desc: FunctionDesc { + name: ObjectName(vec![Ident::new("check_account_insert")]), + args: None, + }, + }, + characteristics: None, + }; + + assert_eq!(pg().verified_stmt(sql), expected); +} + +#[test] +fn parse_create_after_update_trigger_with_condition() { + let sql = "CREATE TRIGGER check_update AFTER UPDATE ON accounts FOR EACH ROW WHEN (NEW.balance > 10000) EXECUTE FUNCTION check_account_update"; + let expected = Statement::CreateTrigger { + or_replace: false, + is_constraint: false, + name: ObjectName(vec![Ident::new("check_update")]), + period: TriggerPeriod::After, + events: vec![TriggerEvent::Update(vec![])], + table_name: ObjectName(vec![Ident::new("accounts")]), + referenced_table_name: None, + referencing: vec![], + trigger_object: TriggerObject::Row, + include_each: true, + condition: Some(Expr::Nested(Box::new(Expr::BinaryOp { + left: Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("NEW"), + Ident::new("balance"), + ])), + op: BinaryOperator::Gt, + right: Box::new(Expr::Value(number("10000"))), + }))), + exec_body: TriggerExecBody { + exec_type: TriggerExecBodyType::Function, + func_desc: FunctionDesc { + name: ObjectName(vec![Ident::new("check_account_update")]), + args: None, + }, + }, + characteristics: None, + }; + + assert_eq!(pg().verified_stmt(sql), expected); +} + +#[test] +fn parse_create_instead_of_delete_trigger() { + let sql = "CREATE TRIGGER check_delete INSTEAD OF DELETE ON accounts FOR EACH ROW EXECUTE FUNCTION check_account_deletes"; + let expected = Statement::CreateTrigger { + or_replace: false, + is_constraint: false, + name: ObjectName(vec![Ident::new("check_delete")]), + period: TriggerPeriod::InsteadOf, + events: vec![TriggerEvent::Delete], + table_name: ObjectName(vec![Ident::new("accounts")]), + referenced_table_name: None, + referencing: vec![], + trigger_object: TriggerObject::Row, + include_each: true, + condition: None, + exec_body: TriggerExecBody { + exec_type: 
TriggerExecBodyType::Function, + func_desc: FunctionDesc { + name: ObjectName(vec![Ident::new("check_account_deletes")]), + args: None, + }, + }, + characteristics: None, + }; + + assert_eq!(pg().verified_stmt(sql), expected); +} + +#[test] +fn parse_create_trigger_with_multiple_events_and_deferrable() { + let sql = "CREATE CONSTRAINT TRIGGER check_multiple_events BEFORE INSERT OR UPDATE OR DELETE ON accounts DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE FUNCTION check_account_changes"; + let expected = Statement::CreateTrigger { + or_replace: false, + is_constraint: true, + name: ObjectName(vec![Ident::new("check_multiple_events")]), + period: TriggerPeriod::Before, + events: vec![ + TriggerEvent::Insert, + TriggerEvent::Update(vec![]), + TriggerEvent::Delete, + ], + table_name: ObjectName(vec![Ident::new("accounts")]), + referenced_table_name: None, + referencing: vec![], + trigger_object: TriggerObject::Row, + include_each: true, + condition: None, + exec_body: TriggerExecBody { + exec_type: TriggerExecBodyType::Function, + func_desc: FunctionDesc { + name: ObjectName(vec![Ident::new("check_account_changes")]), + args: None, + }, + }, + characteristics: Some(ConstraintCharacteristics { + deferrable: Some(true), + initially: Some(DeferrableInitial::Deferred), + enforced: None, + }), + }; + + assert_eq!(pg().verified_stmt(sql), expected); +} + +#[test] +fn parse_create_trigger_with_referencing() { + let sql = "CREATE TRIGGER check_referencing BEFORE INSERT ON accounts REFERENCING NEW TABLE AS new_accounts OLD TABLE AS old_accounts FOR EACH ROW EXECUTE FUNCTION check_account_referencing"; + let expected = Statement::CreateTrigger { + or_replace: false, + is_constraint: false, + name: ObjectName(vec![Ident::new("check_referencing")]), + period: TriggerPeriod::Before, + events: vec![TriggerEvent::Insert], + table_name: ObjectName(vec![Ident::new("accounts")]), + referenced_table_name: None, + referencing: vec![ + TriggerReferencing { + refer_type: 
TriggerReferencingType::NewTable, + is_as: true, + transition_relation_name: ObjectName(vec![Ident::new("new_accounts")]), + }, + TriggerReferencing { + refer_type: TriggerReferencingType::OldTable, + is_as: true, + transition_relation_name: ObjectName(vec![Ident::new("old_accounts")]), + }, + ], + trigger_object: TriggerObject::Row, + include_each: true, + condition: None, + exec_body: TriggerExecBody { + exec_type: TriggerExecBodyType::Function, + func_desc: FunctionDesc { + name: ObjectName(vec![Ident::new("check_account_referencing")]), + args: None, + }, + }, + characteristics: None, + }; + + assert_eq!(pg().verified_stmt(sql), expected); +} + +#[test] +/// While in the parse_create_trigger test we test the full syntax of the CREATE TRIGGER statement, +/// here we test the invalid cases of the CREATE TRIGGER statement which should cause an appropriate +/// error to be returned. +fn parse_create_trigger_invalid_cases() { + // Test invalid cases for the CREATE TRIGGER statement + let invalid_cases = vec![ + ( + "CREATE TRIGGER check_update BEFORE UPDATE ON accounts FUNCTION check_account_update", + "Expected: FOR, found: FUNCTION" + ), + ( + "CREATE TRIGGER check_update TOMORROW UPDATE ON accounts EXECUTE FUNCTION check_account_update", + "Expected: one of BEFORE or AFTER or INSTEAD, found: TOMORROW" + ), + ( + "CREATE TRIGGER check_update BEFORE SAVE ON accounts EXECUTE FUNCTION check_account_update", + "Expected: one of INSERT or UPDATE or DELETE or TRUNCATE, found: SAVE" + ) + ]; + + for (sql, expected_error) in invalid_cases { + let res = pg().parse_sql_statements(sql); + assert_eq!( + format!("sql parser error: {expected_error}"), + res.unwrap_err().to_string() + ); + } +} + +#[test] +fn parse_drop_trigger() { + for if_exists in [true, false] { + for option in [ + None, + Some(ReferentialAction::Cascade), + Some(ReferentialAction::Restrict), + ] { + let sql = &format!( + "DROP TRIGGER{} check_update ON table_name{}", + if if_exists { " IF EXISTS" } else { 
"" }, + option + .map(|o| format!(" {}", o)) + .unwrap_or_else(|| "".to_string()) + ); + assert_eq!( + pg().verified_stmt(sql), + Statement::DropTrigger { + if_exists, + trigger_name: ObjectName(vec![Ident::new("check_update")]), + table_name: ObjectName(vec![Ident::new("table_name")]), + option + } + ); + } + } +} + +#[test] +fn parse_drop_trigger_invalid_cases() { + // Test invalid cases for the DROP TRIGGER statement + let invalid_cases = vec![ + ( + "DROP TRIGGER check_update ON table_name CASCADE RESTRICT", + "Expected: end of statement, found: RESTRICT", + ), + ( + "DROP TRIGGER check_update ON table_name CASCADE CASCADE", + "Expected: end of statement, found: CASCADE", + ), + ( + "DROP TRIGGER check_update ON table_name CASCADE CASCADE CASCADE", + "Expected: end of statement, found: CASCADE", + ), + ]; + + for (sql, expected_error) in invalid_cases { + let res = pg().parse_sql_statements(sql); + assert_eq!( + format!("sql parser error: {expected_error}"), + res.unwrap_err().to_string() + ); + } +} + +#[test] +fn parse_trigger_related_functions() { + // First we define all parts of the trigger definition, + // including the table creation, the function creation, the trigger creation and the trigger drop. + // The following example is taken from the PostgreSQL documentation + + let sql_table_creation = r#" + CREATE TABLE emp ( + empname text, + salary integer, + last_date timestamp, + last_user text + ); + "#; + + let sql_create_function = r#" + CREATE FUNCTION emp_stamp() RETURNS trigger AS $emp_stamp$ + BEGIN + -- Check that empname and salary are given + IF NEW.empname IS NULL THEN + RAISE EXCEPTION 'empname cannot be null'; + END IF; + IF NEW.salary IS NULL THEN + RAISE EXCEPTION '% cannot have null salary', NEW.empname; + END IF; + + -- Who works for us when they must pay for it? 
+ IF NEW.salary < 0 THEN + RAISE EXCEPTION '% cannot have a negative salary', NEW.empname; + END IF; + + -- Remember who changed the payroll when + NEW.last_date := current_timestamp; + NEW.last_user := current_user; + RETURN NEW; + END; + $emp_stamp$ LANGUAGE plpgsql; + "#; + + let sql_create_trigger = r#" + CREATE TRIGGER emp_stamp BEFORE INSERT OR UPDATE ON emp + FOR EACH ROW EXECUTE FUNCTION emp_stamp(); + "#; + + let sql_drop_trigger = r#" + DROP TRIGGER emp_stamp ON emp; + "#; + + // Now we parse the statements and check if they are parsed correctly. + let mut statements = pg() + .parse_sql_statements(&format!( + "{}{}{}{}", + sql_table_creation, sql_create_function, sql_create_trigger, sql_drop_trigger + )) + .unwrap(); + + assert_eq!(statements.len(), 4); + let drop_trigger = statements.pop().unwrap(); + let create_trigger = statements.pop().unwrap(); + let create_function = statements.pop().unwrap(); + let create_table = statements.pop().unwrap(); + + // Check the first statement + let create_table = match create_table { + Statement::CreateTable(create_table) => create_table, + _ => panic!("Expected CreateTable statement"), + }; + + assert_eq!( + create_table, + CreateTable { + or_replace: false, + temporary: false, + external: false, + global: None, + if_not_exists: false, + transient: false, + volatile: false, + name: ObjectName(vec![Ident::new("emp")]), + columns: vec![ + ColumnDef { + name: "empname".into(), + data_type: DataType::Text, + collation: None, + options: vec![], + }, + ColumnDef { + name: "salary".into(), + data_type: DataType::Integer(None), + collation: None, + options: vec![], + }, + ColumnDef { + name: "last_date".into(), + data_type: DataType::Timestamp(None, TimezoneInfo::None), + collation: None, + options: vec![], + }, + ColumnDef { + name: "last_user".into(), + data_type: DataType::Text, + collation: None, + options: vec![], + }, + ], + constraints: vec![], + hive_distribution: HiveDistributionStyle::NONE, + hive_formats: 
Some(HiveFormat { + row_format: None, + serde_properties: None, + storage: None, + location: None + }), + table_properties: vec![], + with_options: vec![], + file_format: None, + location: None, + query: None, + without_rowid: false, + like: None, + clone: None, + engine: None, + comment: None, + auto_increment_offset: None, + default_charset: None, + collation: None, + on_commit: None, + on_cluster: None, + primary_key: None, + order_by: None, + partition_by: None, + cluster_by: None, + options: None, + strict: false, + copy_grants: false, + enable_schema_evolution: None, + change_tracking: None, + data_retention_time_in_days: None, + max_data_extension_time_in_days: None, + default_ddl_collation: None, + with_aggregation_policy: None, + with_row_access_policy: None, + with_tags: None, + } + ); + + // Check the second statement + + assert_eq!( + create_function, + Statement::CreateFunction { + or_replace: false, + temporary: false, + if_not_exists: false, + name: ObjectName(vec![Ident::new("emp_stamp")]), + args: None, + return_type: Some(DataType::Trigger), + function_body: Some( + CreateFunctionBody::AsBeforeOptions( + Expr::Value( + Value::DollarQuotedString( + DollarQuotedString { + value: "\n BEGIN\n -- Check that empname and salary are given\n IF NEW.empname IS NULL THEN\n RAISE EXCEPTION 'empname cannot be null';\n END IF;\n IF NEW.salary IS NULL THEN\n RAISE EXCEPTION '% cannot have null salary', NEW.empname;\n END IF;\n \n -- Who works for us when they must pay for it?\n IF NEW.salary < 0 THEN\n RAISE EXCEPTION '% cannot have a negative salary', NEW.empname;\n END IF;\n \n -- Remember who changed the payroll when\n NEW.last_date := current_timestamp;\n NEW.last_user := current_user;\n RETURN NEW;\n END;\n ".to_owned(), + tag: Some( + "emp_stamp".to_owned(), + ), + }, + ), + ), + ), + ), + behavior: None, + called_on_null: None, + parallel: None, + using: None, + language: Some(Ident::new("plpgsql")), + determinism_specifier: None, + options: None, + 
remote_connection: None + } + ); + + // Check the third statement + + assert_eq!( + create_trigger, + Statement::CreateTrigger { + or_replace: false, + is_constraint: false, + name: ObjectName(vec![Ident::new("emp_stamp")]), + period: TriggerPeriod::Before, + events: vec![TriggerEvent::Insert, TriggerEvent::Update(vec![])], + table_name: ObjectName(vec![Ident::new("emp")]), + referenced_table_name: None, + referencing: vec![], + trigger_object: TriggerObject::Row, + include_each: true, + condition: None, + exec_body: TriggerExecBody { + exec_type: TriggerExecBodyType::Function, + func_desc: FunctionDesc { + name: ObjectName(vec![Ident::new("emp_stamp")]), + args: None, + } + }, + characteristics: None + } + ); + + // Check the fourth statement + assert_eq!( + drop_trigger, + Statement::DropTrigger { + if_exists: false, + trigger_name: ObjectName(vec![Ident::new("emp_stamp")]), + table_name: ObjectName(vec![Ident::new("emp")]), + option: None + } + ); +} + #[test] fn test_unicode_string_literal() { let pairs = [ From c2f46ae07b8cdcfd72f5edef0274f029b1500de6 Mon Sep 17 00:00:00 2001 From: Seve Martinez <20816697+seve-martinez@users.noreply.github.com> Date: Wed, 14 Aug 2024 06:11:40 -0700 Subject: [PATCH 054/506] adding support for scale in CEIL and FLOOR functions (#1377) --- src/ast/mod.rs | 48 +++++++++++++++++++++-------- src/parser/mod.rs | 20 +++++++++--- tests/sqlparser_common.rs | 64 +++++++++++++++++++++++++++++++++++++-- 3 files changed, 113 insertions(+), 19 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index ae0522ccc..e3e9a5371 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -500,6 +500,24 @@ pub enum ExtractSyntax { Comma, } +/// The syntax used in a CEIL or FLOOR expression. +/// +/// The `CEIL/FLOOR( TO