From 8bbb85356ceb50e2d45545da0e8a15da3680353e Mon Sep 17 00:00:00 2001 From: Jeremy Maness Date: Thu, 17 Aug 2023 06:17:57 -0400 Subject: [PATCH 001/735] Fix SUBSTRING from/to argument construction for mssql (#947) --- src/ast/mod.rs | 17 +++++++++-- src/dialect/mod.rs | 4 +++ src/dialect/mssql.rs | 4 +++ src/parser.rs | 56 ++++++++++++++++++++++++++----------- tests/sqlparser_common.rs | 34 +++++++++++++++++++--- tests/sqlparser_mssql.rs | 59 +++++++++++++++++++++++++++++++++++++++ tests/sqlparser_mysql.rs | 3 +- 7 files changed, 153 insertions(+), 24 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index ad8c76298..0a366f632 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -477,6 +477,10 @@ pub enum Expr { expr: Box, substring_from: Option>, substring_for: Option>, + + // Some dialects use `SUBSTRING(expr [FROM start] [FOR len])` syntax while others omit FROM, + // FOR keywords (e.g. Microsoft SQL Server). This flags is used for formatting. + special: bool, }, /// ```sql /// TRIM([BOTH | LEADING | TRAILING] [ FROM] ) @@ -830,13 +834,22 @@ impl fmt::Display for Expr { expr, substring_from, substring_for, + special, } => { write!(f, "SUBSTRING({expr}")?; if let Some(from_part) = substring_from { - write!(f, " FROM {from_part}")?; + if *special { + write!(f, ", {from_part}")?; + } else { + write!(f, " FROM {from_part}")?; + } } if let Some(for_part) = substring_for { - write!(f, " FOR {for_part}")?; + if *special { + write!(f, ", {for_part}")?; + } else { + write!(f, " FOR {for_part}")?; + } } write!(f, ")") diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 8b3a58888..e174528b0 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -117,6 +117,10 @@ pub trait Dialect: Debug + Any { fn supports_group_by_expr(&self) -> bool { false } + /// Returns true if the dialect supports `SUBSTRING(expr [FROM start] [FOR len])` expressions + fn supports_substring_from_for_expr(&self) -> bool { + true + } /// Dialect-specific prefix parser override fn 
parse_prefix(&self, _parser: &mut Parser) -> Option> { // return None to fall back to the default behavior diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs index 6d1f49cd7..f04398100 100644 --- a/src/dialect/mssql.rs +++ b/src/dialect/mssql.rs @@ -34,4 +34,8 @@ impl Dialect for MsSqlDialect { || ch == '#' || ch == '_' } + + fn supports_substring_from_for_expr(&self) -> bool { + false + } } diff --git a/src/parser.rs b/src/parser.rs index 20e587c74..5d44ce9ce 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1223,25 +1223,47 @@ impl<'a> Parser<'a> { } pub fn parse_substring_expr(&mut self) -> Result { - // PARSE SUBSTRING (EXPR [FROM 1] [FOR 3]) - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - let mut from_expr = None; - if self.parse_keyword(Keyword::FROM) || self.consume_token(&Token::Comma) { - from_expr = Some(self.parse_expr()?); - } + if self.dialect.supports_substring_from_for_expr() { + // PARSE SUBSTRING (EXPR [FROM 1] [FOR 3]) + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + let mut from_expr = None; + if self.parse_keyword(Keyword::FROM) || self.consume_token(&Token::Comma) { + from_expr = Some(self.parse_expr()?); + } - let mut to_expr = None; - if self.parse_keyword(Keyword::FOR) || self.consume_token(&Token::Comma) { - to_expr = Some(self.parse_expr()?); - } - self.expect_token(&Token::RParen)?; + let mut to_expr = None; + if self.parse_keyword(Keyword::FOR) || self.consume_token(&Token::Comma) { + to_expr = Some(self.parse_expr()?); + } + self.expect_token(&Token::RParen)?; - Ok(Expr::Substring { - expr: Box::new(expr), - substring_from: from_expr.map(Box::new), - substring_for: to_expr.map(Box::new), - }) + Ok(Expr::Substring { + expr: Box::new(expr), + substring_from: from_expr.map(Box::new), + substring_for: to_expr.map(Box::new), + special: !self.dialect.supports_substring_from_for_expr(), + }) + } else { + // PARSE SUBSTRING(EXPR, start, length) + self.expect_token(&Token::LParen)?; + let 
expr = self.parse_expr()?; + + self.expect_token(&Token::Comma)?; + let from_expr = Some(self.parse_expr()?); + + self.expect_token(&Token::Comma)?; + let to_expr = Some(self.parse_expr()?); + + self.expect_token(&Token::RParen)?; + + Ok(Expr::Substring { + expr: Box::new(expr), + substring_from: from_expr.map(Box::new), + substring_for: to_expr.map(Box::new), + special: !self.dialect.supports_substring_from_for_expr(), + }) + } } pub fn parse_overlay_expr(&mut self) -> Result { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 9ec182f21..ca30e9516 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -5084,19 +5084,45 @@ fn parse_scalar_subqueries() { #[test] fn parse_substring() { - one_statement_parses_to("SELECT SUBSTRING('1')", "SELECT SUBSTRING('1')"); + let from_for_supported_dialects = TestedDialects { + dialects: vec![ + Box::new(GenericDialect {}), + Box::new(PostgreSqlDialect {}), + Box::new(AnsiDialect {}), + Box::new(SnowflakeDialect {}), + Box::new(HiveDialect {}), + Box::new(RedshiftSqlDialect {}), + Box::new(MySqlDialect {}), + Box::new(BigQueryDialect {}), + Box::new(SQLiteDialect {}), + Box::new(DuckDbDialect {}), + ], + options: None, + }; - one_statement_parses_to( + let from_for_unsupported_dialects = TestedDialects { + dialects: vec![Box::new(MsSqlDialect {})], + options: None, + }; + + from_for_supported_dialects + .one_statement_parses_to("SELECT SUBSTRING('1')", "SELECT SUBSTRING('1')"); + + from_for_supported_dialects.one_statement_parses_to( "SELECT SUBSTRING('1' FROM 1)", "SELECT SUBSTRING('1' FROM 1)", ); - one_statement_parses_to( + from_for_supported_dialects.one_statement_parses_to( "SELECT SUBSTRING('1' FROM 1 FOR 3)", "SELECT SUBSTRING('1' FROM 1 FOR 3)", ); - one_statement_parses_to("SELECT SUBSTRING('1' FOR 3)", "SELECT SUBSTRING('1' FOR 3)"); + from_for_unsupported_dialects + .one_statement_parses_to("SELECT SUBSTRING('1', 1, 3)", "SELECT SUBSTRING('1', 1, 3)"); + + 
from_for_supported_dialects + .one_statement_parses_to("SELECT SUBSTRING('1' FOR 3)", "SELECT SUBSTRING('1' FOR 3)"); } #[test] diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index b46a0c6c9..d4e093ad1 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -379,6 +379,65 @@ fn parse_similar_to() { chk(true); } +#[test] +fn parse_substring_in_select() { + let sql = "SELECT DISTINCT SUBSTRING(description, 0, 1) FROM test"; + match ms().one_statement_parses_to( + sql, + "SELECT DISTINCT SUBSTRING(description, 0, 1) FROM test", + ) { + Statement::Query(query) => { + assert_eq!( + Box::new(Query { + with: None, + body: Box::new(SetExpr::Select(Box::new(Select { + distinct: Some(Distinct::Distinct), + top: None, + projection: vec![SelectItem::UnnamedExpr(Expr::Substring { + expr: Box::new(Expr::Identifier(Ident { + value: "description".to_string(), + quote_style: None + })), + substring_from: Some(Box::new(Expr::Value(number("0")))), + substring_for: Some(Box::new(Expr::Value(number("1")))), + special: true, + })], + into: None, + from: vec![TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident { + value: "test".to_string(), + quote_style: None + }]), + alias: None, + args: None, + with_hints: vec![] + }, + joins: vec![] + }], + lateral_views: vec![], + selection: None, + group_by: vec![], + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None + }))), + order_by: vec![], + limit: None, + offset: None, + fetch: None, + locks: vec![], + }), + query + ); + } + _ => unreachable!(), + } +} + fn ms() -> TestedDialects { TestedDialects { dialects: vec![Box::new(MsSqlDialect {})], diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 07337ec70..9618786d3 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1271,7 +1271,8 @@ fn parse_substring_in_select() { quote_style: None })), substring_from: 
Some(Box::new(Expr::Value(number("0")))), - substring_for: Some(Box::new(Expr::Value(number("1")))) + substring_for: Some(Box::new(Expr::Value(number("1")))), + special: false, })], into: None, from: vec![TableWithJoins { From 83e30677b071e7dd8b595c414b98f90995a2a7cf Mon Sep 17 00:00:00 2001 From: ehoeve Date: Thu, 17 Aug 2023 12:44:55 +0200 Subject: [PATCH 002/735] Add support for table-level comments (#946) Co-authored-by: Andrew Lamb --- src/ast/helpers/stmt_create_table.rs | 10 ++++++++++ src/ast/mod.rs | 5 +++++ src/parser.rs | 12 ++++++++++++ tests/sqlparser_mysql.rs | 16 ++++++++++++++++ 4 files changed, 43 insertions(+) diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index 2998935d9..a00555fd8 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -65,6 +65,7 @@ pub struct CreateTableBuilder { pub like: Option, pub clone: Option, pub engine: Option, + pub comment: Option, pub default_charset: Option, pub collation: Option, pub on_commit: Option, @@ -96,6 +97,7 @@ impl CreateTableBuilder { like: None, clone: None, engine: None, + comment: None, default_charset: None, collation: None, on_commit: None, @@ -197,6 +199,11 @@ impl CreateTableBuilder { self } + pub fn comment(mut self, comment: Option) -> Self { + self.comment = comment; + self + } + pub fn default_charset(mut self, default_charset: Option) -> Self { self.default_charset = default_charset; self @@ -249,6 +256,7 @@ impl CreateTableBuilder { like: self.like, clone: self.clone, engine: self.engine, + comment: self.comment, default_charset: self.default_charset, collation: self.collation, on_commit: self.on_commit, @@ -287,6 +295,7 @@ impl TryFrom for CreateTableBuilder { like, clone, engine, + comment, default_charset, collation, on_commit, @@ -314,6 +323,7 @@ impl TryFrom for CreateTableBuilder { like, clone, engine, + comment, default_charset, collation, on_commit, diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 
0a366f632..41a5530a3 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1319,6 +1319,7 @@ pub enum Statement { like: Option, clone: Option, engine: Option, + comment: Option, default_charset: Option, collation: Option, on_commit: Option, @@ -2250,6 +2251,7 @@ impl fmt::Display for Statement { clone, default_charset, engine, + comment, collation, on_commit, on_cluster, @@ -2401,6 +2403,9 @@ impl fmt::Display for Statement { if let Some(engine) = engine { write!(f, " ENGINE={engine}")?; } + if let Some(comment) = comment { + write!(f, " COMMENT '{comment}'")?; + } if let Some(order_by) = order_by { write!(f, " ORDER BY ({})", display_comma_separated(order_by))?; } diff --git a/src/parser.rs b/src/parser.rs index 5d44ce9ce..34456e098 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3503,6 +3503,17 @@ impl<'a> Parser<'a> { None }; + let comment = if self.parse_keyword(Keyword::COMMENT) { + let _ = self.consume_token(&Token::Eq); + let next_token = self.next_token(); + match next_token.token { + Token::SingleQuotedString(str) => Some(str), + _ => self.expected("comment", next_token)?, + } + } else { + None + }; + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { if self.consume_token(&Token::LParen) { let columns = if self.peek_token() != Token::RParen { @@ -3583,6 +3594,7 @@ impl<'a> Parser<'a> { .like(like) .clone_clause(clone) .engine(engine) + .comment(comment) .order_by(order_by) .default_charset(default_charset) .collation(collation) diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 9618786d3..c0a51edab 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -296,6 +296,22 @@ fn parse_create_table_auto_increment() { } } +#[test] +fn parse_create_table_comment() { + let canonical = "CREATE TABLE foo (bar INT) COMMENT 'baz'"; + let with_equal = "CREATE TABLE foo (bar INT) COMMENT = 'baz'"; + + for sql in [canonical, with_equal] { + match mysql().one_statement_parses_to(sql, canonical) { + 
Statement::CreateTable { name, comment, .. } => { + assert_eq!(name.to_string(), "foo"); + assert_eq!(comment.expect("Should exist").to_string(), "baz"); + } + _ => unreachable!(), + } + } +} + #[test] fn parse_create_table_set_enum() { let sql = "CREATE TABLE foo (bar SET('a', 'b'), baz ENUM('a', 'b'))"; From a7d28582e557b6d7c177d5e98809de1090581fc6 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 17 Aug 2023 06:45:18 -0400 Subject: [PATCH 003/735] Minor: clarify the value of the special flag (#948) --- src/parser.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 34456e098..fcac8f235 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1242,7 +1242,7 @@ impl<'a> Parser<'a> { expr: Box::new(expr), substring_from: from_expr.map(Box::new), substring_for: to_expr.map(Box::new), - special: !self.dialect.supports_substring_from_for_expr(), + special: false, }) } else { // PARSE SUBSTRING(EXPR, start, length) @@ -1261,7 +1261,7 @@ impl<'a> Parser<'a> { expr: Box::new(expr), substring_from: from_expr.map(Box::new), substring_for: to_expr.map(Box::new), - special: !self.dialect.supports_substring_from_for_expr(), + special: true, }) } } From a49ea1908d3862f3471e3e1304c15926314eda48 Mon Sep 17 00:00:00 2001 From: "r.4ntix" Date: Thu, 17 Aug 2023 20:05:54 +0800 Subject: [PATCH 004/735] feat: add `ALTER ROLE` syntax of PostgreSQL and MS SQL Server (#942) --- src/ast/dcl.rs | 195 +++++++++++++++++++++++++++++ src/ast/mod.rs | 10 ++ src/keywords.rs | 1 + src/parser/alter.rs | 204 +++++++++++++++++++++++++++++++ src/{parser.rs => parser/mod.rs} | 11 +- tests/sqlparser_mssql.rs | 54 ++++++++ tests/sqlparser_postgres.rs | 193 +++++++++++++++++++++++++++++ 7 files changed, 666 insertions(+), 2 deletions(-) create mode 100644 src/ast/dcl.rs create mode 100644 src/parser/alter.rs rename src/{parser.rs => parser/mod.rs} (99%) diff --git a/src/ast/dcl.rs b/src/ast/dcl.rs new file mode 100644 index 000000000..f90de34d4 --- 
/dev/null +++ b/src/ast/dcl.rs @@ -0,0 +1,195 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! AST types specific to GRANT/REVOKE/ROLE variants of [`Statement`](crate::ast::Statement) +//! (commonly referred to as Data Control Language, or DCL) + +#[cfg(not(feature = "std"))] +use alloc::vec::Vec; +use core::fmt; + +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + +#[cfg(feature = "visitor")] +use sqlparser_derive::{Visit, VisitMut}; + +use super::{Expr, Ident, Password}; +use crate::ast::{display_separated, ObjectName}; + +/// An option in `ROLE` statement. 
+/// +/// +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum RoleOption { + BypassRLS(bool), + ConnectionLimit(Expr), + CreateDB(bool), + CreateRole(bool), + Inherit(bool), + Login(bool), + Password(Password), + Replication(bool), + SuperUser(bool), + ValidUntil(Expr), +} + +impl fmt::Display for RoleOption { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + RoleOption::BypassRLS(value) => { + write!(f, "{}", if *value { "BYPASSRLS" } else { "NOBYPASSRLS" }) + } + RoleOption::ConnectionLimit(expr) => { + write!(f, "CONNECTION LIMIT {expr}") + } + RoleOption::CreateDB(value) => { + write!(f, "{}", if *value { "CREATEDB" } else { "NOCREATEDB" }) + } + RoleOption::CreateRole(value) => { + write!(f, "{}", if *value { "CREATEROLE" } else { "NOCREATEROLE" }) + } + RoleOption::Inherit(value) => { + write!(f, "{}", if *value { "INHERIT" } else { "NOINHERIT" }) + } + RoleOption::Login(value) => { + write!(f, "{}", if *value { "LOGIN" } else { "NOLOGIN" }) + } + RoleOption::Password(password) => match password { + Password::Password(expr) => write!(f, "PASSWORD {expr}"), + Password::NullPassword => write!(f, "PASSWORD NULL"), + }, + RoleOption::Replication(value) => { + write!( + f, + "{}", + if *value { + "REPLICATION" + } else { + "NOREPLICATION" + } + ) + } + RoleOption::SuperUser(value) => { + write!(f, "{}", if *value { "SUPERUSER" } else { "NOSUPERUSER" }) + } + RoleOption::ValidUntil(expr) => { + write!(f, "VALID UNTIL {expr}") + } + } + } +} + +/// SET config value option: +/// * SET `configuration_parameter` { TO | = } { `value` | DEFAULT } +/// * SET `configuration_parameter` FROM CURRENT +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum SetConfigValue { + 
Default, + FromCurrent, + Value(Expr), +} + +/// RESET config option: +/// * RESET `configuration_parameter` +/// * RESET ALL +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum ResetConfig { + ALL, + ConfigName(ObjectName), +} + +/// An `ALTER ROLE` (`Statement::AlterRole`) operation +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum AlterRoleOperation { + /// Generic + RenameRole { + role_name: Ident, + }, + /// MS SQL Server + /// + AddMember { + member_name: Ident, + }, + DropMember { + member_name: Ident, + }, + /// PostgreSQL + /// + WithOptions { + options: Vec, + }, + Set { + config_name: ObjectName, + config_value: SetConfigValue, + in_database: Option, + }, + Reset { + config_name: ResetConfig, + in_database: Option, + }, +} + +impl fmt::Display for AlterRoleOperation { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + AlterRoleOperation::RenameRole { role_name } => { + write!(f, "RENAME TO {role_name}") + } + AlterRoleOperation::AddMember { member_name } => { + write!(f, "ADD MEMBER {member_name}") + } + AlterRoleOperation::DropMember { member_name } => { + write!(f, "DROP MEMBER {member_name}") + } + AlterRoleOperation::WithOptions { options } => { + write!(f, "WITH {}", display_separated(options, " ")) + } + AlterRoleOperation::Set { + config_name, + config_value, + in_database, + } => { + if let Some(database_name) = in_database { + write!(f, "IN DATABASE {} ", database_name)?; + } + + match config_value { + SetConfigValue::Default => write!(f, "SET {config_name} TO DEFAULT"), + SetConfigValue::FromCurrent => write!(f, "SET {config_name} FROM CURRENT"), + SetConfigValue::Value(expr) => write!(f, "SET {config_name} TO {expr}"), + } + } + 
AlterRoleOperation::Reset { + config_name, + in_database, + } => { + if let Some(database_name) = in_database { + write!(f, "IN DATABASE {} ", database_name)?; + } + + match config_name { + ResetConfig::ALL => write!(f, "RESET ALL"), + ResetConfig::ConfigName(name) => write!(f, "RESET {name}"), + } + } + } + } +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 41a5530a3..edab3c63a 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -28,6 +28,7 @@ use sqlparser_derive::{Visit, VisitMut}; pub use self::data_type::{ CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo, }; +pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue}; pub use self::ddl::{ AlterColumnOperation, AlterIndexOperation, AlterTableOperation, ColumnDef, ColumnOption, ColumnOptionDef, GeneratedAs, IndexType, KeyOrIndexDisplay, ProcedureParam, ReferentialAction, @@ -52,6 +53,7 @@ use crate::ast::helpers::stmt_data_loading::{ pub use visitor::*; mod data_type; +mod dcl; mod ddl; pub mod helpers; mod operator; @@ -1398,6 +1400,11 @@ pub enum Statement { query: Box, with_options: Vec, }, + /// ALTER ROLE + AlterRole { + name: Ident, + operation: AlterRoleOperation, + }, /// DROP Drop { /// The type of the object to drop: TABLE, VIEW, etc. 
@@ -2585,6 +2592,9 @@ impl fmt::Display for Statement { } write!(f, " AS {query}") } + Statement::AlterRole { name, operation } => { + write!(f, "ALTER ROLE {name} {operation}") + } Statement::Drop { object_type, if_exists, diff --git a/src/keywords.rs b/src/keywords.rs index 98e039414..5f3e44022 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -508,6 +508,7 @@ define_keywords!( REPEATABLE, REPLACE, REPLICATION, + RESET, RESTRICT, RESULT, RETAIN, diff --git a/src/parser/alter.rs b/src/parser/alter.rs new file mode 100644 index 000000000..838b64899 --- /dev/null +++ b/src/parser/alter.rs @@ -0,0 +1,204 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! 
SQL Parser for ALTER + +#[cfg(not(feature = "std"))] +use alloc::vec; + +use super::{Parser, ParserError}; +use crate::{ + ast::{AlterRoleOperation, Expr, Password, ResetConfig, RoleOption, SetConfigValue, Statement}, + dialect::{MsSqlDialect, PostgreSqlDialect}, + keywords::Keyword, + tokenizer::Token, +}; + +impl<'a> Parser<'a> { + pub fn parse_alter_role(&mut self) -> Result { + if dialect_of!(self is PostgreSqlDialect) { + return self.parse_pg_alter_role(); + } else if dialect_of!(self is MsSqlDialect) { + return self.parse_mssql_alter_role(); + } + + Err(ParserError::ParserError( + "ALTER ROLE is only support for PostgreSqlDialect, MsSqlDialect".into(), + )) + } + + fn parse_mssql_alter_role(&mut self) -> Result { + let role_name = self.parse_identifier()?; + + let operation = if self.parse_keywords(&[Keyword::ADD, Keyword::MEMBER]) { + let member_name = self.parse_identifier()?; + AlterRoleOperation::AddMember { member_name } + } else if self.parse_keywords(&[Keyword::DROP, Keyword::MEMBER]) { + let member_name = self.parse_identifier()?; + AlterRoleOperation::DropMember { member_name } + } else if self.parse_keywords(&[Keyword::WITH, Keyword::NAME]) { + if self.consume_token(&Token::Eq) { + let role_name = self.parse_identifier()?; + AlterRoleOperation::RenameRole { role_name } + } else { + return self.expected("= after WITH NAME ", self.peek_token()); + } + } else { + return self.expected("'ADD' or 'DROP' or 'WITH NAME'", self.peek_token()); + }; + + Ok(Statement::AlterRole { + name: role_name, + operation, + }) + } + + fn parse_pg_alter_role(&mut self) -> Result { + let role_name = self.parse_identifier()?; + + // [ IN DATABASE _`database_name`_ ] + let in_database = if self.parse_keywords(&[Keyword::IN, Keyword::DATABASE]) { + self.parse_object_name().ok() + } else { + None + }; + + let operation = if self.parse_keyword(Keyword::RENAME) { + if self.parse_keyword(Keyword::TO) { + let role_name = self.parse_identifier()?; + AlterRoleOperation::RenameRole { 
role_name } + } else { + return self.expected("TO after RENAME", self.peek_token()); + } + // SET + } else if self.parse_keyword(Keyword::SET) { + let config_name = self.parse_object_name()?; + // FROM CURRENT + if self.parse_keywords(&[Keyword::FROM, Keyword::CURRENT]) { + AlterRoleOperation::Set { + config_name, + config_value: SetConfigValue::FromCurrent, + in_database, + } + // { TO | = } { value | DEFAULT } + } else if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { + if self.parse_keyword(Keyword::DEFAULT) { + AlterRoleOperation::Set { + config_name, + config_value: SetConfigValue::Default, + in_database, + } + } else if let Ok(expr) = self.parse_expr() { + AlterRoleOperation::Set { + config_name, + config_value: SetConfigValue::Value(expr), + in_database, + } + } else { + self.expected("config value", self.peek_token())? + } + } else { + self.expected("'TO' or '=' or 'FROM CURRENT'", self.peek_token())? + } + // RESET + } else if self.parse_keyword(Keyword::RESET) { + if self.parse_keyword(Keyword::ALL) { + AlterRoleOperation::Reset { + config_name: ResetConfig::ALL, + in_database, + } + } else { + let config_name = self.parse_object_name()?; + AlterRoleOperation::Reset { + config_name: ResetConfig::ConfigName(config_name), + in_database, + } + } + // option + } else { + // [ WITH ] + let _ = self.parse_keyword(Keyword::WITH); + // option + let mut options = vec![]; + while let Some(opt) = self.maybe_parse(|parser| parser.parse_pg_role_option()) { + options.push(opt); + } + // check option + if options.is_empty() { + return self.expected("option", self.peek_token())?; + } + + AlterRoleOperation::WithOptions { options } + }; + + Ok(Statement::AlterRole { + name: role_name, + operation, + }) + } + + fn parse_pg_role_option(&mut self) -> Result { + let option = match self.parse_one_of_keywords(&[ + Keyword::BYPASSRLS, + Keyword::NOBYPASSRLS, + Keyword::CONNECTION, + Keyword::CREATEDB, + Keyword::NOCREATEDB, + Keyword::CREATEROLE, + 
Keyword::NOCREATEROLE, + Keyword::INHERIT, + Keyword::NOINHERIT, + Keyword::LOGIN, + Keyword::NOLOGIN, + Keyword::PASSWORD, + Keyword::REPLICATION, + Keyword::NOREPLICATION, + Keyword::SUPERUSER, + Keyword::NOSUPERUSER, + Keyword::VALID, + ]) { + Some(Keyword::BYPASSRLS) => RoleOption::BypassRLS(true), + Some(Keyword::NOBYPASSRLS) => RoleOption::BypassRLS(false), + Some(Keyword::CONNECTION) => { + self.expect_keyword(Keyword::LIMIT)?; + RoleOption::ConnectionLimit(Expr::Value(self.parse_number_value()?)) + } + Some(Keyword::CREATEDB) => RoleOption::CreateDB(true), + Some(Keyword::NOCREATEDB) => RoleOption::CreateDB(false), + Some(Keyword::CREATEROLE) => RoleOption::CreateRole(true), + Some(Keyword::NOCREATEROLE) => RoleOption::CreateRole(false), + Some(Keyword::INHERIT) => RoleOption::Inherit(true), + Some(Keyword::NOINHERIT) => RoleOption::Inherit(false), + Some(Keyword::LOGIN) => RoleOption::Login(true), + Some(Keyword::NOLOGIN) => RoleOption::Login(false), + Some(Keyword::PASSWORD) => { + let password = if self.parse_keyword(Keyword::NULL) { + Password::NullPassword + } else { + Password::Password(Expr::Value(self.parse_value()?)) + }; + RoleOption::Password(password) + } + Some(Keyword::REPLICATION) => RoleOption::Replication(true), + Some(Keyword::NOREPLICATION) => RoleOption::Replication(false), + Some(Keyword::SUPERUSER) => RoleOption::SuperUser(true), + Some(Keyword::NOSUPERUSER) => RoleOption::SuperUser(false), + Some(Keyword::VALID) => { + self.expect_keyword(Keyword::UNTIL)?; + RoleOption::ValidUntil(Expr::Value(self.parse_value()?)) + } + _ => self.expected("option", self.peek_token())?, + }; + + Ok(option) + } +} diff --git a/src/parser.rs b/src/parser/mod.rs similarity index 99% rename from src/parser.rs rename to src/parser/mod.rs index fcac8f235..d6f45359e 100644 --- a/src/parser.rs +++ b/src/parser/mod.rs @@ -33,6 +33,8 @@ use crate::dialect::*; use crate::keywords::{self, Keyword}; use crate::tokenizer::*; +mod alter; + #[derive(Debug, Clone, 
PartialEq, Eq)] pub enum ParserError { TokenizerError(String), @@ -3990,8 +3992,12 @@ impl<'a> Parser<'a> { } pub fn parse_alter(&mut self) -> Result { - let object_type = - self.expect_one_of_keywords(&[Keyword::VIEW, Keyword::TABLE, Keyword::INDEX])?; + let object_type = self.expect_one_of_keywords(&[ + Keyword::VIEW, + Keyword::TABLE, + Keyword::INDEX, + Keyword::ROLE, + ])?; match object_type { Keyword::VIEW => self.parse_alter_view(), Keyword::TABLE => { @@ -4186,6 +4192,7 @@ impl<'a> Parser<'a> { operation, }) } + Keyword::ROLE => self.parse_alter_role(), // unreachable because expect_one_of_keywords used above _ => unreachable!(), } diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index d4e093ad1..56fbd576e 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -216,6 +216,60 @@ fn parse_mssql_create_role() { } } +#[test] +fn parse_alter_role() { + let sql = "ALTER ROLE old_name WITH NAME = new_name"; + assert_eq!( + ms().parse_sql_statements(sql).unwrap(), + [Statement::AlterRole { + name: Ident { + value: "old_name".into(), + quote_style: None + }, + operation: AlterRoleOperation::RenameRole { + role_name: Ident { + value: "new_name".into(), + quote_style: None + } + }, + }] + ); + + let sql = "ALTER ROLE role_name ADD MEMBER new_member"; + assert_eq!( + ms().verified_stmt(sql), + Statement::AlterRole { + name: Ident { + value: "role_name".into(), + quote_style: None + }, + operation: AlterRoleOperation::AddMember { + member_name: Ident { + value: "new_member".into(), + quote_style: None + } + }, + } + ); + + let sql = "ALTER ROLE role_name DROP MEMBER old_member"; + assert_eq!( + ms().verified_stmt(sql), + Statement::AlterRole { + name: Ident { + value: "role_name".into(), + quote_style: None + }, + operation: AlterRoleOperation::DropMember { + member_name: Ident { + value: "old_member".into(), + quote_style: None + } + }, + } + ); +} + #[test] fn parse_delimited_identifiers() { // check that quoted identifiers in any 
position remain quoted after serialization diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index f6bb80d7e..09196db46 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2447,6 +2447,199 @@ fn parse_create_role() { } } +#[test] +fn parse_alter_role() { + let sql = "ALTER ROLE old_name RENAME TO new_name"; + assert_eq!( + pg().verified_stmt(sql), + Statement::AlterRole { + name: Ident { + value: "old_name".into(), + quote_style: None + }, + operation: AlterRoleOperation::RenameRole { + role_name: Ident { + value: "new_name".into(), + quote_style: None + } + }, + } + ); + + let sql = "ALTER ROLE role_name WITH SUPERUSER CREATEDB CREATEROLE INHERIT LOGIN REPLICATION BYPASSRLS CONNECTION LIMIT 100 PASSWORD 'abcdef' VALID UNTIL '2025-01-01'"; + assert_eq!( + pg().verified_stmt(sql), + Statement::AlterRole { + name: Ident { + value: "role_name".into(), + quote_style: None + }, + operation: AlterRoleOperation::WithOptions { + options: vec![ + RoleOption::SuperUser(true), + RoleOption::CreateDB(true), + RoleOption::CreateRole(true), + RoleOption::Inherit(true), + RoleOption::Login(true), + RoleOption::Replication(true), + RoleOption::BypassRLS(true), + RoleOption::ConnectionLimit(Expr::Value(number("100"))), + RoleOption::Password({ + Password::Password(Expr::Value(Value::SingleQuotedString("abcdef".into()))) + }), + RoleOption::ValidUntil(Expr::Value(Value::SingleQuotedString( + "2025-01-01".into(), + ))) + ] + }, + } + ); + + let sql = "ALTER ROLE role_name WITH NOSUPERUSER NOCREATEDB NOCREATEROLE NOINHERIT NOLOGIN NOREPLICATION NOBYPASSRLS PASSWORD NULL"; + assert_eq!( + pg().verified_stmt(sql), + Statement::AlterRole { + name: Ident { + value: "role_name".into(), + quote_style: None + }, + operation: AlterRoleOperation::WithOptions { + options: vec![ + RoleOption::SuperUser(false), + RoleOption::CreateDB(false), + RoleOption::CreateRole(false), + RoleOption::Inherit(false), + RoleOption::Login(false), + 
RoleOption::Replication(false), + RoleOption::BypassRLS(false), + RoleOption::Password(Password::NullPassword), + ] + }, + } + ); + + let sql = "ALTER ROLE role_name SET maintenance_work_mem FROM CURRENT"; + assert_eq!( + pg().verified_stmt(sql), + Statement::AlterRole { + name: Ident { + value: "role_name".into(), + quote_style: None + }, + operation: AlterRoleOperation::Set { + config_name: ObjectName(vec![Ident { + value: "maintenance_work_mem".into(), + quote_style: None + }]), + config_value: SetConfigValue::FromCurrent, + in_database: None + }, + } + ); + + let sql = "ALTER ROLE role_name IN DATABASE database_name SET maintenance_work_mem = 100000"; + assert_eq!( + pg().parse_sql_statements(sql).unwrap(), + [Statement::AlterRole { + name: Ident { + value: "role_name".into(), + quote_style: None + }, + operation: AlterRoleOperation::Set { + config_name: ObjectName(vec![Ident { + value: "maintenance_work_mem".into(), + quote_style: None + }]), + config_value: SetConfigValue::Value(Expr::Value(number("100000"))), + in_database: Some(ObjectName(vec![Ident { + value: "database_name".into(), + quote_style: None + }])) + }, + }] + ); + + let sql = "ALTER ROLE role_name IN DATABASE database_name SET maintenance_work_mem TO 100000"; + assert_eq!( + pg().verified_stmt(sql), + Statement::AlterRole { + name: Ident { + value: "role_name".into(), + quote_style: None + }, + operation: AlterRoleOperation::Set { + config_name: ObjectName(vec![Ident { + value: "maintenance_work_mem".into(), + quote_style: None + }]), + config_value: SetConfigValue::Value(Expr::Value(number("100000"))), + in_database: Some(ObjectName(vec![Ident { + value: "database_name".into(), + quote_style: None + }])) + }, + } + ); + + let sql = "ALTER ROLE role_name IN DATABASE database_name SET maintenance_work_mem TO DEFAULT"; + assert_eq!( + pg().verified_stmt(sql), + Statement::AlterRole { + name: Ident { + value: "role_name".into(), + quote_style: None + }, + operation: AlterRoleOperation::Set { + 
config_name: ObjectName(vec![Ident { + value: "maintenance_work_mem".into(), + quote_style: None + }]), + config_value: SetConfigValue::Default, + in_database: Some(ObjectName(vec![Ident { + value: "database_name".into(), + quote_style: None + }])) + }, + } + ); + + let sql = "ALTER ROLE role_name RESET ALL"; + assert_eq!( + pg().verified_stmt(sql), + Statement::AlterRole { + name: Ident { + value: "role_name".into(), + quote_style: None + }, + operation: AlterRoleOperation::Reset { + config_name: ResetConfig::ALL, + in_database: None + }, + } + ); + + let sql = "ALTER ROLE role_name IN DATABASE database_name RESET maintenance_work_mem"; + assert_eq!( + pg().verified_stmt(sql), + Statement::AlterRole { + name: Ident { + value: "role_name".into(), + quote_style: None + }, + operation: AlterRoleOperation::Reset { + config_name: ResetConfig::ConfigName(ObjectName(vec![Ident { + value: "maintenance_work_mem".into(), + quote_style: None + }])), + in_database: Some(ObjectName(vec![Ident { + value: "database_name".into(), + quote_style: None + }])) + }, + } + ); +} + #[test] fn parse_delimited_identifiers() { // check that quoted identifiers in any position remain quoted after serialization From 9a39afbe07b6eecd8819fe1f7d9af2b4aad3a6d7 Mon Sep 17 00:00:00 2001 From: Forbes Lindesay Date: Thu, 17 Aug 2023 16:47:11 +0100 Subject: [PATCH 005/735] feat: support more Postgres index syntax (#943) --- src/ast/mod.rs | 35 ++++++- src/keywords.rs | 2 + src/parser/mod.rs | 38 ++++++- tests/sqlparser_common.rs | 12 ++- tests/sqlparser_postgres.rs | 202 ++++++++++++++++++++++++++++++++++++ 5 files changed, 279 insertions(+), 10 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index edab3c63a..47419a893 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1348,13 +1348,17 @@ pub enum Statement { /// CREATE INDEX CreateIndex { /// index name - name: ObjectName, + name: Option, #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] table_name: ObjectName, using: 
Option, columns: Vec, unique: bool, + concurrently: bool, if_not_exists: bool, + include: Vec, + nulls_distinct: Option, + predicate: Option, }, /// CREATE ROLE /// See [postgres](https://www.postgresql.org/docs/current/sql-createrole.html) @@ -2464,20 +2468,41 @@ impl fmt::Display for Statement { using, columns, unique, + concurrently, if_not_exists, + include, + nulls_distinct, + predicate, } => { write!( f, - "CREATE {unique}INDEX {if_not_exists}{name} ON {table_name}", + "CREATE {unique}INDEX {concurrently}{if_not_exists}", unique = if *unique { "UNIQUE " } else { "" }, + concurrently = if *concurrently { "CONCURRENTLY " } else { "" }, if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" }, - name = name, - table_name = table_name )?; + if let Some(value) = name { + write!(f, "{value} ")?; + } + write!(f, "ON {table_name}")?; if let Some(value) = using { write!(f, " USING {value} ")?; } - write!(f, "({})", display_separated(columns, ",")) + write!(f, "({})", display_separated(columns, ","))?; + if !include.is_empty() { + write!(f, " INCLUDE ({})", display_separated(include, ","))?; + } + if let Some(value) = nulls_distinct { + if *value { + write!(f, " NULLS DISTINCT")?; + } else { + write!(f, " NULLS NOT DISTINCT")?; + } + } + if let Some(predicate) = predicate { + write!(f, " WHERE {predicate}")?; + } + Ok(()) } Statement::CreateRole { names, diff --git a/src/keywords.rs b/src/keywords.rs index 5f3e44022..c73535fca 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -153,6 +153,7 @@ define_keywords!( COMMITTED, COMPRESSION, COMPUTE, + CONCURRENTLY, CONDITION, CONFLICT, CONNECT, @@ -310,6 +311,7 @@ define_keywords!( ILIKE, IMMUTABLE, IN, + INCLUDE, INCREMENT, INDEX, INDICATOR, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d6f45359e..94814627d 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3370,9 +3370,15 @@ impl<'a> Parser<'a> { } pub fn parse_create_index(&mut self, unique: bool) -> Result { + let concurrently = 
self.parse_keyword(Keyword::CONCURRENTLY); let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let index_name = self.parse_object_name()?; - self.expect_keyword(Keyword::ON)?; + let index_name = if if_not_exists || !self.parse_keyword(Keyword::ON) { + let index_name = self.parse_object_name()?; + self.expect_keyword(Keyword::ON)?; + Some(index_name) + } else { + None + }; let table_name = self.parse_object_name()?; let using = if self.parse_keyword(Keyword::USING) { Some(self.parse_identifier()?) @@ -3382,13 +3388,41 @@ impl<'a> Parser<'a> { self.expect_token(&Token::LParen)?; let columns = self.parse_comma_separated(Parser::parse_order_by_expr)?; self.expect_token(&Token::RParen)?; + + let include = if self.parse_keyword(Keyword::INCLUDE) { + self.expect_token(&Token::LParen)?; + let columns = self.parse_comma_separated(Parser::parse_identifier)?; + self.expect_token(&Token::RParen)?; + columns + } else { + vec![] + }; + + let nulls_distinct = if self.parse_keyword(Keyword::NULLS) { + let not = self.parse_keyword(Keyword::NOT); + self.expect_keyword(Keyword::DISTINCT)?; + Some(!not) + } else { + None + }; + + let predicate = if self.parse_keyword(Keyword::WHERE) { + Some(self.parse_expr()?) 
+ } else { + None + }; + Ok(Statement::CreateIndex { name: index_name, table_name, using, columns, unique, + concurrently, if_not_exists, + include, + nulls_distinct, + predicate, }) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index ca30e9516..8dfcc6e7f 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -5981,7 +5981,7 @@ fn parse_create_index() { ]; match verified_stmt(sql) { Statement::CreateIndex { - name, + name: Some(name), table_name, columns, unique, @@ -6015,19 +6015,25 @@ fn test_create_index_with_using_function() { ]; match verified_stmt(sql) { Statement::CreateIndex { - name, + name: Some(name), table_name, using, columns, unique, + concurrently, if_not_exists, + include, + nulls_distinct: None, + predicate: None, } => { assert_eq!("idx_name", name.to_string()); assert_eq!("test", table_name.to_string()); assert_eq!("btree", using.unwrap().to_string()); assert_eq!(indexed_columns, columns); assert!(unique); - assert!(if_not_exists) + assert!(!concurrently); + assert!(if_not_exists); + assert!(include.is_empty()); } _ => unreachable!(), } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 09196db46..a62c41e42 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1761,6 +1761,208 @@ fn parse_array_index_expr() { ); } +#[test] +fn parse_create_index() { + let sql = "CREATE INDEX IF NOT EXISTS my_index ON my_table(col1,col2)"; + match pg().verified_stmt(sql) { + Statement::CreateIndex { + name: Some(ObjectName(name)), + table_name: ObjectName(table_name), + using, + columns, + unique, + concurrently, + if_not_exists, + nulls_distinct: None, + include, + predicate: None, + } => { + assert_eq_vec(&["my_index"], &name); + assert_eq_vec(&["my_table"], &table_name); + assert_eq!(None, using); + assert!(!unique); + assert!(!concurrently); + assert!(if_not_exists); + assert_eq_vec(&["col1", "col2"], &columns); + assert!(include.is_empty()); + } + _ => unreachable!(), 
+ } +} + +#[test] +fn parse_create_anonymous_index() { + let sql = "CREATE INDEX ON my_table(col1,col2)"; + match pg().verified_stmt(sql) { + Statement::CreateIndex { + name, + table_name: ObjectName(table_name), + using, + columns, + unique, + concurrently, + if_not_exists, + include, + nulls_distinct: None, + predicate: None, + } => { + assert_eq!(None, name); + assert_eq_vec(&["my_table"], &table_name); + assert_eq!(None, using); + assert!(!unique); + assert!(!concurrently); + assert!(!if_not_exists); + assert_eq_vec(&["col1", "col2"], &columns); + assert!(include.is_empty()); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_create_index_concurrently() { + let sql = "CREATE INDEX CONCURRENTLY IF NOT EXISTS my_index ON my_table(col1,col2)"; + match pg().verified_stmt(sql) { + Statement::CreateIndex { + name: Some(ObjectName(name)), + table_name: ObjectName(table_name), + using, + columns, + unique, + concurrently, + if_not_exists, + include, + nulls_distinct: None, + predicate: None, + } => { + assert_eq_vec(&["my_index"], &name); + assert_eq_vec(&["my_table"], &table_name); + assert_eq!(None, using); + assert!(!unique); + assert!(concurrently); + assert!(if_not_exists); + assert_eq_vec(&["col1", "col2"], &columns); + assert!(include.is_empty()); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_create_index_with_predicate() { + let sql = "CREATE INDEX IF NOT EXISTS my_index ON my_table(col1,col2) WHERE col3 IS NULL"; + match pg().verified_stmt(sql) { + Statement::CreateIndex { + name: Some(ObjectName(name)), + table_name: ObjectName(table_name), + using, + columns, + unique, + concurrently, + if_not_exists, + include, + nulls_distinct: None, + predicate: Some(_), + } => { + assert_eq_vec(&["my_index"], &name); + assert_eq_vec(&["my_table"], &table_name); + assert_eq!(None, using); + assert!(!unique); + assert!(!concurrently); + assert!(if_not_exists); + assert_eq_vec(&["col1", "col2"], &columns); + assert!(include.is_empty()); + } + _ => 
unreachable!(), + } +} + +#[test] +fn parse_create_index_with_include() { + let sql = "CREATE INDEX IF NOT EXISTS my_index ON my_table(col1,col2) INCLUDE (col3)"; + match pg().verified_stmt(sql) { + Statement::CreateIndex { + name: Some(ObjectName(name)), + table_name: ObjectName(table_name), + using, + columns, + unique, + concurrently, + if_not_exists, + include, + nulls_distinct: None, + predicate: None, + } => { + assert_eq_vec(&["my_index"], &name); + assert_eq_vec(&["my_table"], &table_name); + assert_eq!(None, using); + assert!(!unique); + assert!(!concurrently); + assert!(if_not_exists); + assert_eq_vec(&["col1", "col2"], &columns); + assert_eq_vec(&["col3"], &include); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_create_index_with_nulls_distinct() { + let sql = "CREATE INDEX IF NOT EXISTS my_index ON my_table(col1,col2) NULLS NOT DISTINCT"; + match pg().verified_stmt(sql) { + Statement::CreateIndex { + name: Some(ObjectName(name)), + table_name: ObjectName(table_name), + using, + columns, + unique, + concurrently, + if_not_exists, + include, + nulls_distinct: Some(nulls_distinct), + predicate: None, + } => { + assert_eq_vec(&["my_index"], &name); + assert_eq_vec(&["my_table"], &table_name); + assert_eq!(None, using); + assert!(!unique); + assert!(!concurrently); + assert!(if_not_exists); + assert_eq_vec(&["col1", "col2"], &columns); + assert!(include.is_empty()); + assert!(!nulls_distinct); + } + _ => unreachable!(), + } + + let sql = "CREATE INDEX IF NOT EXISTS my_index ON my_table(col1,col2) NULLS DISTINCT"; + match pg().verified_stmt(sql) { + Statement::CreateIndex { + name: Some(ObjectName(name)), + table_name: ObjectName(table_name), + using, + columns, + unique, + concurrently, + if_not_exists, + include, + nulls_distinct: Some(nulls_distinct), + predicate: None, + } => { + assert_eq_vec(&["my_index"], &name); + assert_eq_vec(&["my_table"], &table_name); + assert_eq!(None, using); + assert!(!unique); + assert!(!concurrently); + 
assert!(if_not_exists); + assert_eq_vec(&["col1", "col2"], &columns); + assert!(include.is_empty()); + assert!(nulls_distinct); + } + _ => unreachable!(), + } +} + #[test] fn parse_array_subquery_expr() { let sql = "SELECT ARRAY(SELECT 1 UNION SELECT 2)"; From 41e47cc0136c705a3335b1504d50ae8b22711b86 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Mon, 21 Aug 2023 19:21:45 +0200 Subject: [PATCH 006/735] add a test for mssql table name in square brackets (#952) --- tests/sqlparser_mssql.rs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 56fbd576e..0bb2ba3de 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -324,6 +324,26 @@ fn parse_delimited_identifiers() { //TODO verified_stmt(r#"UPDATE foo SET "bar" = 5"#); } +#[test] +fn parse_table_name_in_square_brackets() { + let select = ms().verified_only_select(r#"SELECT [a column] FROM [a schema].[a table]"#); + if let TableFactor::Table { name, .. 
} = only(select.from).relation { + assert_eq!( + vec![ + Ident::with_quote('[', "a schema"), + Ident::with_quote('[', "a table") + ], + name.0 + ); + } else { + panic!("Expecting TableFactor::Table"); + } + assert_eq!( + &Expr::Identifier(Ident::with_quote('[', "a column")), + expr_from_projection(&select.projection[0]), + ); +} + #[test] fn parse_like() { fn chk(negated: bool) { From 9500649c3519f638ba54c407a4a3ed834606abe0 Mon Sep 17 00:00:00 2001 From: ehoeve Date: Mon, 21 Aug 2023 22:25:32 +0200 Subject: [PATCH 007/735] Add support for MySQL auto_increment offset (#950) --- src/ast/helpers/stmt_create_table.rs | 10 ++++++++++ src/ast/mod.rs | 5 +++++ src/parser/mod.rs | 12 ++++++++++++ tests/sqlparser_mysql.rs | 25 +++++++++++++++++++++++++ 4 files changed, 52 insertions(+) diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index a00555fd8..17327e7f8 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -66,6 +66,7 @@ pub struct CreateTableBuilder { pub clone: Option, pub engine: Option, pub comment: Option, + pub auto_increment_offset: Option, pub default_charset: Option, pub collation: Option, pub on_commit: Option, @@ -98,6 +99,7 @@ impl CreateTableBuilder { clone: None, engine: None, comment: None, + auto_increment_offset: None, default_charset: None, collation: None, on_commit: None, @@ -204,6 +206,11 @@ impl CreateTableBuilder { self } + pub fn auto_increment_offset(mut self, offset: Option) -> Self { + self.auto_increment_offset = offset; + self + } + pub fn default_charset(mut self, default_charset: Option) -> Self { self.default_charset = default_charset; self @@ -257,6 +264,7 @@ impl CreateTableBuilder { clone: self.clone, engine: self.engine, comment: self.comment, + auto_increment_offset: self.auto_increment_offset, default_charset: self.default_charset, collation: self.collation, on_commit: self.on_commit, @@ -296,6 +304,7 @@ impl TryFrom for CreateTableBuilder { clone, 
engine, comment, + auto_increment_offset, default_charset, collation, on_commit, @@ -324,6 +333,7 @@ impl TryFrom for CreateTableBuilder { clone, engine, comment, + auto_increment_offset, default_charset, collation, on_commit, diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 47419a893..50c11ba59 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1322,6 +1322,7 @@ pub enum Statement { clone: Option, engine: Option, comment: Option, + auto_increment_offset: Option, default_charset: Option, collation: Option, on_commit: Option, @@ -2263,6 +2264,7 @@ impl fmt::Display for Statement { default_charset, engine, comment, + auto_increment_offset, collation, on_commit, on_cluster, @@ -2417,6 +2419,9 @@ impl fmt::Display for Statement { if let Some(comment) = comment { write!(f, " COMMENT '{comment}'")?; } + if let Some(auto_increment_offset) = auto_increment_offset { + write!(f, " AUTO_INCREMENT {auto_increment_offset}")?; + } if let Some(order_by) = order_by { write!(f, " ORDER BY ({})", display_comma_separated(order_by))?; } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 94814627d..1b11237ff 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3550,6 +3550,17 @@ impl<'a> Parser<'a> { None }; + let auto_increment_offset = if self.parse_keyword(Keyword::AUTO_INCREMENT) { + let _ = self.consume_token(&Token::Eq); + let next_token = self.next_token(); + match next_token.token { + Token::Number(s, _) => Some(s.parse::().expect("literal int")), + _ => self.expected("literal int", next_token)?, + } + } else { + None + }; + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { if self.consume_token(&Token::LParen) { let columns = if self.peek_token() != Token::RParen { @@ -3631,6 +3642,7 @@ impl<'a> Parser<'a> { .clone_clause(clone) .engine(engine) .comment(comment) + .auto_increment_offset(auto_increment_offset) .order_by(order_by) .default_charset(default_charset) .collation(collation) diff --git a/tests/sqlparser_mysql.rs 
b/tests/sqlparser_mysql.rs index c0a51edab..209143b18 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -312,6 +312,31 @@ fn parse_create_table_comment() { } } +#[test] +fn parse_create_table_auto_increment_offset() { + let canonical = + "CREATE TABLE foo (bar INT NOT NULL AUTO_INCREMENT) ENGINE=InnoDB AUTO_INCREMENT 123"; + let with_equal = + "CREATE TABLE foo (bar INT NOT NULL AUTO_INCREMENT) ENGINE=InnoDB AUTO_INCREMENT=123"; + + for sql in [canonical, with_equal] { + match mysql().one_statement_parses_to(sql, canonical) { + Statement::CreateTable { + name, + auto_increment_offset, + .. + } => { + assert_eq!(name.to_string(), "foo"); + assert_eq!( + auto_increment_offset.expect("Should exist").to_string(), + "123" + ); + } + _ => unreachable!(), + } + } +} + #[test] fn parse_create_table_set_enum() { let sql = "CREATE TABLE foo (bar SET('a', 'b'), baz ENUM('a', 'b'))"; From 1ea88585759f5610a8665bfc280400dce9e8be3c Mon Sep 17 00:00:00 2001 From: Marko Grujic Date: Tue, 22 Aug 2023 12:06:32 +0200 Subject: [PATCH 008/735] Table time travel clause support, add `visit_table_factor` to Visitor (#951) --- src/ast/mod.rs | 3 +- src/ast/query.rs | 24 ++++++++++ src/ast/visitor.rs | 82 +++++++++++++++++++++++++++++++---- src/parser/mod.rs | 18 ++++++++ src/test_utils.rs | 1 + tests/sqlparser_bigquery.rs | 26 +++++++++++ tests/sqlparser_clickhouse.rs | 3 ++ tests/sqlparser_common.rs | 29 +++++++++++++ tests/sqlparser_duckdb.rs | 4 ++ tests/sqlparser_hive.rs | 2 + tests/sqlparser_mssql.rs | 30 ++++++++++++- tests/sqlparser_mysql.rs | 7 ++- tests/sqlparser_postgres.rs | 2 + tests/sqlparser_redshift.rs | 4 ++ tests/sqlparser_snowflake.rs | 2 + 15 files changed, 225 insertions(+), 12 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 50c11ba59..a241f9509 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -40,7 +40,8 @@ pub use self::query::{ JoinConstraint, JoinOperator, LateralView, LockClause, LockType, NamedWindowDefinition, NonBlock, 
Offset, OffsetRows, OrderByExpr, Query, RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Table, - TableAlias, TableFactor, TableWithJoins, Top, Values, WildcardAdditionalOptions, With, + TableAlias, TableFactor, TableVersion, TableWithJoins, Top, Values, WildcardAdditionalOptions, + With, }; pub use self::value::{ escape_quoted_string, DateTimeField, DollarQuotedString, TrimWhereField, Value, diff --git a/src/ast/query.rs b/src/ast/query.rs index 5f4c289dc..b70017654 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -646,6 +646,7 @@ impl fmt::Display for TableWithJoins { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +#[cfg_attr(feature = "visitor", visit(with = "visit_table_factor"))] pub enum TableFactor { Table { #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] @@ -661,6 +662,9 @@ pub enum TableFactor { args: Option>, /// MSSQL-specific `WITH (...)` hints such as NOLOCK. with_hints: Vec, + /// Optional version qualifier to facilitate table time-travel, as + /// supported by BigQuery and MSSQL. 
+ version: Option, }, Derived { lateral: bool, @@ -720,6 +724,7 @@ impl fmt::Display for TableFactor { alias, args, with_hints, + version, } => { write!(f, "{name}")?; if let Some(args) = args { @@ -731,6 +736,9 @@ impl fmt::Display for TableFactor { if !with_hints.is_empty() { write!(f, " WITH ({})", display_comma_separated(with_hints))?; } + if let Some(version) = version { + write!(f, "{version}")?; + } Ok(()) } TableFactor::Derived { @@ -835,6 +843,22 @@ impl fmt::Display for TableAlias { } } +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TableVersion { + ForSystemTimeAsOf(Expr), +} + +impl Display for TableVersion { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + TableVersion::ForSystemTimeAsOf(e) => write!(f, " FOR SYSTEM_TIME AS OF {e}")?, + } + Ok(()) + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index 8aa038db9..bb7c19678 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -12,7 +12,7 @@ //! Recursive visitors for ast Nodes. See [`Visitor`] for more details. -use crate::ast::{Expr, ObjectName, Statement}; +use crate::ast::{Expr, ObjectName, Statement, TableFactor}; use core::ops::ControlFlow; /// A type that can be visited by a [`Visitor`]. See [`Visitor`] for @@ -115,8 +115,8 @@ visit_noop!(bigdecimal::BigDecimal); /// A visitor that can be used to walk an AST tree. /// -/// `previst_` methods are invoked before visiting all children of the -/// node and `postvisit_` methods are invoked after visiting all +/// `pre_visit_` methods are invoked before visiting all children of the +/// node and `post_visit_` methods are invoked after visiting all /// children of the node. 
/// /// # See also @@ -139,7 +139,7 @@ visit_noop!(bigdecimal::BigDecimal); /// } /// /// // Visit relations and exprs before children are visited (depth first walk) -/// // Note you can also visit statements and visit exprs after children have been visitoed +/// // Note you can also visit statements and visit exprs after children have been visited /// impl Visitor for V { /// type Break = (); /// @@ -189,6 +189,16 @@ pub trait Visitor { ControlFlow::Continue(()) } + /// Invoked for any table factors that appear in the AST before visiting children + fn pre_visit_table_factor(&mut self, _table_factor: &TableFactor) -> ControlFlow { + ControlFlow::Continue(()) + } + + /// Invoked for any table factors that appear in the AST after visiting children + fn post_visit_table_factor(&mut self, _table_factor: &TableFactor) -> ControlFlow { + ControlFlow::Continue(()) + } + /// Invoked for any expressions that appear in the AST before visiting children fn pre_visit_expr(&mut self, _expr: &Expr) -> ControlFlow { ControlFlow::Continue(()) @@ -212,8 +222,8 @@ pub trait Visitor { /// A visitor that can be used to mutate an AST tree. /// -/// `previst_` methods are invoked before visiting all children of the -/// node and `postvisit_` methods are invoked after visiting all +/// `pre_visit_` methods are invoked before visiting all children of the +/// node and `post_visit_` methods are invoked after visiting all /// children of the node. 
/// /// # See also @@ -267,6 +277,22 @@ pub trait VisitorMut { ControlFlow::Continue(()) } + /// Invoked for any table factors that appear in the AST before visiting children + fn pre_visit_table_factor( + &mut self, + _table_factor: &mut TableFactor, + ) -> ControlFlow { + ControlFlow::Continue(()) + } + + /// Invoked for any table factors that appear in the AST after visiting children + fn post_visit_table_factor( + &mut self, + _table_factor: &mut TableFactor, + ) -> ControlFlow { + ControlFlow::Continue(()) + } + /// Invoked for any expressions that appear in the AST before visiting children fn pre_visit_expr(&mut self, _expr: &mut Expr) -> ControlFlow { ControlFlow::Continue(()) @@ -609,6 +635,24 @@ mod tests { ControlFlow::Continue(()) } + fn pre_visit_table_factor( + &mut self, + table_factor: &TableFactor, + ) -> ControlFlow { + self.visited + .push(format!("PRE: TABLE FACTOR: {table_factor}")); + ControlFlow::Continue(()) + } + + fn post_visit_table_factor( + &mut self, + table_factor: &TableFactor, + ) -> ControlFlow { + self.visited + .push(format!("POST: TABLE FACTOR: {table_factor}")); + ControlFlow::Continue(()) + } + fn pre_visit_expr(&mut self, expr: &Expr) -> ControlFlow { self.visited.push(format!("PRE: EXPR: {expr}")); ControlFlow::Continue(()) @@ -647,22 +691,28 @@ mod tests { fn test_sql() { let tests = vec![ ( - "SELECT * from table_name", + "SELECT * from table_name as my_table", vec![ - "PRE: STATEMENT: SELECT * FROM table_name", + "PRE: STATEMENT: SELECT * FROM table_name AS my_table", + "PRE: TABLE FACTOR: table_name AS my_table", "PRE: RELATION: table_name", "POST: RELATION: table_name", - "POST: STATEMENT: SELECT * FROM table_name", + "POST: TABLE FACTOR: table_name AS my_table", + "POST: STATEMENT: SELECT * FROM table_name AS my_table", ], ), ( "SELECT * from t1 join t2 on t1.id = t2.t1_id", vec![ "PRE: STATEMENT: SELECT * FROM t1 JOIN t2 ON t1.id = t2.t1_id", + "PRE: TABLE FACTOR: t1", "PRE: RELATION: t1", "POST: RELATION: t1", + 
"POST: TABLE FACTOR: t1", + "PRE: TABLE FACTOR: t2", "PRE: RELATION: t2", "POST: RELATION: t2", + "POST: TABLE FACTOR: t2", "PRE: EXPR: t1.id = t2.t1_id", "PRE: EXPR: t1.id", "POST: EXPR: t1.id", @@ -676,13 +726,17 @@ mod tests { "SELECT * from t1 where EXISTS(SELECT column from t2)", vec![ "PRE: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", + "PRE: TABLE FACTOR: t1", "PRE: RELATION: t1", "POST: RELATION: t1", + "POST: TABLE FACTOR: t1", "PRE: EXPR: EXISTS (SELECT column FROM t2)", "PRE: EXPR: column", "POST: EXPR: column", + "PRE: TABLE FACTOR: t2", "PRE: RELATION: t2", "POST: RELATION: t2", + "POST: TABLE FACTOR: t2", "POST: EXPR: EXISTS (SELECT column FROM t2)", "POST: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", ], @@ -691,13 +745,17 @@ mod tests { "SELECT * from t1 where EXISTS(SELECT column from t2)", vec![ "PRE: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", + "PRE: TABLE FACTOR: t1", "PRE: RELATION: t1", "POST: RELATION: t1", + "POST: TABLE FACTOR: t1", "PRE: EXPR: EXISTS (SELECT column FROM t2)", "PRE: EXPR: column", "POST: EXPR: column", + "PRE: TABLE FACTOR: t2", "PRE: RELATION: t2", "POST: RELATION: t2", + "POST: TABLE FACTOR: t2", "POST: EXPR: EXISTS (SELECT column FROM t2)", "POST: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", ], @@ -706,16 +764,22 @@ mod tests { "SELECT * from t1 where EXISTS(SELECT column from t2) UNION SELECT * from t3", vec![ "PRE: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2) UNION SELECT * FROM t3", + "PRE: TABLE FACTOR: t1", "PRE: RELATION: t1", "POST: RELATION: t1", + "POST: TABLE FACTOR: t1", "PRE: EXPR: EXISTS (SELECT column FROM t2)", "PRE: EXPR: column", "POST: EXPR: column", + "PRE: TABLE FACTOR: t2", "PRE: RELATION: t2", "POST: RELATION: t2", + "POST: TABLE FACTOR: t2", "POST: EXPR: EXISTS (SELECT column FROM t2)", + "PRE: TABLE FACTOR: t3", "PRE: RELATION: t3", "POST: RELATION: t3", + "POST: TABLE FACTOR: t3", "POST: 
STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2) UNION SELECT * FROM t3", ], ), diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 1b11237ff..c2a33b42a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6210,6 +6210,9 @@ impl<'a> Parser<'a> { } else { let name = self.parse_object_name()?; + // Parse potential version qualifier + let version = self.parse_table_version()?; + // Postgres, MSSQL: table-valued functions: let args = if self.consume_token(&Token::LParen) { Some(self.parse_optional_args()?) @@ -6240,10 +6243,25 @@ impl<'a> Parser<'a> { alias, args, with_hints, + version, }) } } + /// Parse a given table version specifier. + /// + /// For now it only supports timestamp versioning for BigQuery and MSSQL dialects. + pub fn parse_table_version(&mut self) -> Result, ParserError> { + if dialect_of!(self is BigQueryDialect | MsSqlDialect) + && self.parse_keywords(&[Keyword::FOR, Keyword::SYSTEM_TIME, Keyword::AS, Keyword::OF]) + { + let expr = self.parse_expr()?; + Ok(Some(TableVersion::ForSystemTimeAsOf(expr))) + } else { + Ok(None) + } + } + pub fn parse_derived_table_factor( &mut self, lateral: IsLateral, diff --git a/src/test_utils.rs b/src/test_utils.rs index 0ec595095..91130fb51 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -221,6 +221,7 @@ pub fn table(name: impl Into) -> TableFactor { alias: None, args: None, with_hints: vec![], + version: None, } } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index bbe1a6e9f..ca711b26e 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -95,6 +95,7 @@ fn parse_table_identifiers() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![] },] @@ -143,6 +144,31 @@ fn parse_table_identifiers() { test_table_ident("abc5.GROUP", vec![Ident::new("abc5"), Ident::new("GROUP")]); } +#[test] +fn parse_table_time_travel() { + let version = "2023-08-18 23:08:18".to_string(); + let sql = format!("SELECT 1 FROM t1 FOR 
SYSTEM_TIME AS OF '{version}'"); + let select = bigquery().verified_only_select(&sql); + assert_eq!( + select.from, + vec![TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident::new("t1")]), + alias: None, + args: None, + with_hints: vec![], + version: Some(TableVersion::ForSystemTimeAsOf(Expr::Value( + Value::SingleQuotedString(version) + ))), + }, + joins: vec![] + },] + ); + + let sql = "SELECT 1 FROM t1 FOR SYSTEM TIME AS OF 'some_timestamp'".to_string(); + assert!(bigquery().parse_sql_statements(&sql).is_err()); +} + #[test] fn parse_join_constraint_unnest_alias() { assert_eq!( diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 24c641561..77b936d55 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -62,6 +62,7 @@ fn parse_map_access_expr() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![] }], @@ -169,11 +170,13 @@ fn parse_delimited_identifiers() { alias, args, with_hints, + version, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); assert!(args.is_none()); assert!(with_hints.is_empty()); + assert!(version.is_none()); } _ => panic!("Expecting TableFactor::Table"), } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 8dfcc6e7f..96dac3da0 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -214,6 +214,7 @@ fn parse_update_set_from() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![], }, @@ -240,6 +241,7 @@ fn parse_update_set_from() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![], }], @@ -303,6 +305,7 @@ fn parse_update_with_table_alias() { }), args: None, with_hints: vec![], + version: None, }, joins: vec![], }, @@ -365,6 +368,7 @@ fn parse_select_with_table_alias() { }), args: None, with_hints: vec![], + version: None, }, joins: vec![], }] @@ -395,6 +399,7 
@@ fn parse_delete_statement() { alias: None, args: None, with_hints: vec![], + version: None, }, from[0].relation ); @@ -422,6 +427,7 @@ fn parse_delete_statement_for_multi_tables() { alias: None, args: None, with_hints: vec![], + version: None, }, from[0].relation ); @@ -431,6 +437,7 @@ fn parse_delete_statement_for_multi_tables() { alias: None, args: None, with_hints: vec![], + version: None, }, from[0].joins[0].relation ); @@ -454,6 +461,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { alias: None, args: None, with_hints: vec![], + version: None, }, from[0].relation ); @@ -463,6 +471,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { alias: None, args: None, with_hints: vec![], + version: None, }, from[1].relation ); @@ -472,6 +481,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { alias: None, args: None, with_hints: vec![], + version: None, }, using[0].relation ); @@ -481,6 +491,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { alias: None, args: None, with_hints: vec![], + version: None, }, using[0].joins[0].relation ); @@ -508,6 +519,7 @@ fn parse_where_delete_statement() { alias: None, args: None, with_hints: vec![], + version: None, }, from[0].relation, ); @@ -549,6 +561,7 @@ fn parse_where_delete_with_alias_statement() { }), args: None, with_hints: vec![], + version: None, }, from[0].relation, ); @@ -562,6 +575,7 @@ fn parse_where_delete_with_alias_statement() { }), args: None, with_hints: vec![], + version: None, }, joins: vec![], }]), @@ -3564,6 +3578,7 @@ fn test_parse_named_window() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![], }], @@ -3902,6 +3917,7 @@ fn parse_interval_and_or_xor() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![], }], @@ -4506,6 +4522,7 @@ fn parse_implicit_join() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![], }, @@ -4515,6 +4532,7 @@ fn parse_implicit_join() { alias: None, args: 
None, with_hints: vec![], + version: None, }, joins: vec![], }, @@ -4532,6 +4550,7 @@ fn parse_implicit_join() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![Join { relation: TableFactor::Table { @@ -4539,6 +4558,7 @@ fn parse_implicit_join() { alias: None, args: None, with_hints: vec![], + version: None, }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -4549,6 +4569,7 @@ fn parse_implicit_join() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![Join { relation: TableFactor::Table { @@ -4556,6 +4577,7 @@ fn parse_implicit_join() { alias: None, args: None, with_hints: vec![], + version: None, }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -4576,6 +4598,7 @@ fn parse_cross_join() { alias: None, args: None, with_hints: vec![], + version: None, }, join_operator: JoinOperator::CrossJoin, }, @@ -4596,6 +4619,7 @@ fn parse_joins_on() { alias, args: None, with_hints: vec![], + version: None, }, join_operator: f(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::Identifier("c1".into())), @@ -4665,6 +4689,7 @@ fn parse_joins_using() { alias, args: None, with_hints: vec![], + version: None, }, join_operator: f(JoinConstraint::Using(vec!["c1".into()])), } @@ -4726,6 +4751,7 @@ fn parse_natural_join() { alias, args: None, with_hints: vec![], + version: None, }, join_operator: f(JoinConstraint::Natural), } @@ -4990,6 +5016,7 @@ fn parse_derived_tables() { alias: None, args: None, with_hints: vec![], + version: None, }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -6317,6 +6344,7 @@ fn parse_merge() { }), args: None, with_hints: vec![], + version: None, } ); assert_eq!(table, table_no_into); @@ -6340,6 +6368,7 @@ fn parse_merge() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![], }], diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 83b1e537c..3587e8d90 100644 --- a/tests/sqlparser_duckdb.rs 
+++ b/tests/sqlparser_duckdb.rs @@ -155,6 +155,7 @@ fn test_select_union_by_name() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![], }], @@ -187,6 +188,7 @@ fn test_select_union_by_name() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![], }], @@ -228,6 +230,7 @@ fn test_select_union_by_name() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![], }], @@ -260,6 +263,7 @@ fn test_select_union_by_name() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![], }], diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index ddc5a8ccf..8cdfe9248 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -322,11 +322,13 @@ fn parse_delimited_identifiers() { alias, args, with_hints, + version, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); assert!(args.is_none()); assert!(with_hints.is_empty()); + assert!(version.is_none()); } _ => panic!("Expecting TableFactor::Table"), } diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 0bb2ba3de..c4e0f3274 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -42,6 +42,31 @@ fn parse_mssql_identifiers() { }; } +#[test] +fn parse_table_time_travel() { + let version = "2023-08-18 23:08:18".to_string(); + let sql = format!("SELECT 1 FROM t1 FOR SYSTEM_TIME AS OF '{version}'"); + let select = ms().verified_only_select(&sql); + assert_eq!( + select.from, + vec![TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident::new("t1")]), + alias: None, + args: None, + with_hints: vec![], + version: Some(TableVersion::ForSystemTimeAsOf(Expr::Value( + Value::SingleQuotedString(version) + ))), + }, + joins: vec![] + },] + ); + + let sql = "SELECT 1 FROM t1 FOR SYSTEM TIME AS OF 'some_timestamp'".to_string(); + assert!(ms().parse_sql_statements(&sql).is_err()); +} + #[test] 
fn parse_mssql_single_quoted_aliases() { let _ = ms_and_generic().one_statement_parses_to("SELECT foo 'alias'", "SELECT foo AS 'alias'"); @@ -283,11 +308,13 @@ fn parse_delimited_identifiers() { alias, args, with_hints, + version, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); assert!(args.is_none()); assert!(with_hints.is_empty()); + assert!(version.is_none()); } _ => panic!("Expecting TableFactor::Table"), } @@ -485,7 +512,8 @@ fn parse_substring_in_select() { }]), alias: None, args: None, - with_hints: vec![] + with_hints: vec![], + version: None, }, joins: vec![] }], diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 209143b18..3a0177df4 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1093,6 +1093,7 @@ fn parse_select_with_numeric_prefix_column_name() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![] }], @@ -1141,6 +1142,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![] }], @@ -1200,6 +1202,7 @@ fn parse_update_with_joins() { }), args: None, with_hints: vec![], + version: None, }, joins: vec![Join { relation: TableFactor::Table { @@ -1210,6 +1213,7 @@ fn parse_update_with_joins() { }), args: None, with_hints: vec![], + version: None, }, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::CompoundIdentifier(vec![ @@ -1324,7 +1328,8 @@ fn parse_substring_in_select() { }]), alias: None, args: None, - with_hints: vec![] + with_hints: vec![], + version: None, }, joins: vec![] }], diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index a62c41e42..b3621a34b 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2855,11 +2855,13 @@ fn parse_delimited_identifiers() { alias, args, with_hints, + version, } => { 
assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); assert!(args.is_none()); assert!(with_hints.is_empty()); + assert!(version.is_none()); } _ => panic!("Expecting TableFactor::Table"), } diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index c44f6dee4..9f5f62f78 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -45,6 +45,7 @@ fn test_square_brackets_over_db_schema_table_name() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![], } @@ -89,6 +90,7 @@ fn test_double_quotes_over_db_schema_table_name() { alias: None, args: None, with_hints: vec![], + version: None, }, joins: vec![], } @@ -108,11 +110,13 @@ fn parse_delimited_identifiers() { alias, args, with_hints, + version, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); assert!(args.is_none()); assert!(with_hints.is_empty()); + assert!(version.is_none()); } _ => panic!("Expecting TableFactor::Table"), } diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 43ebb8b11..200849896 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -223,11 +223,13 @@ fn parse_delimited_identifiers() { alias, args, with_hints, + version, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); assert!(args.is_none()); assert!(with_hints.is_empty()); + assert!(version.is_none()); } _ => panic!("Expecting TableFactor::Table"), } From a2533c20fe5cd1c0060f64190f6ef19fe893fdac Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 22 Aug 2023 08:23:43 -0400 Subject: [PATCH 009/735] Changelog for version 0.37.0 (#953) --- CHANGELOG.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8669f0584..4c6c9dcc6 100644 --- 
a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,25 @@ Given that the parser produces a typed AST, any changes to the AST will technica ## [Unreleased] Check https://github.com/sqlparser-rs/sqlparser-rs/commits/main for undocumented changes. +## [0.37.0] 2023-08-22 + +### Added +* Support `FOR SYSTEM_TIME AS OF` table time travel clause support, `visit_table_factor` to Visitor (#951) - Thanks @gruuya +* Support MySQL `auto_increment` offset in table definition (#950) - Thanks @ehoeve +* Test for mssql table name in square brackets (#952) - Thanks @lovasoa +* Support additional Postgres `CREATE INDEX` syntax (#943) - Thanks @ForbesLindesay +* Support `ALTER ROLE` syntax of PostgreSQL and MS SQL Server (#942) - Thanks @r4ntix +* Support table-level comments (#946) - Thanks @ehoeve +* Support `DROP TEMPORARY TABLE`, MySQL syntax (#916) - Thanks @liadgiladi +* Support postgres type alias (#933) - Thanks @Kikkon + +### Fixed +* Clarify the value of the special flag (#948) - Thanks @alamb +* Fix `SUBSTRING` from/to argument construction for mssql (#947) - Thanks @jmaness +* Fix: use Rust idiomatic capitalization for newly added DataType enums (#939) - Thanks @Kikkon +* Fix `BEGIN TRANSACTION` being serialized as `START TRANSACTION` (#935) - Thanks @lovasoa +* Fix parsing of datetime functions without parenthesis (#930) - Thanks @lovasoa + ## [0.36.1] 2023-07-19 ### Fixed From b8a58bbc1186503d523726f8f18f06b6e30a3313 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 22 Aug 2023 08:28:03 -0400 Subject: [PATCH 010/735] chore: Release sqlparser version 0.37.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 777e9f4ae..43d9f981d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.36.1" +version = "0.37.0" authors = ["Andy Grove "] homepage = 
"https://github.com/sqlparser-rs/sqlparser-rs" documentation = "https://docs.rs/sqlparser/" From 9c2e8bcdbc437e715031540981ff971ef715afdd Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 22 Aug 2023 08:32:14 -0400 Subject: [PATCH 011/735] Break test and coverage test into separate jobs (#949) --- .github/workflows/rust.yml | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 836d55614..64c4d114a 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -75,11 +75,25 @@ jobs: uses: actions/checkout@v2 - name: Test run: cargo test --all-features + + test-coverage: + runs-on: ubuntu-latest + steps: + - name: Setup Rust + uses: hecrj/setup-rust-action@v1 + with: + rust-version: stable + - name: Install Tarpaulin + uses: actions-rs/install@v0.1 + with: + crate: cargo-tarpaulin + version: 0.14.2 + use-tool-cache: true + - name: Checkout + uses: actions/checkout@v2 - name: Coverage - if: matrix.rust == 'stable' run: cargo tarpaulin -o Lcov --output-dir ./coverage - name: Coveralls - if: matrix.rust == 'stable' uses: coverallsapp/github-action@master with: github-token: ${{ secrets.GITHUB_TOKEN }} From 4a2fa66b553cd2e4e6ff9e837e0f04e1e7687c04 Mon Sep 17 00:00:00 2001 From: David Dolphin <445312+ddol@users.noreply.github.com> Date: Fri, 25 Aug 2023 09:06:25 -0700 Subject: [PATCH 012/735] [cli] add --sqlite param (#956) --- examples/cli.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/cli.rs b/examples/cli.rs index 8af6246a0..72f963b1e 100644 --- a/examples/cli.rs +++ b/examples/cli.rs @@ -47,6 +47,7 @@ $ cargo run --feature json_example --example cli FILENAME.sql [--dialectname] "--redshift" => Box::new(RedshiftSqlDialect {}), "--clickhouse" => Box::new(ClickHouseDialect {}), "--duckdb" => Box::new(DuckDbDialect {}), + "--sqlite" => Box::new(SQLiteDialect {}), "--generic" | "" => Box::new(GenericDialect {}), s => 
panic!("Unexpected parameter: {s}"), }; From 14da37d1822ab6b7e96304cec9e409fb96ffdf09 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 25 Aug 2023 12:21:31 -0400 Subject: [PATCH 013/735] Fix Rust 1.72 clippy lints (#957) --- src/parser/mod.rs | 2 +- tests/sqlparser_bigquery.rs | 6 +++--- tests/sqlparser_common.rs | 4 ++-- tests/sqlparser_mysql.rs | 22 +++++++++++----------- tests/sqlparser_postgres.rs | 7 +++---- tests/sqlparser_snowflake.rs | 2 +- 6 files changed, 21 insertions(+), 22 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index c2a33b42a..846215249 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5881,7 +5881,7 @@ impl<'a> Parser<'a> { Some(_) => { let db_name = vec![self.parse_identifier()?]; let ObjectName(table_name) = object_name; - let object_name = db_name.into_iter().chain(table_name.into_iter()).collect(); + let object_name = db_name.into_iter().chain(table_name).collect(); ObjectName(object_name) } None => object_name, diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index ca711b26e..d6a28fd00 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -55,7 +55,7 @@ fn parse_raw_literal() { let sql = r#"SELECT R'abc', R"abc", R'f\(abc,(.*),def\)', R"f\(abc,(.*),def\)""#; let stmt = bigquery().one_statement_parses_to( sql, - r#"SELECT R'abc', R'abc', R'f\(abc,(.*),def\)', R'f\(abc,(.*),def\)'"#, + r"SELECT R'abc', R'abc', R'f\(abc,(.*),def\)', R'f\(abc,(.*),def\)'", ); if let Statement::Query(query) = stmt { if let SetExpr::Select(select) = *query.body { @@ -69,11 +69,11 @@ fn parse_raw_literal() { expr_from_projection(&select.projection[1]) ); assert_eq!( - &Expr::Value(Value::RawStringLiteral(r#"f\(abc,(.*),def\)"#.to_string())), + &Expr::Value(Value::RawStringLiteral(r"f\(abc,(.*),def\)".to_string())), expr_from_projection(&select.projection[2]) ); assert_eq!( - &Expr::Value(Value::RawStringLiteral(r#"f\(abc,(.*),def\)"#.to_string())), + 
&Expr::Value(Value::RawStringLiteral(r"f\(abc,(.*),def\)".to_string())), expr_from_projection(&select.projection[3]) ); return; diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 96dac3da0..3fdf3d211 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -7212,7 +7212,7 @@ fn parse_with_recursion_limit() { .expect("tokenize to work") .parse_statements(); - assert!(matches!(res, Ok(_)), "{res:?}"); + assert!(res.is_ok(), "{res:?}"); // limit recursion to something smaller, expect parsing to fail let res = Parser::new(&dialect) @@ -7230,7 +7230,7 @@ fn parse_with_recursion_limit() { .with_recursion_limit(50) .parse_statements(); - assert!(matches!(res, Ok(_)), "{res:?}"); + assert!(res.is_ok(), "{res:?}"); } #[test] diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 3a0177df4..ab7997cdf 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -634,11 +634,11 @@ fn parse_escaped_backticks_with_no_escape() { #[test] fn parse_unterminated_escape() { - let sql = r#"SELECT 'I\'m not fine\'"#; + let sql = r"SELECT 'I\'m not fine\'"; let result = std::panic::catch_unwind(|| mysql().one_statement_parses_to(sql, "")); assert!(result.is_err()); - let sql = r#"SELECT 'I\\'m not fine'"#; + let sql = r"SELECT 'I\\'m not fine'"; let result = std::panic::catch_unwind(|| mysql().one_statement_parses_to(sql, "")); assert!(result.is_err()); } @@ -666,7 +666,7 @@ fn parse_escaped_string_with_escape() { _ => unreachable!(), }; } - let sql = r#"SELECT 'I\'m fine'"#; + let sql = r"SELECT 'I\'m fine'"; assert_mysql_query_value(sql, "I'm fine"); let sql = r#"SELECT 'I''m fine'"#; @@ -675,7 +675,7 @@ fn parse_escaped_string_with_escape() { let sql = r#"SELECT 'I\"m fine'"#; assert_mysql_query_value(sql, "I\"m fine"); - let sql = r#"SELECT 'Testing: \0 \\ \% \_ \b \n \r \t \Z \a \ '"#; + let sql = r"SELECT 'Testing: \0 \\ \% \_ \b \n \r \t \Z \a \ '"; assert_mysql_query_value(sql, "Testing: \0 \\ % _ \u{8} \n \r \t 
\u{1a} a "); } @@ -702,8 +702,8 @@ fn parse_escaped_string_with_no_escape() { _ => unreachable!(), }; } - let sql = r#"SELECT 'I\'m fine'"#; - assert_mysql_query_value(sql, r#"I\'m fine"#); + let sql = r"SELECT 'I\'m fine'"; + assert_mysql_query_value(sql, r"I\'m fine"); let sql = r#"SELECT 'I''m fine'"#; assert_mysql_query_value(sql, r#"I''m fine"#); @@ -711,8 +711,8 @@ fn parse_escaped_string_with_no_escape() { let sql = r#"SELECT 'I\"m fine'"#; assert_mysql_query_value(sql, r#"I\"m fine"#); - let sql = r#"SELECT 'Testing: \0 \\ \% \_ \b \n \r \t \Z \a \ '"#; - assert_mysql_query_value(sql, r#"Testing: \0 \\ \% \_ \b \n \r \t \Z \a \ "#); + let sql = r"SELECT 'Testing: \0 \\ \% \_ \b \n \r \t \Z \a \ '"; + assert_mysql_query_value(sql, r"Testing: \0 \\ \% \_ \b \n \r \t \Z \a \ "); } #[test] @@ -723,7 +723,7 @@ fn check_roundtrip_of_escaped_string() { dialects: vec![Box::new(MySqlDialect {})], options: options.clone(), } - .verified_stmt(r#"SELECT 'I\'m fine'"#); + .verified_stmt(r"SELECT 'I\'m fine'"); TestedDialects { dialects: vec![Box::new(MySqlDialect {})], options: options.clone(), @@ -733,12 +733,12 @@ fn check_roundtrip_of_escaped_string() { dialects: vec![Box::new(MySqlDialect {})], options: options.clone(), } - .verified_stmt(r#"SELECT 'I\\\'m fine'"#); + .verified_stmt(r"SELECT 'I\\\'m fine'"); TestedDialects { dialects: vec![Box::new(MySqlDialect {})], options: options.clone(), } - .verified_stmt(r#"SELECT 'I\\\'m fine'"#); + .verified_stmt(r"SELECT 'I\\\'m fine'"); TestedDialects { dialects: vec![Box::new(MySqlDialect {})], diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index b3621a34b..c34ba75a8 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2332,8 +2332,7 @@ fn pg_and_generic() -> TestedDialects { #[test] fn parse_escaped_literal_string() { - let sql = - r#"SELECT E's1 \n s1', E's2 \\n s2', E's3 \\\n s3', E's4 \\\\n s4', E'\'', E'foo \\'"#; + let sql = r"SELECT E's1 \n s1', E's2 \\n s2', E's3 
\\\n s3', E's4 \\\\n s4', E'\'', E'foo \\'"; let select = pg_and_generic().verified_only_select(sql); assert_eq!(6, select.projection.len()); assert_eq!( @@ -2361,7 +2360,7 @@ fn parse_escaped_literal_string() { expr_from_projection(&select.projection[5]) ); - let sql = r#"SELECT E'\'"#; + let sql = r"SELECT E'\'"; assert_eq!( pg_and_generic() .parse_sql_statements(sql) @@ -2631,7 +2630,7 @@ fn parse_create_role() { err => panic!("Failed to parse CREATE ROLE test case: {err:?}"), } - let negatables = vec![ + let negatables = [ "BYPASSRLS", "CREATEDB", "CREATEROLE", diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 200849896..ecd608527 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1004,7 +1004,7 @@ fn test_copy_into_copy_options() { #[test] fn test_snowflake_stage_object_names() { - let allowed_formatted_names = vec![ + let allowed_formatted_names = [ "my_company.emp_basic", "@namespace.%table_name", "@namespace.%table_name/path", From 4c3a4ad5a87ef8b4dd24a6e8540243e14fbfa148 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 25 Aug 2023 12:21:40 -0400 Subject: [PATCH 014/735] Update release documentation (#954) --- docs/releasing.md | 7 ------- 1 file changed, 7 deletions(-) diff --git a/docs/releasing.md b/docs/releasing.md index b71c97595..efe0ab3d2 100644 --- a/docs/releasing.md +++ b/docs/releasing.md @@ -45,13 +45,6 @@ $ cargo install cargo-release Note that credentials for authoring in this way are securely stored in the (GitHub) repo secrets as `CRATE_TOKEN`. - * Bump the crate version again (to something like `0.8.1-alpha.0`) to - indicate the start of new development cycle. - -3. Push the updates to the `main` branch upstream: - ``` - $ git push upstream - ``` 4. 
Check that the new version of the crate is available on crates.io: https://crates.io/crates/sqlparser From b02c3f87ec0b70b608ddbf6e92bf3b94ffc9d211 Mon Sep 17 00:00:00 2001 From: dawg Date: Thu, 7 Sep 2023 22:23:09 +0200 Subject: [PATCH 015/735] feat: show location info in parse errors (#958) --- src/parser/mod.rs | 101 +++++++++++++++++++++++++------------- src/test_utils.rs | 8 ++- src/tokenizer.rs | 48 +++++++++--------- tests/sqlparser_common.rs | 30 ++++++----- 4 files changed, 116 insertions(+), 71 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 846215249..6902fc565 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -44,8 +44,8 @@ pub enum ParserError { // Use `Parser::expected` instead, if possible macro_rules! parser_err { - ($MSG:expr) => { - Err(ParserError::ParserError($MSG.to_string())) + ($MSG:expr, $loc:expr) => { + Err(ParserError::ParserError(format!("{}{}", $MSG, $loc))) }; } @@ -368,8 +368,8 @@ impl<'a> Parser<'a> { debug!("Parsing sql '{}'...", sql); let tokens = Tokenizer::new(self.dialect, sql) .with_unescape(self.options.unescape) - .tokenize()?; - Ok(self.with_tokens(tokens)) + .tokenize_with_location()?; + Ok(self.with_tokens_with_locations(tokens)) } /// Parse potentially multiple statements @@ -724,6 +724,7 @@ impl<'a> Parser<'a> { // Note also that naively `SELECT date` looks like a syntax error because the `date` type // name is not followed by a string literal, but in fact in PostgreSQL it is a valid // expression that should parse as the column name "date". + let loc = self.peek_token().location; return_ok_if_some!(self.maybe_parse(|parser| { match parser.parse_data_type()? { DataType::Interval => parser.parse_interval(), @@ -734,7 +735,7 @@ impl<'a> Parser<'a> { // name, resulting in `NOT 'a'` being recognized as a `TypedString` instead of // an unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the // `type 'string'` syntax for the custom data types at all. - DataType::Custom(..) 
=> parser_err!("dummy"), + DataType::Custom(..) => parser_err!("dummy", loc), data_type => Ok(Expr::TypedString { data_type, value: parser.parse_literal_string()?, @@ -909,7 +910,12 @@ impl<'a> Parser<'a> { let tok = self.next_token(); let key = match tok.token { Token::Word(word) => word.to_ident(), - _ => return parser_err!(format!("Expected identifier, found: {tok}")), + _ => { + return parser_err!( + format!("Expected identifier, found: {tok}"), + tok.location + ) + } }; Ok(Expr::CompositeAccess { expr: Box::new(expr), @@ -1220,7 +1226,10 @@ impl<'a> Parser<'a> { r#in: Box::new(from), }) } else { - parser_err!("Position function must include IN keyword".to_string()) + parser_err!( + "Position function must include IN keyword".to_string(), + self.peek_token().location + ) } } @@ -1884,7 +1893,10 @@ impl<'a> Parser<'a> { } } // Can only happen if `get_next_precedence` got out of sync with this function - _ => parser_err!(format!("No infix parser for token {:?}", tok.token)), + _ => parser_err!( + format!("No infix parser for token {:?}", tok.token), + tok.location + ), } } else if Token::DoubleColon == tok { self.parse_pg_cast(expr) @@ -1935,7 +1947,10 @@ impl<'a> Parser<'a> { }) } else { // Can only happen if `get_next_precedence` got out of sync with this function - parser_err!(format!("No infix parser for token {:?}", tok.token)) + parser_err!( + format!("No infix parser for token {:?}", tok.token), + tok.location + ) } } @@ -2213,7 +2228,10 @@ impl<'a> Parser<'a> { /// Report unexpected token pub fn expected(&self, expected: &str, found: TokenWithLocation) -> Result { - parser_err!(format!("Expected {expected}, found: {found}")) + parser_err!( + format!("Expected {expected}, found: {found}"), + found.location + ) } /// Look for an expected keyword and consume it if it exists @@ -2378,13 +2396,14 @@ impl<'a> Parser<'a> { /// Parse either `ALL`, `DISTINCT` or `DISTINCT ON (...)`. 
Returns `None` if `ALL` is parsed /// and results in a `ParserError` if both `ALL` and `DISTINCT` are found. pub fn parse_all_or_distinct(&mut self) -> Result, ParserError> { + let loc = self.peek_token().location; let all = self.parse_keyword(Keyword::ALL); let distinct = self.parse_keyword(Keyword::DISTINCT); if !distinct { return Ok(None); } if all { - return parser_err!("Cannot specify both ALL and DISTINCT".to_string()); + return parser_err!("Cannot specify both ALL and DISTINCT".to_string(), loc); } let on = self.parse_keyword(Keyword::ON); if !on { @@ -2986,10 +3005,14 @@ impl<'a> Parser<'a> { let mut admin = vec![]; while let Some(keyword) = self.parse_one_of_keywords(&optional_keywords) { + let loc = self + .tokens + .get(self.index - 1) + .map_or(Location { line: 0, column: 0 }, |t| t.location); match keyword { Keyword::AUTHORIZATION => { if authorization_owner.is_some() { - parser_err!("Found multiple AUTHORIZATION") + parser_err!("Found multiple AUTHORIZATION", loc) } else { authorization_owner = Some(self.parse_object_name()?); Ok(()) @@ -2997,7 +3020,7 @@ impl<'a> Parser<'a> { } Keyword::LOGIN | Keyword::NOLOGIN => { if login.is_some() { - parser_err!("Found multiple LOGIN or NOLOGIN") + parser_err!("Found multiple LOGIN or NOLOGIN", loc) } else { login = Some(keyword == Keyword::LOGIN); Ok(()) @@ -3005,7 +3028,7 @@ impl<'a> Parser<'a> { } Keyword::INHERIT | Keyword::NOINHERIT => { if inherit.is_some() { - parser_err!("Found multiple INHERIT or NOINHERIT") + parser_err!("Found multiple INHERIT or NOINHERIT", loc) } else { inherit = Some(keyword == Keyword::INHERIT); Ok(()) @@ -3013,7 +3036,7 @@ impl<'a> Parser<'a> { } Keyword::BYPASSRLS | Keyword::NOBYPASSRLS => { if bypassrls.is_some() { - parser_err!("Found multiple BYPASSRLS or NOBYPASSRLS") + parser_err!("Found multiple BYPASSRLS or NOBYPASSRLS", loc) } else { bypassrls = Some(keyword == Keyword::BYPASSRLS); Ok(()) @@ -3021,7 +3044,7 @@ impl<'a> Parser<'a> { } Keyword::CREATEDB | 
Keyword::NOCREATEDB => { if create_db.is_some() { - parser_err!("Found multiple CREATEDB or NOCREATEDB") + parser_err!("Found multiple CREATEDB or NOCREATEDB", loc) } else { create_db = Some(keyword == Keyword::CREATEDB); Ok(()) @@ -3029,7 +3052,7 @@ impl<'a> Parser<'a> { } Keyword::CREATEROLE | Keyword::NOCREATEROLE => { if create_role.is_some() { - parser_err!("Found multiple CREATEROLE or NOCREATEROLE") + parser_err!("Found multiple CREATEROLE or NOCREATEROLE", loc) } else { create_role = Some(keyword == Keyword::CREATEROLE); Ok(()) @@ -3037,7 +3060,7 @@ impl<'a> Parser<'a> { } Keyword::SUPERUSER | Keyword::NOSUPERUSER => { if superuser.is_some() { - parser_err!("Found multiple SUPERUSER or NOSUPERUSER") + parser_err!("Found multiple SUPERUSER or NOSUPERUSER", loc) } else { superuser = Some(keyword == Keyword::SUPERUSER); Ok(()) @@ -3045,7 +3068,7 @@ impl<'a> Parser<'a> { } Keyword::REPLICATION | Keyword::NOREPLICATION => { if replication.is_some() { - parser_err!("Found multiple REPLICATION or NOREPLICATION") + parser_err!("Found multiple REPLICATION or NOREPLICATION", loc) } else { replication = Some(keyword == Keyword::REPLICATION); Ok(()) @@ -3053,7 +3076,7 @@ impl<'a> Parser<'a> { } Keyword::PASSWORD => { if password.is_some() { - parser_err!("Found multiple PASSWORD") + parser_err!("Found multiple PASSWORD", loc) } else { password = if self.parse_keyword(Keyword::NULL) { Some(Password::NullPassword) @@ -3066,7 +3089,7 @@ impl<'a> Parser<'a> { Keyword::CONNECTION => { self.expect_keyword(Keyword::LIMIT)?; if connection_limit.is_some() { - parser_err!("Found multiple CONNECTION LIMIT") + parser_err!("Found multiple CONNECTION LIMIT", loc) } else { connection_limit = Some(Expr::Value(self.parse_number_value()?)); Ok(()) @@ -3075,7 +3098,7 @@ impl<'a> Parser<'a> { Keyword::VALID => { self.expect_keyword(Keyword::UNTIL)?; if valid_until.is_some() { - parser_err!("Found multiple VALID UNTIL") + parser_err!("Found multiple VALID UNTIL", loc) } else { valid_until 
= Some(Expr::Value(self.parse_value()?)); Ok(()) @@ -3084,14 +3107,14 @@ impl<'a> Parser<'a> { Keyword::IN => { if self.parse_keyword(Keyword::ROLE) { if !in_role.is_empty() { - parser_err!("Found multiple IN ROLE") + parser_err!("Found multiple IN ROLE", loc) } else { in_role = self.parse_comma_separated(Parser::parse_identifier)?; Ok(()) } } else if self.parse_keyword(Keyword::GROUP) { if !in_group.is_empty() { - parser_err!("Found multiple IN GROUP") + parser_err!("Found multiple IN GROUP", loc) } else { in_group = self.parse_comma_separated(Parser::parse_identifier)?; Ok(()) @@ -3102,7 +3125,7 @@ impl<'a> Parser<'a> { } Keyword::ROLE => { if !role.is_empty() { - parser_err!("Found multiple ROLE") + parser_err!("Found multiple ROLE", loc) } else { role = self.parse_comma_separated(Parser::parse_identifier)?; Ok(()) @@ -3110,7 +3133,7 @@ impl<'a> Parser<'a> { } Keyword::USER => { if !user.is_empty() { - parser_err!("Found multiple USER") + parser_err!("Found multiple USER", loc) } else { user = self.parse_comma_separated(Parser::parse_identifier)?; Ok(()) @@ -3118,7 +3141,7 @@ impl<'a> Parser<'a> { } Keyword::ADMIN => { if !admin.is_empty() { - parser_err!("Found multiple ADMIN") + parser_err!("Found multiple ADMIN", loc) } else { admin = self.parse_comma_separated(Parser::parse_identifier)?; Ok(()) @@ -3181,14 +3204,19 @@ impl<'a> Parser<'a> { // specifying multiple objects to delete in a single statement let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); let names = self.parse_comma_separated(Parser::parse_object_name)?; + + let loc = self.peek_token().location; let cascade = self.parse_keyword(Keyword::CASCADE); let restrict = self.parse_keyword(Keyword::RESTRICT); let purge = self.parse_keyword(Keyword::PURGE); if cascade && restrict { - return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP"); + return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP", loc); } if object_type == ObjectType::Role && (cascade || 
restrict || purge) { - return parser_err!("Cannot specify CASCADE, RESTRICT, or PURGE in DROP ROLE"); + return parser_err!( + "Cannot specify CASCADE, RESTRICT, or PURGE in DROP ROLE", + loc + ); } Ok(Statement::Drop { object_type, @@ -4446,7 +4474,11 @@ impl<'a> Parser<'a> { fn parse_literal_char(&mut self) -> Result { let s = self.parse_literal_string()?; if s.len() != 1 { - return parser_err!(format!("Expect a char, found {s:?}")); + let loc = self + .tokens + .get(self.index - 1) + .map_or(Location { line: 0, column: 0 }, |t| t.location); + return parser_err!(format!("Expect a char, found {s:?}"), loc); } Ok(s.chars().next().unwrap()) } @@ -4525,7 +4557,7 @@ impl<'a> Parser<'a> { // (i.e., it returns the input string). Token::Number(ref n, l) => match n.parse() { Ok(n) => Ok(Value::Number(n, l)), - Err(e) => parser_err!(format!("Could not parse '{n}' as number: {e}")), + Err(e) => parser_err!(format!("Could not parse '{n}' as number: {e}"), location), }, Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())), Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())), @@ -6465,10 +6497,11 @@ impl<'a> Parser<'a> { .parse_keywords(&[Keyword::GRANTED, Keyword::BY]) .then(|| self.parse_identifier().unwrap()); + let loc = self.peek_token().location; let cascade = self.parse_keyword(Keyword::CASCADE); let restrict = self.parse_keyword(Keyword::RESTRICT); if cascade && restrict { - return parser_err!("Cannot specify both CASCADE and RESTRICT in REVOKE"); + return parser_err!("Cannot specify both CASCADE and RESTRICT in REVOKE", loc); } Ok(Statement::Revoke { @@ -7929,14 +7962,12 @@ mod tests { #[test] fn test_parser_error_loc() { - // TODO: Once we thread token locations through the parser, we should update this - // test to assert the locations of the referenced token let sql = "SELECT this is a syntax error"; let ast = Parser::parse_sql(&GenericDialect, sql); assert_eq!( ast, Err(ParserError::ParserError( - "Expected 
[NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a" + "Expected [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a at Line: 1, Column 16" .to_string() )) ); diff --git a/src/test_utils.rs b/src/test_utils.rs index 91130fb51..a5e9e739d 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -28,6 +28,7 @@ use core::fmt::Debug; use crate::dialect::*; use crate::parser::{Parser, ParserError}; +use crate::tokenizer::Tokenizer; use crate::{ast::*, parser::ParserOptions}; /// Tests use the methods on this struct to invoke the parser on one or @@ -82,8 +83,13 @@ impl TestedDialects { /// the result is the same for all tested dialects. pub fn parse_sql_statements(&self, sql: &str) -> Result, ParserError> { self.one_of_identical_results(|dialect| { + let mut tokenizer = Tokenizer::new(dialect, sql); + if let Some(options) = &self.options { + tokenizer = tokenizer.with_unescape(options.unescape); + } + let tokens = tokenizer.tokenize()?; self.new_parser(dialect) - .try_with_sql(sql)? 
+ .with_tokens(tokens) .parse_statements() }) // To fail the `ensure_multiple_dialects_are_tested` test: diff --git a/src/tokenizer.rs b/src/tokenizer.rs index f20e01b71..1b1b1e96c 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -350,7 +350,7 @@ impl fmt::Display for Whitespace { } /// Location in input string -#[derive(Debug, Eq, PartialEq, Clone)] +#[derive(Debug, Eq, PartialEq, Clone, Copy)] pub struct Location { /// Line number, starting from 1 pub line: u64, @@ -358,6 +358,20 @@ pub struct Location { pub column: u64, } +impl fmt::Display for Location { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.line == 0 { + return Ok(()); + } + write!( + f, + // TODO: use standard compiler location syntax (::) + " at Line: {}, Column {}", + self.line, self.column, + ) + } +} + /// A [Token] with [Location] attached to it #[derive(Debug, Eq, PartialEq, Clone)] pub struct TokenWithLocation { @@ -400,17 +414,12 @@ impl fmt::Display for TokenWithLocation { #[derive(Debug, PartialEq, Eq)] pub struct TokenizerError { pub message: String, - pub line: u64, - pub col: u64, + pub location: Location, } impl fmt::Display for TokenizerError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "{} at Line: {}, Column {}", - self.message, self.line, self.col - ) + write!(f, "{}{}", self.message, self.location,) } } @@ -546,10 +555,7 @@ impl<'a> Tokenizer<'a> { let mut location = state.location(); while let Some(token) = self.next_token(&mut state)? 
{ - tokens.push(TokenWithLocation { - token, - location: location.clone(), - }); + tokens.push(TokenWithLocation { token, location }); location = state.location(); } @@ -1122,8 +1128,7 @@ impl<'a> Tokenizer<'a> { ) -> Result { Err(TokenizerError { message: message.into(), - col: loc.column, - line: loc.line, + location: loc, }) } @@ -1368,8 +1373,7 @@ mod tests { fn tokenizer_error_impl() { let err = TokenizerError { message: "test".into(), - line: 1, - col: 1, + location: Location { line: 1, column: 1 }, }; #[cfg(feature = "std")] { @@ -1694,8 +1698,7 @@ mod tests { tokenizer.tokenize(), Err(TokenizerError { message: "Unterminated string literal".to_string(), - line: 1, - col: 8 + location: Location { line: 1, column: 8 }, }) ); } @@ -1710,8 +1713,10 @@ mod tests { tokenizer.tokenize(), Err(TokenizerError { message: "Unterminated string literal".to_string(), - line: 1, - col: 35 + location: Location { + line: 1, + column: 35 + } }) ); } @@ -1873,8 +1878,7 @@ mod tests { tokenizer.tokenize(), Err(TokenizerError { message: "Expected close delimiter '\"' before EOF.".to_string(), - line: 1, - col: 1 + location: Location { line: 1, column: 1 }, }) ); } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 3fdf3d211..450743360 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -836,7 +836,12 @@ fn test_eof_after_as() { #[test] fn test_no_infix_error() { - let res = Parser::parse_sql(&ClickHouseDialect {}, "ASSERT-URA<<"); + let dialects = TestedDialects { + dialects: vec![Box::new(ClickHouseDialect {})], + options: None, + }; + + let res = dialects.parse_sql_statements("ASSERT-URA<<"); assert_eq!( ParserError::ParserError("No infix parser for token ShiftLeft".to_string()), res.unwrap_err() @@ -3238,19 +3243,21 @@ fn parse_alter_table_alter_column_type() { _ => unreachable!(), } - let res = Parser::parse_sql( - &GenericDialect {}, - &format!("{alter_stmt} ALTER COLUMN is_active TYPE TEXT"), - ); + let dialect = TestedDialects { 
+ dialects: vec![Box::new(GenericDialect {})], + options: None, + }; + + let res = + dialect.parse_sql_statements(&format!("{alter_stmt} ALTER COLUMN is_active TYPE TEXT")); assert_eq!( ParserError::ParserError("Expected SET/DROP NOT NULL, SET DEFAULT, SET DATA TYPE after ALTER COLUMN, found: TYPE".to_string()), res.unwrap_err() ); - let res = Parser::parse_sql( - &GenericDialect {}, - &format!("{alter_stmt} ALTER COLUMN is_active SET DATA TYPE TEXT USING 'text'"), - ); + let res = dialect.parse_sql_statements(&format!( + "{alter_stmt} ALTER COLUMN is_active SET DATA TYPE TEXT USING 'text'" + )); assert_eq!( ParserError::ParserError("Expected end of statement, found: USING".to_string()), res.unwrap_err() @@ -3295,10 +3302,7 @@ fn parse_alter_table_drop_constraint() { _ => unreachable!(), } - let res = Parser::parse_sql( - &GenericDialect {}, - &format!("{alter_stmt} DROP CONSTRAINT is_active TEXT"), - ); + let res = parse_sql_statements(&format!("{alter_stmt} DROP CONSTRAINT is_active TEXT")); assert_eq!( ParserError::ParserError("Expected end of statement, found: TEXT".to_string()), res.unwrap_err() From e0afd4b179bd5209be6140c0aa02ef49c5a32707 Mon Sep 17 00:00:00 2001 From: SeanTroyUWO Date: Thu, 7 Sep 2023 14:32:50 -0600 Subject: [PATCH 016/735] `ANY` and `ALL` contains their operators (#963) --- src/ast/mod.rs | 28 ++++++++++++++++++++++------ src/parser/mod.rs | 32 ++++++++++++++++++++++++-------- tests/sqlparser_common.rs | 12 ++++++------ 3 files changed, 52 insertions(+), 20 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index a241f9509..41d66166d 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -419,10 +419,18 @@ pub enum Expr { pattern: Box, escape_char: Option, }, - /// Any operation e.g. `1 ANY (1)` or `foo > ANY(bar)`, It will be wrapped in the right side of BinaryExpr - AnyOp(Box), - /// ALL operation e.g. `1 ALL (1)` or `foo > ALL(bar)`, It will be wrapped in the right side of BinaryExpr - AllOp(Box), + /// Any operation e.g. 
`foo > ANY(bar)`, comparison operator is one of [=, >, <, =>, =<, !=] + AnyOp { + left: Box, + compare_op: BinaryOperator, + right: Box, + }, + /// ALL operation e.g. `foo > ALL(bar)`, comparison operator is one of [=, >, <, =>, =<, !=] + AllOp { + left: Box, + compare_op: BinaryOperator, + right: Box, + }, /// Unary operation e.g. `NOT foo` UnaryOp { op: UnaryOperator, expr: Box }, /// CAST an expression to a different data type e.g. `CAST(foo AS VARCHAR(123))` @@ -724,8 +732,16 @@ impl fmt::Display for Expr { pattern ), }, - Expr::AnyOp(expr) => write!(f, "ANY({expr})"), - Expr::AllOp(expr) => write!(f, "ALL({expr})"), + Expr::AnyOp { + left, + compare_op, + right, + } => write!(f, "{left} {compare_op} ANY({right})"), + Expr::AllOp { + left, + compare_op, + right, + } => write!(f, "{left} {compare_op} ALL({right})"), Expr::UnaryOp { op, expr } => { if op == &UnaryOperator::PGPostfixFactorial { write!(f, "{expr}{op}") diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6902fc565..9de67a51b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1782,16 +1782,32 @@ impl<'a> Parser<'a> { let right = self.parse_subexpr(precedence)?; self.expect_token(&Token::RParen)?; - let right = match keyword { - Keyword::ALL => Box::new(Expr::AllOp(Box::new(right))), - Keyword::ANY => Box::new(Expr::AnyOp(Box::new(right))), - _ => unreachable!(), + if !matches!( + op, + BinaryOperator::Gt + | BinaryOperator::Lt + | BinaryOperator::GtEq + | BinaryOperator::LtEq + | BinaryOperator::Eq + | BinaryOperator::NotEq + ) { + return parser_err!(format!( + "Expected one of [=, >, <, =>, =<, !=] as comparison operator, found: {op}" + )); }; - Ok(Expr::BinaryOp { - left: Box::new(expr), - op, - right, + Ok(match keyword { + Keyword::ALL => Expr::AllOp { + left: Box::new(expr), + compare_op: op, + right: Box::new(right), + }, + Keyword::ANY => Expr::AnyOp { + left: Box::new(expr), + compare_op: op, + right: Box::new(right), + }, + _ => unreachable!(), }) } else { Ok(Expr::BinaryOp { 
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 450743360..a9c4130b4 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1558,10 +1558,10 @@ fn parse_bitwise_ops() { fn parse_binary_any() { let select = verified_only_select("SELECT a = ANY(b)"); assert_eq!( - SelectItem::UnnamedExpr(Expr::BinaryOp { + SelectItem::UnnamedExpr(Expr::AnyOp { left: Box::new(Expr::Identifier(Ident::new("a"))), - op: BinaryOperator::Eq, - right: Box::new(Expr::AnyOp(Box::new(Expr::Identifier(Ident::new("b"))))), + compare_op: BinaryOperator::Eq, + right: Box::new(Expr::Identifier(Ident::new("b"))), }), select.projection[0] ); @@ -1571,10 +1571,10 @@ fn parse_binary_any() { fn parse_binary_all() { let select = verified_only_select("SELECT a = ALL(b)"); assert_eq!( - SelectItem::UnnamedExpr(Expr::BinaryOp { + SelectItem::UnnamedExpr(Expr::AllOp { left: Box::new(Expr::Identifier(Ident::new("a"))), - op: BinaryOperator::Eq, - right: Box::new(Expr::AllOp(Box::new(Expr::Identifier(Ident::new("b"))))), + compare_op: BinaryOperator::Eq, + right: Box::new(Expr::Identifier(Ident::new("b"))), }), select.projection[0] ); From 25e037c50fc46bc061126d583bc64c2da7c6cabf Mon Sep 17 00:00:00 2001 From: Forbes Lindesay Date: Thu, 7 Sep 2023 21:39:47 +0100 Subject: [PATCH 017/735] feat: allow multiple actions in one `ALTER TABLE` statement (#960) --- src/ast/mod.rs | 24 ++- src/parser/mod.rs | 342 ++++++++++++++++++----------------- src/test_utils.rs | 20 ++ tests/sqlparser_common.rs | 179 +++++++----------- tests/sqlparser_mysql.rs | 36 ++-- tests/sqlparser_postgres.rs | 66 +++++-- tests/sqlparser_snowflake.rs | 8 +- 7 files changed, 349 insertions(+), 326 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 41d66166d..79edea7d1 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1407,7 +1407,9 @@ pub enum Statement { /// Table name #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] name: ObjectName, - operation: AlterTableOperation, 
+ if_exists: bool, + only: bool, + operations: Vec, }, AlterIndex { name: ObjectName, @@ -2618,8 +2620,24 @@ impl fmt::Display for Statement { } Ok(()) } - Statement::AlterTable { name, operation } => { - write!(f, "ALTER TABLE {name} {operation}") + Statement::AlterTable { + name, + if_exists, + only, + operations, + } => { + write!(f, "ALTER TABLE ")?; + if *if_exists { + write!(f, "IF EXISTS ")?; + } + if *only { + write!(f, "ONLY ")?; + } + write!( + f, + "{name} {operations}", + operations = display_comma_separated(operations) + ) } Statement::AlterIndex { name, operation } => { write!(f, "ALTER INDEX {name} {operation}") diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 9de67a51b..b872bbfc8 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4081,6 +4081,174 @@ impl<'a> Parser<'a> { Ok(SqlOption { name, value }) } + pub fn parse_alter_table_operation(&mut self) -> Result { + let operation = if self.parse_keyword(Keyword::ADD) { + if let Some(constraint) = self.parse_optional_table_constraint()? 
{ + AlterTableOperation::AddConstraint(constraint) + } else { + let if_not_exists = + self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::AddPartitions { + if_not_exists, + new_partitions: partitions, + } + } else { + let column_keyword = self.parse_keyword(Keyword::COLUMN); + + let if_not_exists = if dialect_of!(self is PostgreSqlDialect | BigQueryDialect | DuckDbDialect | GenericDialect) + { + self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]) + || if_not_exists + } else { + false + }; + + let column_def = self.parse_column_def()?; + AlterTableOperation::AddColumn { + column_keyword, + if_not_exists, + column_def, + } + } + } + } else if self.parse_keyword(Keyword::RENAME) { + if dialect_of!(self is PostgreSqlDialect) && self.parse_keyword(Keyword::CONSTRAINT) { + let old_name = self.parse_identifier()?; + self.expect_keyword(Keyword::TO)?; + let new_name = self.parse_identifier()?; + AlterTableOperation::RenameConstraint { old_name, new_name } + } else if self.parse_keyword(Keyword::TO) { + let table_name = self.parse_object_name()?; + AlterTableOperation::RenameTable { table_name } + } else { + let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] + let old_column_name = self.parse_identifier()?; + self.expect_keyword(Keyword::TO)?; + let new_column_name = self.parse_identifier()?; + AlterTableOperation::RenameColumn { + old_column_name, + new_column_name, + } + } + } else if self.parse_keyword(Keyword::DROP) { + if self.parse_keywords(&[Keyword::IF, Keyword::EXISTS, Keyword::PARTITION]) { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::DropPartitions { + partitions, + if_exists: true, + } + } else 
if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::DropPartitions { + partitions, + if_exists: false, + } + } else if self.parse_keyword(Keyword::CONSTRAINT) { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let name = self.parse_identifier()?; + let cascade = self.parse_keyword(Keyword::CASCADE); + AlterTableOperation::DropConstraint { + if_exists, + name, + cascade, + } + } else if self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) + && dialect_of!(self is MySqlDialect | GenericDialect) + { + AlterTableOperation::DropPrimaryKey + } else { + let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let column_name = self.parse_identifier()?; + let cascade = self.parse_keyword(Keyword::CASCADE); + AlterTableOperation::DropColumn { + column_name, + if_exists, + cascade, + } + } + } else if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let before = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + self.expect_keyword(Keyword::RENAME)?; + self.expect_keywords(&[Keyword::TO, Keyword::PARTITION])?; + self.expect_token(&Token::LParen)?; + let renames = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::RenamePartitions { + old_partitions: before, + new_partitions: renames, + } + } else if self.parse_keyword(Keyword::CHANGE) { + let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] + let old_name = self.parse_identifier()?; + let new_name = self.parse_identifier()?; + let data_type = self.parse_data_type()?; + let mut options = vec![]; + while let Some(option) = self.parse_optional_column_option()? 
{ + options.push(option); + } + + AlterTableOperation::ChangeColumn { + old_name, + new_name, + data_type, + options, + } + } else if self.parse_keyword(Keyword::ALTER) { + let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] + let column_name = self.parse_identifier()?; + let is_postgresql = dialect_of!(self is PostgreSqlDialect); + + let op = if self.parse_keywords(&[Keyword::SET, Keyword::NOT, Keyword::NULL]) { + AlterColumnOperation::SetNotNull {} + } else if self.parse_keywords(&[Keyword::DROP, Keyword::NOT, Keyword::NULL]) { + AlterColumnOperation::DropNotNull {} + } else if self.parse_keywords(&[Keyword::SET, Keyword::DEFAULT]) { + AlterColumnOperation::SetDefault { + value: self.parse_expr()?, + } + } else if self.parse_keywords(&[Keyword::DROP, Keyword::DEFAULT]) { + AlterColumnOperation::DropDefault {} + } else if self.parse_keywords(&[Keyword::SET, Keyword::DATA, Keyword::TYPE]) + || (is_postgresql && self.parse_keyword(Keyword::TYPE)) + { + let data_type = self.parse_data_type()?; + let using = if is_postgresql && self.parse_keyword(Keyword::USING) { + Some(self.parse_expr()?) 
+ } else { + None + }; + AlterColumnOperation::SetDataType { data_type, using } + } else { + return self.expected( + "SET/DROP NOT NULL, SET DEFAULT, SET DATA TYPE after ALTER COLUMN", + self.peek_token(), + ); + }; + AlterTableOperation::AlterColumn { column_name, op } + } else if self.parse_keyword(Keyword::SWAP) { + self.expect_keyword(Keyword::WITH)?; + let table_name = self.parse_object_name()?; + AlterTableOperation::SwapWith { table_name } + } else { + return self.expected( + "ADD, RENAME, PARTITION, SWAP or DROP after ALTER TABLE", + self.peek_token(), + ); + }; + Ok(operation) + } + pub fn parse_alter(&mut self) -> Result { let object_type = self.expect_one_of_keywords(&[ Keyword::VIEW, @@ -4091,177 +4259,15 @@ impl<'a> Parser<'a> { match object_type { Keyword::VIEW => self.parse_alter_view(), Keyword::TABLE => { - let _ = self.parse_keyword(Keyword::ONLY); // [ ONLY ] + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let only = self.parse_keyword(Keyword::ONLY); // [ ONLY ] let table_name = self.parse_object_name()?; - let operation = if self.parse_keyword(Keyword::ADD) { - if let Some(constraint) = self.parse_optional_table_constraint()? 
{ - AlterTableOperation::AddConstraint(constraint) - } else { - let if_not_exists = - self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - if self.parse_keyword(Keyword::PARTITION) { - self.expect_token(&Token::LParen)?; - let partitions = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - AlterTableOperation::AddPartitions { - if_not_exists, - new_partitions: partitions, - } - } else { - let column_keyword = self.parse_keyword(Keyword::COLUMN); - - let if_not_exists = if dialect_of!(self is PostgreSqlDialect | BigQueryDialect | DuckDbDialect | GenericDialect) - { - self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]) - || if_not_exists - } else { - false - }; - - let column_def = self.parse_column_def()?; - AlterTableOperation::AddColumn { - column_keyword, - if_not_exists, - column_def, - } - } - } - } else if self.parse_keyword(Keyword::RENAME) { - if dialect_of!(self is PostgreSqlDialect) - && self.parse_keyword(Keyword::CONSTRAINT) - { - let old_name = self.parse_identifier()?; - self.expect_keyword(Keyword::TO)?; - let new_name = self.parse_identifier()?; - AlterTableOperation::RenameConstraint { old_name, new_name } - } else if self.parse_keyword(Keyword::TO) { - let table_name = self.parse_object_name()?; - AlterTableOperation::RenameTable { table_name } - } else { - let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] - let old_column_name = self.parse_identifier()?; - self.expect_keyword(Keyword::TO)?; - let new_column_name = self.parse_identifier()?; - AlterTableOperation::RenameColumn { - old_column_name, - new_column_name, - } - } - } else if self.parse_keyword(Keyword::DROP) { - if self.parse_keywords(&[Keyword::IF, Keyword::EXISTS, Keyword::PARTITION]) { - self.expect_token(&Token::LParen)?; - let partitions = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - AlterTableOperation::DropPartitions { - partitions, - if_exists: true, - } - } 
else if self.parse_keyword(Keyword::PARTITION) { - self.expect_token(&Token::LParen)?; - let partitions = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - AlterTableOperation::DropPartitions { - partitions, - if_exists: false, - } - } else if self.parse_keyword(Keyword::CONSTRAINT) { - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let name = self.parse_identifier()?; - let cascade = self.parse_keyword(Keyword::CASCADE); - AlterTableOperation::DropConstraint { - if_exists, - name, - cascade, - } - } else if self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) - && dialect_of!(self is MySqlDialect | GenericDialect) - { - AlterTableOperation::DropPrimaryKey - } else { - let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let column_name = self.parse_identifier()?; - let cascade = self.parse_keyword(Keyword::CASCADE); - AlterTableOperation::DropColumn { - column_name, - if_exists, - cascade, - } - } - } else if self.parse_keyword(Keyword::PARTITION) { - self.expect_token(&Token::LParen)?; - let before = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - self.expect_keyword(Keyword::RENAME)?; - self.expect_keywords(&[Keyword::TO, Keyword::PARTITION])?; - self.expect_token(&Token::LParen)?; - let renames = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - AlterTableOperation::RenamePartitions { - old_partitions: before, - new_partitions: renames, - } - } else if self.parse_keyword(Keyword::CHANGE) { - let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] - let old_name = self.parse_identifier()?; - let new_name = self.parse_identifier()?; - let data_type = self.parse_data_type()?; - let mut options = vec![]; - while let Some(option) = self.parse_optional_column_option()? 
{ - options.push(option); - } - - AlterTableOperation::ChangeColumn { - old_name, - new_name, - data_type, - options, - } - } else if self.parse_keyword(Keyword::ALTER) { - let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] - let column_name = self.parse_identifier()?; - let is_postgresql = dialect_of!(self is PostgreSqlDialect); - - let op = if self.parse_keywords(&[Keyword::SET, Keyword::NOT, Keyword::NULL]) { - AlterColumnOperation::SetNotNull {} - } else if self.parse_keywords(&[Keyword::DROP, Keyword::NOT, Keyword::NULL]) { - AlterColumnOperation::DropNotNull {} - } else if self.parse_keywords(&[Keyword::SET, Keyword::DEFAULT]) { - AlterColumnOperation::SetDefault { - value: self.parse_expr()?, - } - } else if self.parse_keywords(&[Keyword::DROP, Keyword::DEFAULT]) { - AlterColumnOperation::DropDefault {} - } else if self.parse_keywords(&[Keyword::SET, Keyword::DATA, Keyword::TYPE]) - || (is_postgresql && self.parse_keyword(Keyword::TYPE)) - { - let data_type = self.parse_data_type()?; - let using = if is_postgresql && self.parse_keyword(Keyword::USING) { - Some(self.parse_expr()?) 
- } else { - None - }; - AlterColumnOperation::SetDataType { data_type, using } - } else { - return self.expected( - "SET/DROP NOT NULL, SET DEFAULT, SET DATA TYPE after ALTER COLUMN", - self.peek_token(), - ); - }; - AlterTableOperation::AlterColumn { column_name, op } - } else if self.parse_keyword(Keyword::SWAP) { - self.expect_keyword(Keyword::WITH)?; - let table_name = self.parse_object_name()?; - AlterTableOperation::SwapWith { table_name } - } else { - return self.expected( - "ADD, RENAME, PARTITION, SWAP or DROP after ALTER TABLE", - self.peek_token(), - ); - }; + let operations = self.parse_comma_separated(Parser::parse_alter_table_operation)?; Ok(Statement::AlterTable { name: table_name, - operation, + if_exists, + only, + operations, }) } Keyword::INDEX => { diff --git a/src/test_utils.rs b/src/test_utils.rs index a5e9e739d..0db1e7d24 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -209,6 +209,26 @@ pub fn expr_from_projection(item: &SelectItem) -> &Expr { } } +pub fn alter_table_op_with_name(stmt: Statement, expected_name: &str) -> AlterTableOperation { + match stmt { + Statement::AlterTable { + name, + if_exists, + only: is_only, + operations, + } => { + assert_eq!(name.to_string(), expected_name); + assert!(!if_exists); + assert!(!is_only); + only(operations) + } + _ => panic!("Expected ALTER TABLE statement"), + } +} +pub fn alter_table_op(stmt: Statement) -> AlterTableOperation { + alter_table_op_with_name(stmt, "tab") +} + /// Creates a `Value::Number`, panic'ing if n is not a number pub fn number(n: &str) -> Value { Value::Number(n.parse().unwrap(), false) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index a9c4130b4..1bf108cb0 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -30,8 +30,8 @@ use sqlparser::dialect::{ use sqlparser::keywords::ALL_KEYWORDS; use sqlparser::parser::{Parser, ParserError, ParserOptions}; use test_utils::{ - all_dialects, assert_eq_vec, expr_from_projection, join, 
number, only, table, table_alias, - TestedDialects, + all_dialects, alter_table_op, assert_eq_vec, expr_from_projection, join, number, only, table, + table_alias, TestedDialects, }; #[macro_use] @@ -2920,19 +2920,17 @@ fn parse_create_external_table_lowercase() { #[test] fn parse_alter_table() { let add_column = "ALTER TABLE tab ADD COLUMN foo TEXT;"; - match one_statement_parses_to(add_column, "ALTER TABLE tab ADD COLUMN foo TEXT") { - Statement::AlterTable { - name, - operation: - AlterTableOperation::AddColumn { - column_keyword, - if_not_exists, - column_def, - }, + match alter_table_op(one_statement_parses_to( + add_column, + "ALTER TABLE tab ADD COLUMN foo TEXT", + )) { + AlterTableOperation::AddColumn { + column_keyword, + if_not_exists, + column_def, } => { assert!(column_keyword); assert!(!if_not_exists); - assert_eq!("tab", name.to_string()); assert_eq!("foo", column_def.name.to_string()); assert_eq!("TEXT", column_def.data_type.to_string()); } @@ -2940,28 +2938,19 @@ fn parse_alter_table() { }; let rename_table = "ALTER TABLE tab RENAME TO new_tab"; - match verified_stmt(rename_table) { - Statement::AlterTable { - name, - operation: AlterTableOperation::RenameTable { table_name }, - } => { - assert_eq!("tab", name.to_string()); - assert_eq!("new_tab", table_name.to_string()) + match alter_table_op(verified_stmt(rename_table)) { + AlterTableOperation::RenameTable { table_name } => { + assert_eq!("new_tab", table_name.to_string()); } _ => unreachable!(), }; let rename_column = "ALTER TABLE tab RENAME COLUMN foo TO new_foo"; - match verified_stmt(rename_column) { - Statement::AlterTable { - name, - operation: - AlterTableOperation::RenameColumn { - old_column_name, - new_column_name, - }, + match alter_table_op(verified_stmt(rename_column)) { + AlterTableOperation::RenameColumn { + old_column_name, + new_column_name, } => { - assert_eq!("tab", name.to_string()); assert_eq!(old_column_name.to_string(), "foo"); assert_eq!(new_column_name.to_string(), 
"new_foo"); } @@ -3047,21 +3036,15 @@ fn parse_alter_view_with_columns() { #[test] fn parse_alter_table_add_column() { - match verified_stmt("ALTER TABLE tab ADD foo TEXT") { - Statement::AlterTable { - operation: AlterTableOperation::AddColumn { column_keyword, .. }, - .. - } => { + match alter_table_op(verified_stmt("ALTER TABLE tab ADD foo TEXT")) { + AlterTableOperation::AddColumn { column_keyword, .. } => { assert!(!column_keyword); } _ => unreachable!(), }; - match verified_stmt("ALTER TABLE tab ADD COLUMN foo TEXT") { - Statement::AlterTable { - operation: AlterTableOperation::AddColumn { column_keyword, .. }, - .. - } => { + match alter_table_op(verified_stmt("ALTER TABLE tab ADD COLUMN foo TEXT")) { + AlterTableOperation::AddColumn { column_keyword, .. } => { assert!(column_keyword); } _ => unreachable!(), @@ -3080,24 +3063,19 @@ fn parse_alter_table_add_column_if_not_exists() { options: None, }; - match dialects.verified_stmt("ALTER TABLE tab ADD IF NOT EXISTS foo TEXT") { - Statement::AlterTable { - operation: AlterTableOperation::AddColumn { if_not_exists, .. }, - .. - } => { + match alter_table_op(dialects.verified_stmt("ALTER TABLE tab ADD IF NOT EXISTS foo TEXT")) { + AlterTableOperation::AddColumn { if_not_exists, .. } => { assert!(if_not_exists); } _ => unreachable!(), }; - match dialects.verified_stmt("ALTER TABLE tab ADD COLUMN IF NOT EXISTS foo TEXT") { - Statement::AlterTable { - operation: - AlterTableOperation::AddColumn { - column_keyword, - if_not_exists, - .. - }, + match alter_table_op( + dialects.verified_stmt("ALTER TABLE tab ADD COLUMN IF NOT EXISTS foo TEXT"), + ) { + AlterTableOperation::AddColumn { + column_keyword, + if_not_exists, .. 
} => { assert!(column_keyword); @@ -3123,12 +3101,10 @@ fn parse_alter_table_constraints() { check_one("CHECK (end_date > start_date OR end_date IS NULL)"); fn check_one(constraint_text: &str) { - match verified_stmt(&format!("ALTER TABLE tab ADD {constraint_text}")) { - Statement::AlterTable { - name, - operation: AlterTableOperation::AddConstraint(constraint), - } => { - assert_eq!("tab", name.to_string()); + match alter_table_op(verified_stmt(&format!( + "ALTER TABLE tab ADD {constraint_text}" + ))) { + AlterTableOperation::AddConstraint(constraint) => { assert_eq!(constraint_text, constraint.to_string()); } _ => unreachable!(), @@ -3150,17 +3126,12 @@ fn parse_alter_table_drop_column() { ); fn check_one(constraint_text: &str) { - match verified_stmt(&format!("ALTER TABLE tab {constraint_text}")) { - Statement::AlterTable { - name, - operation: - AlterTableOperation::DropColumn { - column_name, - if_exists, - cascade, - }, + match alter_table_op(verified_stmt(&format!("ALTER TABLE tab {constraint_text}"))) { + AlterTableOperation::DropColumn { + column_name, + if_exists, + cascade, } => { - assert_eq!("tab", name.to_string()); assert_eq!("is_active", column_name.to_string()); assert!(if_exists); assert!(cascade); @@ -3173,12 +3144,10 @@ fn parse_alter_table_drop_column() { #[test] fn parse_alter_table_alter_column() { let alter_stmt = "ALTER TABLE tab"; - match verified_stmt(&format!("{alter_stmt} ALTER COLUMN is_active SET NOT NULL")) { - Statement::AlterTable { - name, - operation: AlterTableOperation::AlterColumn { column_name, op }, - } => { - assert_eq!("tab", name.to_string()); + match alter_table_op(verified_stmt(&format!( + "{alter_stmt} ALTER COLUMN is_active SET NOT NULL" + ))) { + AlterTableOperation::AlterColumn { column_name, op } => { assert_eq!("is_active", column_name.to_string()); assert_eq!(op, AlterColumnOperation::SetNotNull {}); } @@ -3190,14 +3159,10 @@ fn parse_alter_table_alter_column() { "ALTER TABLE tab ALTER COLUMN is_active DROP NOT 
NULL", ); - match verified_stmt(&format!( + match alter_table_op(verified_stmt(&format!( "{alter_stmt} ALTER COLUMN is_active SET DEFAULT false" - )) { - Statement::AlterTable { - name, - operation: AlterTableOperation::AlterColumn { column_name, op }, - } => { - assert_eq!("tab", name.to_string()); + ))) { + AlterTableOperation::AlterColumn { column_name, op } => { assert_eq!("is_active", column_name.to_string()); assert_eq!( op, @@ -3209,12 +3174,10 @@ fn parse_alter_table_alter_column() { _ => unreachable!(), } - match verified_stmt(&format!("{alter_stmt} ALTER COLUMN is_active DROP DEFAULT")) { - Statement::AlterTable { - name, - operation: AlterTableOperation::AlterColumn { column_name, op }, - } => { - assert_eq!("tab", name.to_string()); + match alter_table_op(verified_stmt(&format!( + "{alter_stmt} ALTER COLUMN is_active DROP DEFAULT" + ))) { + AlterTableOperation::AlterColumn { column_name, op } => { assert_eq!("is_active", column_name.to_string()); assert_eq!(op, AlterColumnOperation::DropDefault {}); } @@ -3225,12 +3188,10 @@ fn parse_alter_table_alter_column() { #[test] fn parse_alter_table_alter_column_type() { let alter_stmt = "ALTER TABLE tab"; - match verified_stmt("ALTER TABLE tab ALTER COLUMN is_active SET DATA TYPE TEXT") { - Statement::AlterTable { - name, - operation: AlterTableOperation::AlterColumn { column_name, op }, - } => { - assert_eq!("tab", name.to_string()); + match alter_table_op(verified_stmt( + "ALTER TABLE tab ALTER COLUMN is_active SET DATA TYPE TEXT", + )) { + AlterTableOperation::AlterColumn { column_name, op } => { assert_eq!("is_active", column_name.to_string()); assert_eq!( op, @@ -3267,34 +3228,28 @@ fn parse_alter_table_alter_column_type() { #[test] fn parse_alter_table_drop_constraint() { let alter_stmt = "ALTER TABLE tab"; - match verified_stmt("ALTER TABLE tab DROP CONSTRAINT constraint_name CASCADE") { - Statement::AlterTable { - name, - operation: - AlterTableOperation::DropConstraint { - name: constr_name, - 
if_exists, - cascade, - }, + match alter_table_op(verified_stmt( + "ALTER TABLE tab DROP CONSTRAINT constraint_name CASCADE", + )) { + AlterTableOperation::DropConstraint { + name: constr_name, + if_exists, + cascade, } => { - assert_eq!("tab", name.to_string()); assert_eq!("constraint_name", constr_name.to_string()); assert!(!if_exists); assert!(cascade); } _ => unreachable!(), } - match verified_stmt("ALTER TABLE tab DROP CONSTRAINT IF EXISTS constraint_name") { - Statement::AlterTable { - name, - operation: - AlterTableOperation::DropConstraint { - name: constr_name, - if_exists, - cascade, - }, + match alter_table_op(verified_stmt( + "ALTER TABLE tab DROP CONSTRAINT IF EXISTS constraint_name", + )) { + AlterTableOperation::DropConstraint { + name: constr_name, + if_exists, + cascade, } => { - assert_eq!("tab", name.to_string()); assert_eq!("constraint_name", constr_name.to_string()); assert!(if_exists); assert!(!cascade); diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index ab7997cdf..7346b9c0d 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -14,6 +14,7 @@ //! Test SQL syntax specific to MySQL. The parser based on the generic dialect //! is also tested (on the inputs it can handle). 
+use matches::assert_matches; use sqlparser::ast::Expr; use sqlparser::ast::Value; use sqlparser::ast::*; @@ -1256,15 +1257,10 @@ fn parse_update_with_joins() { #[test] fn parse_alter_table_drop_primary_key() { - match mysql_and_generic().verified_stmt("ALTER TABLE tab DROP PRIMARY KEY") { - Statement::AlterTable { - name, - operation: AlterTableOperation::DropPrimaryKey, - } => { - assert_eq!("tab", name.to_string()); - } - _ => unreachable!(), - } + assert_matches!( + alter_table_op(mysql_and_generic().verified_stmt("ALTER TABLE tab DROP PRIMARY KEY")), + AlterTableOperation::DropPrimaryKey + ); } #[test] @@ -1278,22 +1274,16 @@ fn parse_alter_table_change_column() { }; let sql1 = "ALTER TABLE orders CHANGE COLUMN description desc TEXT NOT NULL"; - match mysql().verified_stmt(sql1) { - Statement::AlterTable { name, operation } => { - assert_eq!(expected_name, name); - assert_eq!(expected_operation, operation); - } - _ => unreachable!(), - } + let operation = + alter_table_op_with_name(mysql().verified_stmt(sql1), &expected_name.to_string()); + assert_eq!(expected_operation, operation); let sql2 = "ALTER TABLE orders CHANGE description desc TEXT NOT NULL"; - match mysql().one_statement_parses_to(sql2, sql1) { - Statement::AlterTable { name, operation } => { - assert_eq!(expected_name, name); - assert_eq!(expected_operation, operation); - } - _ => unreachable!(), - } + let operation = alter_table_op_with_name( + mysql().one_statement_parses_to(sql2, sql1), + &expected_name.to_string(), + ); + assert_eq!(expected_operation, operation); } #[test] diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index c34ba75a8..093fac8c1 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -546,12 +546,10 @@ fn parse_create_table_constraints_only() { #[test] fn parse_alter_table_constraints_rename() { - match pg().verified_stmt("ALTER TABLE tab RENAME CONSTRAINT old_name TO new_name") { - Statement::AlterTable { - name, - operation: 
AlterTableOperation::RenameConstraint { old_name, new_name }, - } => { - assert_eq!("tab", name.to_string()); + match alter_table_op( + pg().verified_stmt("ALTER TABLE tab RENAME CONSTRAINT old_name TO new_name"), + ) { + AlterTableOperation::RenameConstraint { old_name, new_name } => { assert_eq!(old_name.to_string(), "old_name"); assert_eq!(new_name.to_string(), "new_name"); } @@ -566,14 +564,12 @@ fn parse_alter_table_alter_column() { "ALTER TABLE tab ALTER COLUMN is_active SET DATA TYPE TEXT USING 'text'", ); - match pg() - .verified_stmt("ALTER TABLE tab ALTER COLUMN is_active SET DATA TYPE TEXT USING 'text'") - { - Statement::AlterTable { - name, - operation: AlterTableOperation::AlterColumn { column_name, op }, - } => { - assert_eq!("tab", name.to_string()); + match alter_table_op( + pg().verified_stmt( + "ALTER TABLE tab ALTER COLUMN is_active SET DATA TYPE TEXT USING 'text'", + ), + ) { + AlterTableOperation::AlterColumn { column_name, op } => { assert_eq!("is_active", column_name.to_string()); let using_expr = Expr::Value(Value::SingleQuotedString("text".to_string())); assert_eq!( @@ -588,6 +584,48 @@ fn parse_alter_table_alter_column() { } } +#[test] +fn parse_alter_table_add_columns() { + match pg().verified_stmt("ALTER TABLE IF EXISTS ONLY tab ADD COLUMN a TEXT, ADD COLUMN b INT") { + Statement::AlterTable { + name, + if_exists, + only, + operations, + } => { + assert_eq!(name.to_string(), "tab"); + assert!(if_exists); + assert!(only); + assert_eq!( + operations, + vec![ + AlterTableOperation::AddColumn { + column_keyword: true, + if_not_exists: false, + column_def: ColumnDef { + name: "a".into(), + data_type: DataType::Text, + collation: None, + options: vec![], + }, + }, + AlterTableOperation::AddColumn { + column_keyword: true, + if_not_exists: false, + column_def: ColumnDef { + name: "b".into(), + data_type: DataType::Int(None), + collation: None, + options: vec![], + }, + }, + ] + ); + } + _ => unreachable!(), + } +} + #[test] fn 
parse_create_table_if_not_exists() { let sql = "CREATE TABLE IF NOT EXISTS uk_cities ()"; diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index ecd608527..dabe66e1d 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -502,12 +502,8 @@ fn test_select_wildcard_with_exclude_and_rename() { #[test] fn test_alter_table_swap_with() { let sql = "ALTER TABLE tab1 SWAP WITH tab2"; - match snowflake_and_generic().verified_stmt(sql) { - Statement::AlterTable { - name, - operation: AlterTableOperation::SwapWith { table_name }, - } => { - assert_eq!("tab1", name.to_string()); + match alter_table_op_with_name(snowflake_and_generic().verified_stmt(sql), "tab1") { + AlterTableOperation::SwapWith { table_name } => { assert_eq!("tab2", table_name.to_string()); } _ => unreachable!(), From 2593dcfb79bb8709a6014673d47ffefcf9a68e20 Mon Sep 17 00:00:00 2001 From: ding-young Date: Fri, 8 Sep 2023 19:12:44 +0900 Subject: [PATCH 018/735] Add missing token loc in parse err msg (#965) --- src/parser/mod.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b872bbfc8..348267fd8 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1791,9 +1791,12 @@ impl<'a> Parser<'a> { | BinaryOperator::Eq | BinaryOperator::NotEq ) { - return parser_err!(format!( + return parser_err!( + format!( "Expected one of [=, >, <, =>, =<, !=] as comparison operator, found: {op}" - )); + ), + tok.location + ); }; Ok(match keyword { From bb7b05e106496c204b96d5371ed91bfb1edef728 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Berkay=20=C5=9Eahin?= <124376117+berkaysynnada@users.noreply.github.com> Date: Fri, 8 Sep 2023 13:47:56 +0300 Subject: [PATCH 019/735] feat: Group By All (#964) Co-authored-by: Andrew Lamb --- src/ast/mod.rs | 2 +- src/ast/query.rs | 37 ++++++++++++++++++++++++++++++++--- src/parser/mod.rs | 8 ++++++-- tests/sqlparser_clickhouse.rs | 2 +- tests/sqlparser_common.rs | 26 
++++++++++++++++++------ tests/sqlparser_duckdb.rs | 8 ++++---- tests/sqlparser_mssql.rs | 4 ++-- tests/sqlparser_mysql.rs | 16 +++++++-------- tests/sqlparser_postgres.rs | 18 ++++++++--------- 9 files changed, 85 insertions(+), 36 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 79edea7d1..eb8830bb1 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -36,7 +36,7 @@ pub use self::ddl::{ }; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ - Cte, Distinct, ExceptSelectItem, ExcludeSelectItem, Fetch, IdentWithAlias, Join, + Cte, Distinct, ExceptSelectItem, ExcludeSelectItem, Fetch, GroupByExpr, IdentWithAlias, Join, JoinConstraint, JoinOperator, LateralView, LockClause, LockType, NamedWindowDefinition, NonBlock, Offset, OffsetRows, OrderByExpr, Query, RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Table, diff --git a/src/ast/query.rs b/src/ast/query.rs index b70017654..c9fcdecc9 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -214,7 +214,7 @@ pub struct Select { /// WHERE pub selection: Option, /// GROUP BY - pub group_by: Vec, + pub group_by: GroupByExpr, /// CLUSTER BY (Hive) pub cluster_by: Vec, /// DISTRIBUTE BY (Hive) @@ -255,8 +255,13 @@ impl fmt::Display for Select { if let Some(ref selection) = self.selection { write!(f, " WHERE {selection}")?; } - if !self.group_by.is_empty() { - write!(f, " GROUP BY {}", display_comma_separated(&self.group_by))?; + match &self.group_by { + GroupByExpr::All => write!(f, " GROUP BY ALL")?, + GroupByExpr::Expressions(exprs) => { + if !exprs.is_empty() { + write!(f, " GROUP BY {}", display_comma_separated(exprs))?; + } + } } if !self.cluster_by.is_empty() { write!( @@ -1218,3 +1223,29 @@ impl fmt::Display for SelectInto { write!(f, "INTO{}{}{} {}", temporary, unlogged, table, self.name) } } + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", 
derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum GroupByExpr { + /// ALL syntax of [Snowflake], and [DuckDB] + /// + /// [Snowflake]: + /// [DuckDB]: + All, + + /// Expressions + Expressions(Vec), +} + +impl fmt::Display for GroupByExpr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + GroupByExpr::All => write!(f, "GROUP BY ALL"), + GroupByExpr::Expressions(col_names) => { + let col_names = display_comma_separated(col_names); + write!(f, "GROUP BY ({col_names})") + } + } + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 348267fd8..6f81e1e4e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5662,9 +5662,13 @@ impl<'a> Parser<'a> { }; let group_by = if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_group_by_expr)? + if self.parse_keyword(Keyword::ALL) { + GroupByExpr::All + } else { + GroupByExpr::Expressions(self.parse_comma_separated(Parser::parse_group_by_expr)?) 
+ } } else { - vec![] + GroupByExpr::Expressions(vec![]) }; let cluster_by = if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 77b936d55..5241777e5 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -97,7 +97,7 @@ fn parse_map_access_expr() { right: Box::new(Expr::Value(Value::SingleQuotedString("foo".to_string()))) }) }), - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 1bf108cb0..be10914dc 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -247,7 +247,9 @@ fn parse_update_set_from() { }], lateral_views: vec![], selection: None, - group_by: vec![Expr::Identifier(Ident::new("id"))], + group_by: GroupByExpr::Expressions(vec![Expr::Identifier(Ident::new( + "id" + ))]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -1808,10 +1810,10 @@ fn parse_select_group_by() { let sql = "SELECT id, fname, lname FROM customer GROUP BY lname, fname"; let select = verified_only_select(sql); assert_eq!( - vec![ + GroupByExpr::Expressions(vec![ Expr::Identifier(Ident::new("lname")), Expr::Identifier(Ident::new("fname")), - ], + ]), select.group_by ); @@ -1822,6 +1824,18 @@ fn parse_select_group_by() { ); } +#[test] +fn parse_select_group_by_all() { + let sql = "SELECT id, fname, lname, SUM(order) FROM customer GROUP BY ALL"; + let select = verified_only_select(sql); + assert_eq!(GroupByExpr::All, select.group_by); + + one_statement_parses_to( + "SELECT id, fname, lname, SUM(order) FROM customer GROUP BY ALL", + "SELECT id, fname, lname, SUM(order) FROM customer GROUP BY ALL", + ); +} + #[test] fn parse_select_having() { let sql = "SELECT foo FROM bar GROUP BY foo HAVING COUNT(*) > 1"; @@ -3543,7 +3557,7 @@ fn test_parse_named_window() { }], lateral_views: vec![], selection: None, 
- group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -3930,7 +3944,7 @@ fn parse_interval_and_or_xor() { }), }), }), - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -6333,7 +6347,7 @@ fn parse_merge() { }], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 3587e8d90..0fe4bb0e7 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -161,7 +161,7 @@ fn test_select_union_by_name() { }], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -194,7 +194,7 @@ fn test_select_union_by_name() { }], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -236,7 +236,7 @@ fn test_select_union_by_name() { }], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -269,7 +269,7 @@ fn test_select_union_by_name() { }], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index c4e0f3274..9ed12ac21 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -103,7 +103,7 @@ fn parse_create_procedure() { from: vec![], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -519,7 +519,7 @@ 
fn parse_substring_in_select() { }], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 7346b9c0d..00901799f 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -496,7 +496,7 @@ fn parse_escaped_quote_identifiers_with_escape() { from: vec![], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -538,7 +538,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() { from: vec![], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -577,7 +577,7 @@ fn parse_escaped_backticks_with_escape() { from: vec![], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -616,7 +616,7 @@ fn parse_escaped_backticks_with_no_escape() { from: vec![], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -1100,7 +1100,7 @@ fn parse_select_with_numeric_prefix_column_name() { }], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -1149,7 +1149,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { }], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -1325,7 +1325,7 @@ fn parse_substring_in_select() { }], lateral_views: vec![], selection: None, - group_by: vec![], + 
group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -1604,7 +1604,7 @@ fn parse_hex_string_introducer() { from: vec![], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 093fac8c1..079e4695a 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -990,7 +990,7 @@ fn parse_copy_to() { from: vec![], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), having: None, named_window: vec![], cluster_by: vec![], @@ -2019,7 +2019,7 @@ fn parse_array_subquery_expr() { from: vec![], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -2035,7 +2035,7 @@ fn parse_array_subquery_expr() { from: vec![], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -3321,14 +3321,14 @@ fn parse_select_group_by_grouping_sets() { "SELECT brand, size, sum(sales) FROM items_sold GROUP BY size, GROUPING SETS ((brand), (size), ())" ); assert_eq!( - vec![ + GroupByExpr::Expressions(vec![ Expr::Identifier(Ident::new("size")), Expr::GroupingSets(vec![ vec![Expr::Identifier(Ident::new("brand"))], vec![Expr::Identifier(Ident::new("size"))], vec![], ]), - ], + ]), select.group_by ); } @@ -3339,13 +3339,13 @@ fn parse_select_group_by_rollup() { "SELECT brand, size, sum(sales) FROM items_sold GROUP BY size, ROLLUP (brand, size)", ); assert_eq!( - vec![ + GroupByExpr::Expressions(vec![ Expr::Identifier(Ident::new("size")), Expr::Rollup(vec![ vec![Expr::Identifier(Ident::new("brand"))], vec![Expr::Identifier(Ident::new("size"))], ]), - ], + ]), select.group_by 
); } @@ -3356,13 +3356,13 @@ fn parse_select_group_by_cube() { "SELECT brand, size, sum(sales) FROM items_sold GROUP BY size, CUBE (brand, size)", ); assert_eq!( - vec![ + GroupByExpr::Expressions(vec![ Expr::Identifier(Ident::new("size")), Expr::Cube(vec![ vec![Expr::Identifier(Ident::new("brand"))], vec![Expr::Identifier(Ident::new("size"))], ]), - ], + ]), select.group_by ); } From 0480ee9886e73affaebe330844cd5c26e0e8e1b0 Mon Sep 17 00:00:00 2001 From: artorias1024 <82564604+artorias1024@users.noreply.github.com> Date: Fri, 8 Sep 2023 18:58:31 +0800 Subject: [PATCH 020/735] feat: Add support for parsing the syntax of MySQL UNIQUE KEY. (#962) Co-authored-by: yukunpeng Co-authored-by: Andrew Lamb --- src/parser/mod.rs | 12 ++++++--- tests/sqlparser_mysql.rs | 56 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 3 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6f81e1e4e..4d642919e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3941,9 +3941,15 @@ impl<'a> Parser<'a> { match next_token.token { Token::Word(w) if w.keyword == Keyword::PRIMARY || w.keyword == Keyword::UNIQUE => { let is_primary = w.keyword == Keyword::PRIMARY; - if is_primary { - self.expect_keyword(Keyword::KEY)?; - } + + // parse optional [KEY] + let _ = self.parse_keyword(Keyword::KEY); + + // optional constraint name + let name = self + .maybe_parse(|parser| parser.parse_identifier()) + .or(name); + let columns = self.parse_parenthesized_column_list(Mandatory, false)?; Ok(Some(TableConstraint::Unique { name, diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 00901799f..5aeffb0b6 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -297,6 +297,62 @@ fn parse_create_table_auto_increment() { } } +#[test] +fn parse_create_table_unique_key() { + let sql = "CREATE TABLE foo (id INT PRIMARY KEY AUTO_INCREMENT, bar INT NOT NULL, UNIQUE KEY bar_key (bar))"; + let canonical = "CREATE TABLE foo (id INT PRIMARY 
KEY AUTO_INCREMENT, bar INT NOT NULL, CONSTRAINT bar_key UNIQUE (bar))"; + match mysql().one_statement_parses_to(sql, canonical) { + Statement::CreateTable { + name, + columns, + constraints, + .. + } => { + assert_eq!(name.to_string(), "foo"); + assert_eq!( + vec![TableConstraint::Unique { + name: Some(Ident::new("bar_key")), + columns: vec![Ident::new("bar")], + is_primary: false + }], + constraints + ); + assert_eq!( + vec![ + ColumnDef { + name: Ident::new("id"), + data_type: DataType::Int(None), + collation: None, + options: vec![ + ColumnOptionDef { + name: None, + option: ColumnOption::Unique { is_primary: true }, + }, + ColumnOptionDef { + name: None, + option: ColumnOption::DialectSpecific(vec![Token::make_keyword( + "AUTO_INCREMENT" + )]), + }, + ], + }, + ColumnDef { + name: Ident::new("bar"), + data_type: DataType::Int(None), + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::NotNull, + },], + }, + ], + columns + ); + } + _ => unreachable!(), + } +} + #[test] fn parse_create_table_comment() { let canonical = "CREATE TABLE foo (bar INT) COMMENT 'baz'"; From a16791d0199633333909044b2e56ca21d96ac83b Mon Sep 17 00:00:00 2001 From: William Date: Thu, 14 Sep 2023 13:56:49 -0400 Subject: [PATCH 021/735] Support `UNNEST` as a table factor for PostgreSQL (#968) --- src/parser/mod.rs | 2 +- tests/sqlparser_postgres.rs | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 4d642919e..11f967876 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6240,7 +6240,7 @@ impl<'a> Parser<'a> { // appearing alone in parentheses (e.g. 
`FROM (mytable)`) self.expected("joined table", self.peek_token()) } - } else if dialect_of!(self is BigQueryDialect | GenericDialect) + } else if dialect_of!(self is BigQueryDialect | PostgreSqlDialect | GenericDialect) && self.parse_keyword(Keyword::UNNEST) { self.expect_token(&Token::LParen)?; diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 079e4695a..77e5ba45e 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -3450,3 +3450,30 @@ fn parse_create_table_with_alias() { _ => unreachable!(), } } + +#[test] +fn parse_join_constraint_unnest_alias() { + assert_eq!( + only( + pg().verified_only_select("SELECT * FROM t1 JOIN UNNEST(t1.a) AS f ON c1 = c2") + .from + ) + .joins, + vec![Join { + relation: TableFactor::UNNEST { + alias: table_alias("f"), + array_exprs: vec![Expr::CompoundIdentifier(vec![ + Ident::new("t1"), + Ident::new("a") + ])], + with_offset: false, + with_offset_alias: None + }, + join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { + left: Box::new(Expr::Identifier("c1".into())), + op: BinaryOperator::Eq, + right: Box::new(Expr::Identifier("c2".into())), + })), + }] + ); +} From f6e4be4c154e81cf457249a310004e95d16ecf6f Mon Sep 17 00:00:00 2001 From: "chunshao.rcs" Date: Fri, 15 Sep 2023 02:21:47 +0800 Subject: [PATCH 022/735] Support mysql `partition` to table selection (#959) Co-authored-by: Andrew Lamb --- src/ast/query.rs | 6 +++++ src/keywords.rs | 2 ++ src/parser/mod.rs | 41 +++++++++++++++++++++++++++++++++++ src/test_utils.rs | 1 + tests/sqlparser_bigquery.rs | 2 ++ tests/sqlparser_clickhouse.rs | 16 ++++++++------ tests/sqlparser_common.rs | 29 +++++++++++++++++++++++++ tests/sqlparser_duckdb.rs | 4 ++++ tests/sqlparser_hive.rs | 1 + tests/sqlparser_mssql.rs | 3 +++ tests/sqlparser_mysql.rs | 5 +++++ tests/sqlparser_postgres.rs | 1 + tests/sqlparser_redshift.rs | 3 +++ tests/sqlparser_snowflake.rs | 1 + 14 files changed, 108 insertions(+), 7 deletions(-) diff --git 
a/src/ast/query.rs b/src/ast/query.rs index c9fcdecc9..af35c37a3 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -670,6 +670,8 @@ pub enum TableFactor { /// Optional version qualifier to facilitate table time-travel, as /// supported by BigQuery and MSSQL. version: Option, + /// [Partition selection](https://dev.mysql.com/doc/refman/8.0/en/partitioning-selection.html), supported by MySQL. + partitions: Vec, }, Derived { lateral: bool, @@ -730,8 +732,12 @@ impl fmt::Display for TableFactor { args, with_hints, version, + partitions, } => { write!(f, "{name}")?; + if !partitions.is_empty() { + write!(f, "PARTITION ({})", display_comma_separated(partitions))?; + } if let Some(args) = args { write!(f, "({})", display_comma_separated(args))?; } diff --git a/src/keywords.rs b/src/keywords.rs index c73535fca..ad0526ccd 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -716,6 +716,8 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::QUALIFY, Keyword::WINDOW, Keyword::END, + // for MYSQL PARTITION SELECTION + Keyword::PARTITION, ]; /// Can't be used as a column alias, so that `SELECT alias` diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 11f967876..ba8f5784f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6277,6 +6277,14 @@ impl<'a> Parser<'a> { } else { let name = self.parse_object_name()?; + let partitions: Vec = if dialect_of!(self is MySqlDialect | GenericDialect) + && self.parse_keyword(Keyword::PARTITION) + { + self.parse_partitions()? 
+ } else { + vec![] + }; + // Parse potential version qualifier let version = self.parse_table_version()?; @@ -6311,6 +6319,7 @@ impl<'a> Parser<'a> { args, with_hints, version, + partitions, }) } } @@ -7483,6 +7492,13 @@ impl<'a> Parser<'a> { representation: UserDefinedTypeRepresentation::Composite { attributes }, }) } + + fn parse_partitions(&mut self) -> Result, ParserError> { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_identifier)?; + self.expect_token(&Token::RParen)?; + Ok(partitions) + } } impl Word { @@ -8100,4 +8116,29 @@ mod tests { "sql parser error: Unexpected token following period in identifier: *", ); } + + #[test] + fn test_mysql_partition_selection() { + let sql = "SELECT * FROM employees PARTITION (p0, p2)"; + let expected = vec!["p0", "p2"]; + + let ast: Vec = Parser::parse_sql(&MySqlDialect {}, sql).unwrap(); + assert_eq!(ast.len(), 1); + if let Statement::Query(v) = &ast[0] { + if let SetExpr::Select(select) = &*v.body { + assert_eq!(select.from.len(), 1); + let from: &TableWithJoins = &select.from[0]; + let table_factor = &from.relation; + if let TableFactor::Table { partitions, .. 
} = table_factor { + let actual: Vec<&str> = partitions + .iter() + .map(|ident| ident.value.as_str()) + .collect(); + assert_eq!(expected, actual); + } + } + } else { + panic!("fail to parse mysql partition selection"); + } + } } diff --git a/src/test_utils.rs b/src/test_utils.rs index 0db1e7d24..b81cd5f4e 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -248,6 +248,7 @@ pub fn table(name: impl Into) -> TableFactor { args: None, with_hints: vec![], version: None, + partitions: vec![], } } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index d6a28fd00..3502d7dfa 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -96,6 +96,7 @@ fn parse_table_identifiers() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![] },] @@ -160,6 +161,7 @@ fn parse_table_time_travel() { version: Some(TableVersion::ForSystemTimeAsOf(Expr::Value( Value::SingleQuotedString(version) ))), + partitions: vec![], }, joins: vec![] },] diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 5241777e5..a14598b3d 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -63,15 +63,16 @@ fn parse_map_access_expr() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, - joins: vec![] + joins: vec![], }], lateral_views: vec![], selection: Some(BinaryOp { left: Box::new(BinaryOp { left: Box::new(Identifier(Ident::new("id"))), op: BinaryOperator::Eq, - right: Box::new(Expr::Value(Value::SingleQuotedString("test".to_string()))) + right: Box::new(Expr::Value(Value::SingleQuotedString("test".to_string()))), }), op: BinaryOperator::And, right: Box::new(BinaryOp { @@ -91,11 +92,11 @@ fn parse_map_access_expr() { distinct: false, special: false, order_by: vec![], - })] + })], }), op: BinaryOperator::NotEq, - right: Box::new(Expr::Value(Value::SingleQuotedString("foo".to_string()))) - }) + right: 
Box::new(Expr::Value(Value::SingleQuotedString("foo".to_string()))), + }), }), group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], @@ -103,7 +104,7 @@ fn parse_map_access_expr() { sort_by: vec![], having: None, named_window: vec![], - qualify: None + qualify: None, }, select ); @@ -117,7 +118,7 @@ fn parse_array_expr() { &Expr::Array(Array { elem: vec![ Expr::Value(Value::SingleQuotedString("1".to_string())), - Expr::Value(Value::SingleQuotedString("2".to_string())) + Expr::Value(Value::SingleQuotedString("2".to_string())), ], named: false, }), @@ -171,6 +172,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index be10914dc..6f780de9e 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -215,6 +215,7 @@ fn parse_update_set_from() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }, @@ -242,6 +243,7 @@ fn parse_update_set_from() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }], @@ -308,6 +310,7 @@ fn parse_update_with_table_alias() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }, @@ -371,6 +374,7 @@ fn parse_select_with_table_alias() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }] @@ -402,6 +406,7 @@ fn parse_delete_statement() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, from[0].relation ); @@ -430,6 +435,7 @@ fn parse_delete_statement_for_multi_tables() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, from[0].relation ); @@ -440,6 +446,7 @@ fn parse_delete_statement_for_multi_tables() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, from[0].joins[0].relation 
); @@ -464,6 +471,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, from[0].relation ); @@ -474,6 +482,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, from[1].relation ); @@ -484,6 +493,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, using[0].relation ); @@ -494,6 +504,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, using[0].joins[0].relation ); @@ -522,6 +533,7 @@ fn parse_where_delete_statement() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, from[0].relation, ); @@ -564,6 +576,7 @@ fn parse_where_delete_with_alias_statement() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, from[0].relation, ); @@ -578,6 +591,7 @@ fn parse_where_delete_with_alias_statement() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }]), @@ -3552,6 +3566,7 @@ fn test_parse_named_window() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }], @@ -3891,6 +3906,7 @@ fn parse_interval_and_or_xor() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }], @@ -4496,6 +4512,7 @@ fn parse_implicit_join() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }, @@ -4506,6 +4523,7 @@ fn parse_implicit_join() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }, @@ -4524,6 +4542,7 @@ fn parse_implicit_join() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![Join { relation: TableFactor::Table { @@ -4532,6 +4551,7 @@ fn parse_implicit_join() { args: None, with_hints: vec![], version: None, + 
partitions: vec![], }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -4543,6 +4563,7 @@ fn parse_implicit_join() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![Join { relation: TableFactor::Table { @@ -4551,6 +4572,7 @@ fn parse_implicit_join() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -4572,6 +4594,7 @@ fn parse_cross_join() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, join_operator: JoinOperator::CrossJoin, }, @@ -4593,6 +4616,7 @@ fn parse_joins_on() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, join_operator: f(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::Identifier("c1".into())), @@ -4663,6 +4687,7 @@ fn parse_joins_using() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, join_operator: f(JoinConstraint::Using(vec!["c1".into()])), } @@ -4725,6 +4750,7 @@ fn parse_natural_join() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, join_operator: f(JoinConstraint::Natural), } @@ -4990,6 +5016,7 @@ fn parse_derived_tables() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -6318,6 +6345,7 @@ fn parse_merge() { args: None, with_hints: vec![], version: None, + partitions: vec![], } ); assert_eq!(table, table_no_into); @@ -6342,6 +6370,7 @@ fn parse_merge() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }], diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 0fe4bb0e7..b05cc0dd4 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -156,6 +156,7 @@ fn test_select_union_by_name() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }], @@ -189,6 +190,7 @@ fn test_select_union_by_name() { 
args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }], @@ -231,6 +233,7 @@ fn test_select_union_by_name() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }], @@ -264,6 +267,7 @@ fn test_select_union_by_name() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], }], diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 8cdfe9248..6ca47e12c 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -323,6 +323,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 9ed12ac21..135e5d138 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -58,6 +58,7 @@ fn parse_table_time_travel() { version: Some(TableVersion::ForSystemTimeAsOf(Expr::Value( Value::SingleQuotedString(version) ))), + partitions: vec![], }, joins: vec![] },] @@ -309,6 +310,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); @@ -514,6 +516,7 @@ fn parse_substring_in_select() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![] }], diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 5aeffb0b6..80ef9f981 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1151,6 +1151,7 @@ fn parse_select_with_numeric_prefix_column_name() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![] }], @@ -1200,6 +1201,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, 
joins: vec![] }], @@ -1260,6 +1262,7 @@ fn parse_update_with_joins() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![Join { relation: TableFactor::Table { @@ -1271,6 +1274,7 @@ fn parse_update_with_joins() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::CompoundIdentifier(vec![ @@ -1376,6 +1380,7 @@ fn parse_substring_in_select() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![] }], diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 77e5ba45e..bb3857817 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2893,6 +2893,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 9f5f62f78..f17ca5841 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -46,6 +46,7 @@ fn test_square_brackets_over_db_schema_table_name() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], } @@ -91,6 +92,7 @@ fn test_double_quotes_over_db_schema_table_name() { args: None, with_hints: vec![], version: None, + partitions: vec![], }, joins: vec![], } @@ -111,6 +113,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index dabe66e1d..e1db7ec61 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -224,6 +224,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + partitions: _, } => { 
assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); From 71c35d4dfddc89cfdd8f67bdc4c5d4e701a355e8 Mon Sep 17 00:00:00 2001 From: Ilya Date: Wed, 20 Sep 2023 04:31:11 +0300 Subject: [PATCH 023/735] Add support for == operator for Sqlite (#970) --- src/tokenizer.rs | 1 + tests/sqlparser_sqlite.rs | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 1b1b1e96c..175b5d3b1 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -885,6 +885,7 @@ impl<'a> Tokenizer<'a> { chars.next(); // consume match chars.peek() { Some('>') => self.consume_and_return(chars, Token::RArrow), + Some('=') => self.consume_and_return(chars, Token::DoubleEq), _ => Ok(Some(Token::Eq)), } } diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 8f6cc7572..fd7a22461 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -61,6 +61,14 @@ fn parse_create_virtual_table() { sqlite_and_generic().verified_stmt(sql); } +#[test] +fn double_equality_operator() { + // Sqlite supports this operator: https://www.sqlite.org/lang_expr.html#binaryops + let input = "SELECT a==b FROM t"; + let expected = "SELECT a = b FROM t"; + let _ = sqlite_and_generic().one_statement_parses_to(input, expected); +} + #[test] fn parse_create_table_auto_increment() { let sql = "CREATE TABLE foo (bar INT PRIMARY KEY AUTOINCREMENT)"; From 521ffa945c9b9562d1622cb2860faf0830994bc6 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 21 Sep 2023 13:45:19 -0400 Subject: [PATCH 024/735] Changelog for 0.38.0 release (#973) --- CHANGELOG.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c6c9dcc6..9e3e4e1a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,29 @@ Given that the parser produces a typed AST, any changes to the AST will technica ## [Unreleased] Check 
https://github.com/sqlparser-rs/sqlparser-rs/commits/main for undocumented changes. +## [0.38.0] 2023-09-21 + +### Added + +* Support `==`operator for Sqlite (#970) - Thanks @marhoily +* Support mysql `PARTITION` to table selection (#959) - Thanks @chunshao90 +* Support `UNNEST` as a table factor for PostgreSQL (#968) @hexedpackets +* Support MySQL `UNIQUE KEY` syntax (#962) - Thanks @artorias1024 +* Support` `GROUP BY ALL` (#964) - @berkaysynnada +* Support multiple actions in one ALTER TABLE statement (#960) - Thanks @ForbesLindesay +* Add `--sqlite param` to CLI (#956) - Thanks @ddol + +### Fixed +* Fix Rust 1.72 clippy lints (#957) - Thanks @alamb + +### Changed +* Add missing token loc in parse err msg (#965) - Thanks @ding-young +* Change how `ANY` and `ALL` expressions are represented in AST (#963) - Thanks @SeanTroyUWO +* Show location info in parse errors (#958) - Thanks @MartinNowak +* Update release documentation (#954) - Thanks @alamb +* Break test and coverage test into separate jobs (#949) - Thanks @alamb + + ## [0.37.0] 2023-08-22 ### Added From 7723ea56c5119c7d1a15233c18eb2aaf48b60dc0 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 21 Sep 2023 13:47:15 -0400 Subject: [PATCH 025/735] chore: Release sqlparser version 0.38.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 43d9f981d..4cc2f1eaa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.37.0" +version = "0.38.0" authors = ["Andy Grove "] homepage = "https://github.com/sqlparser-rs/sqlparser-rs" documentation = "https://docs.rs/sqlparser/" From 495d0a02d5680c93a8886fde7cec4d9998c9b595 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Mon, 2 Oct 2023 13:10:56 +0200 Subject: [PATCH 026/735] Add support for ATTACH DATABASE (#989) --- src/ast/mod.rs | 18 ++++++++++++++++++ src/keywords.rs 
| 1 + src/parser/mod.rs | 13 +++++++++++++ tests/sqlparser_sqlite.rs | 18 ++++++++++++++++++ 4 files changed, 50 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index eb8830bb1..48274f68c 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1429,6 +1429,16 @@ pub enum Statement { name: Ident, operation: AlterRoleOperation, }, + /// ATTACH DATABASE 'path/to/file' AS alias + /// (SQLite-specific) + AttachDatabase { + /// The name to bind to the newly attached database + schema_name: Ident, + /// An expression that indicates the path to the database file + database_file_name: Expr, + /// true if the syntax is 'ATTACH DATABASE', false if it's just 'ATTACH' + database: bool, + }, /// DROP Drop { /// The type of the object to drop: TABLE, VIEW, etc. @@ -1969,6 +1979,14 @@ impl fmt::Display for Statement { } Ok(()) } + Statement::AttachDatabase { + schema_name, + database_file_name, + database, + } => { + let keyword = if *database { "DATABASE " } else { "" }; + write!(f, "ATTACH {keyword}{database_file_name} AS {schema_name}") + } Statement::Analyze { table_name, partitions, diff --git a/src/keywords.rs b/src/keywords.rs index ad0526ccd..eee961350 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -95,6 +95,7 @@ define_keywords!( ASYMMETRIC, AT, ATOMIC, + ATTACH, AUTHORIZATION, AUTOINCREMENT, AUTO_INCREMENT, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index ba8f5784f..49cd24899 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -456,6 +456,7 @@ impl<'a> Parser<'a> { Ok(Statement::Query(Box::new(self.parse_query()?))) } Keyword::TRUNCATE => Ok(self.parse_truncate()?), + Keyword::ATTACH => Ok(self.parse_attach_database()?), Keyword::MSCK => Ok(self.parse_msck()?), Keyword::CREATE => Ok(self.parse_create()?), Keyword::CACHE => Ok(self.parse_cache_table()?), @@ -543,6 +544,18 @@ impl<'a> Parser<'a> { }) } + pub fn parse_attach_database(&mut self) -> Result { + let database = self.parse_keyword(Keyword::DATABASE); + let database_file_name = 
self.parse_expr()?; + self.expect_keyword(Keyword::AS)?; + let schema_name = self.parse_identifier()?; + Ok(Statement::AttachDatabase { + database, + schema_name, + database_file_name, + }) + } + pub fn parse_analyze(&mut self) -> Result { self.expect_keyword(Keyword::TABLE)?; let table_name = self.parse_object_name()?; diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index fd7a22461..c4e69d530 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -259,6 +259,24 @@ fn parse_create_table_with_strict() { } } +#[test] +fn parse_attach_database() { + let sql = "ATTACH DATABASE 'test.db' AS test"; + let verified_stmt = sqlite().verified_stmt(sql); + assert_eq!(sql, format!("{}", verified_stmt)); + match verified_stmt { + Statement::AttachDatabase { + schema_name, + database_file_name: Expr::Value(Value::SingleQuotedString(literal_name)), + database: true, + } => { + assert_eq!(schema_name.value, "test"); + assert_eq!(literal_name, "test.db"); + } + _ => unreachable!(), + } +} + fn sqlite() -> TestedDialects { TestedDialects { dialects: vec![Box::new(SQLiteDialect {})], From e718ce6c42365d4fb987ce4977716c28091020a5 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Mon, 2 Oct 2023 13:23:25 +0200 Subject: [PATCH 027/735] bigquery: EXTRACT support For DAYOFWEEK, DAYOFYEAR, ISOWEEK, TIME (#980) --- src/ast/value.rs | 8 ++++++++ src/keywords.rs | 3 +++ src/parser/mod.rs | 4 ++++ tests/sqlparser_common.rs | 4 ++++ 4 files changed, 19 insertions(+) diff --git a/src/ast/value.rs b/src/ast/value.rs index 9c18a325c..e6f139256 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -117,6 +117,8 @@ pub enum DateTimeField { Month, Week, Day, + DayOfWeek, + DayOfYear, Date, Hour, Minute, @@ -127,6 +129,7 @@ pub enum DateTimeField { Doy, Epoch, Isodow, + IsoWeek, Isoyear, Julian, Microsecond, @@ -138,6 +141,7 @@ pub enum DateTimeField { Nanosecond, Nanoseconds, Quarter, + Time, Timezone, TimezoneHour, TimezoneMinute, @@ -151,6 +155,8 @@ impl 
fmt::Display for DateTimeField { DateTimeField::Month => "MONTH", DateTimeField::Week => "WEEK", DateTimeField::Day => "DAY", + DateTimeField::DayOfWeek => "DAYOFWEEK", + DateTimeField::DayOfYear => "DAYOFYEAR", DateTimeField::Date => "DATE", DateTimeField::Hour => "HOUR", DateTimeField::Minute => "MINUTE", @@ -162,6 +168,7 @@ impl fmt::Display for DateTimeField { DateTimeField::Epoch => "EPOCH", DateTimeField::Isodow => "ISODOW", DateTimeField::Isoyear => "ISOYEAR", + DateTimeField::IsoWeek => "ISOWEEK", DateTimeField::Julian => "JULIAN", DateTimeField::Microsecond => "MICROSECOND", DateTimeField::Microseconds => "MICROSECONDS", @@ -172,6 +179,7 @@ impl fmt::Display for DateTimeField { DateTimeField::Nanosecond => "NANOSECOND", DateTimeField::Nanoseconds => "NANOSECONDS", DateTimeField::Quarter => "QUARTER", + DateTimeField::Time => "TIME", DateTimeField::Timezone => "TIMEZONE", DateTimeField::TimezoneHour => "TIMEZONE_HOUR", DateTimeField::TimezoneMinute => "TIMEZONE_MINUTE", diff --git a/src/keywords.rs b/src/keywords.rs index eee961350..6fb74a8e0 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -196,6 +196,8 @@ define_keywords!( DATE, DATETIME, DAY, + DAYOFWEEK, + DAYOFYEAR, DEALLOCATE, DEC, DECADE, @@ -334,6 +336,7 @@ define_keywords!( IS, ISODOW, ISOLATION, + ISOWEEK, ISOYEAR, JAR, JOIN, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 49cd24899..279abf968 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1508,6 +1508,8 @@ impl<'a> Parser<'a> { Keyword::MONTH => Ok(DateTimeField::Month), Keyword::WEEK => Ok(DateTimeField::Week), Keyword::DAY => Ok(DateTimeField::Day), + Keyword::DAYOFWEEK => Ok(DateTimeField::DayOfWeek), + Keyword::DAYOFYEAR => Ok(DateTimeField::DayOfYear), Keyword::DATE => Ok(DateTimeField::Date), Keyword::HOUR => Ok(DateTimeField::Hour), Keyword::MINUTE => Ok(DateTimeField::Minute), @@ -1519,6 +1521,7 @@ impl<'a> Parser<'a> { Keyword::EPOCH => Ok(DateTimeField::Epoch), Keyword::ISODOW => Ok(DateTimeField::Isodow), 
Keyword::ISOYEAR => Ok(DateTimeField::Isoyear), + Keyword::ISOWEEK => Ok(DateTimeField::IsoWeek), Keyword::JULIAN => Ok(DateTimeField::Julian), Keyword::MICROSECOND => Ok(DateTimeField::Microsecond), Keyword::MICROSECONDS => Ok(DateTimeField::Microseconds), @@ -1529,6 +1532,7 @@ impl<'a> Parser<'a> { Keyword::NANOSECOND => Ok(DateTimeField::Nanosecond), Keyword::NANOSECONDS => Ok(DateTimeField::Nanoseconds), Keyword::QUARTER => Ok(DateTimeField::Quarter), + Keyword::TIME => Ok(DateTimeField::Time), Keyword::TIMEZONE => Ok(DateTimeField::Timezone), Keyword::TIMEZONE_HOUR => Ok(DateTimeField::TimezoneHour), Keyword::TIMEZONE_MINUTE => Ok(DateTimeField::TimezoneMinute), diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 6f780de9e..a9ce3cd6c 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2069,6 +2069,8 @@ fn parse_extract() { verified_stmt("SELECT EXTRACT(MONTH FROM d)"); verified_stmt("SELECT EXTRACT(WEEK FROM d)"); verified_stmt("SELECT EXTRACT(DAY FROM d)"); + verified_stmt("SELECT EXTRACT(DAYOFWEEK FROM d)"); + verified_stmt("SELECT EXTRACT(DAYOFYEAR FROM d)"); verified_stmt("SELECT EXTRACT(DATE FROM d)"); verified_stmt("SELECT EXTRACT(HOUR FROM d)"); verified_stmt("SELECT EXTRACT(MINUTE FROM d)"); @@ -2082,6 +2084,7 @@ fn parse_extract() { verified_stmt("SELECT EXTRACT(DOY FROM d)"); verified_stmt("SELECT EXTRACT(EPOCH FROM d)"); verified_stmt("SELECT EXTRACT(ISODOW FROM d)"); + verified_stmt("SELECT EXTRACT(ISOWEEK FROM d)"); verified_stmt("SELECT EXTRACT(ISOYEAR FROM d)"); verified_stmt("SELECT EXTRACT(JULIAN FROM d)"); verified_stmt("SELECT EXTRACT(MICROSECOND FROM d)"); @@ -2094,6 +2097,7 @@ fn parse_extract() { verified_stmt("SELECT EXTRACT(TIMEZONE FROM d)"); verified_stmt("SELECT EXTRACT(TIMEZONE_HOUR FROM d)"); verified_stmt("SELECT EXTRACT(TIMEZONE_MINUTE FROM d)"); + verified_stmt("SELECT EXTRACT(TIME FROM d)"); let res = parse_sql_statements("SELECT EXTRACT(JIFFY FROM d)"); assert_eq!( From 
4903bd4b8b8b615a2d1eb0bfad028952321ede56 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Mon, 2 Oct 2023 13:39:44 +0200 Subject: [PATCH 028/735] Add test for clickhouse: tokenize `==` as Token::DoubleEq (#981) --- src/test_utils.rs | 3 +++ src/tokenizer.rs | 24 +++++++++++++++++++++++- tests/sqlparser_clickhouse.rs | 8 ++++++++ tests/sqlparser_common.rs | 4 ++-- 4 files changed, 36 insertions(+), 3 deletions(-) diff --git a/src/test_utils.rs b/src/test_utils.rs index b81cd5f4e..8c64bfacd 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -31,6 +31,9 @@ use crate::parser::{Parser, ParserError}; use crate::tokenizer::Tokenizer; use crate::{ast::*, parser::ParserOptions}; +#[cfg(test)] +use pretty_assertions::assert_eq; + /// Tests use the methods on this struct to invoke the parser on one or /// multiple dialects. pub struct TestedDialects { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 175b5d3b1..067aa5a84 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1368,7 +1368,7 @@ fn peeking_take_while(chars: &mut State, mut predicate: impl FnMut(char) -> bool #[cfg(test)] mod tests { use super::*; - use crate::dialect::{GenericDialect, MsSqlDialect}; + use crate::dialect::{ClickHouseDialect, GenericDialect, MsSqlDialect}; #[test] fn tokenizer_error_impl() { @@ -1414,6 +1414,28 @@ mod tests { compare(expected, tokens); } + #[test] + fn tokenize_clickhouse_double_equal() { + let sql = String::from("SELECT foo=='1'"); + let dialect = ClickHouseDialect {}; + let mut tokenizer = Tokenizer::new(&dialect, &sql); + let tokens = tokenizer.tokenize().unwrap(); + + let expected = vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Word(Word { + value: "foo".to_string(), + quote_style: None, + keyword: Keyword::NoKeyword, + }), + Token::DoubleEq, + Token::SingleQuotedString("1".to_string()), + ]; + + compare(expected, tokens); + } + #[test] fn tokenize_select_exponent() { let sql = String::from("SELECT 1e10, 1e-10, 
1e+10, 1ea, 1e-10a, 1e-10-10"); diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index a14598b3d..936b0799a 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -336,6 +336,14 @@ fn parse_create_table() { ); } +#[test] +fn parse_double_equal() { + clickhouse().one_statement_parses_to( + r#"SELECT foo FROM bar WHERE buz == 'buz'"#, + r#"SELECT foo FROM bar WHERE buz = 'buz'"#, + ); +} + fn clickhouse() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {})], diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index a9ce3cd6c..46503c7f2 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -6792,10 +6792,10 @@ fn parse_time_functions() { // Validating Parenthesis let sql_without_parens = format!("SELECT {}", func_name); - let mut ast_without_parens = select_localtime_func_call_ast.clone(); + let mut ast_without_parens = select_localtime_func_call_ast; ast_without_parens.special = true; assert_eq!( - &Expr::Function(ast_without_parens.clone()), + &Expr::Function(ast_without_parens), expr_from_projection(&verified_only_select(&sql_without_parens).projection[0]) ); } From ed39329060bbc34d4fc7655f0d9973dacdea5534 Mon Sep 17 00:00:00 2001 From: William Date: Mon, 2 Oct 2023 08:36:17 -0400 Subject: [PATCH 029/735] Add JumpWire to users in README (#990) --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1195cc941..454ea6c29 100644 --- a/README.md +++ b/README.md @@ -93,7 +93,7 @@ $ cargo run --features json_example --example cli FILENAME.sql [--dialectname] ## Users This parser is currently being used by the [DataFusion] query engine, -[LocustDB], [Ballista], [GlueSQL], and [Opteryx]. +[LocustDB], [Ballista], [GlueSQL], [Opteryx], and [JumpWire]. If your project is using sqlparser-rs feel free to make a PR to add it to this list. 
@@ -179,6 +179,7 @@ licensed as above, without any additional terms or conditions. [Ballista]: https://github.com/apache/arrow-ballista [GlueSQL]: https://github.com/gluesql/gluesql [Opteryx]: https://github.com/mabel-dev/opteryx +[JumpWire]: https://github.com/extragoodlabs/jumpwire [Pratt Parser]: https://tdop.github.io/ [sql-2016-grammar]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html [sql-standard]: https://en.wikipedia.org/wiki/ISO/IEC_9075 From 6ffc3b3a52c6dc49d4f62733ddf3451ff3f9039d Mon Sep 17 00:00:00 2001 From: Ulrich Schmidt-Goertz Date: Mon, 2 Oct 2023 14:42:58 +0200 Subject: [PATCH 030/735] Support DELETE with ORDER BY and LIMIT (MySQL) (#992) --- src/ast/mod.rs | 12 ++++++++++++ src/parser/mod.rs | 13 ++++++++++++- tests/sqlparser_common.rs | 2 ++ tests/sqlparser_mysql.rs | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 58 insertions(+), 1 deletion(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 48274f68c..f2dbb8899 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1301,6 +1301,10 @@ pub enum Statement { selection: Option, /// RETURNING returning: Option>, + /// ORDER BY (MySQL) + order_by: Vec, + /// LIMIT (MySQL) + limit: Option, }, /// CREATE VIEW CreateView { @@ -2141,6 +2145,8 @@ impl fmt::Display for Statement { using, selection, returning, + order_by, + limit, } => { write!(f, "DELETE ")?; if !tables.is_empty() { @@ -2156,6 +2162,12 @@ impl fmt::Display for Statement { if let Some(returning) = returning { write!(f, " RETURNING {}", display_comma_separated(returning))?; } + if !order_by.is_empty() { + write!(f, " ORDER BY {}", display_comma_separated(order_by))?; + } + if let Some(limit) = limit { + write!(f, " LIMIT {limit}")?; + } Ok(()) } Statement::Close { cursor } => { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 279abf968..a3ebcc475 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5306,12 +5306,21 @@ impl<'a> Parser<'a> { } else { None }; - let returning = if 
self.parse_keyword(Keyword::RETURNING) { Some(self.parse_comma_separated(Parser::parse_select_item)?) } else { None }; + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_order_by_expr)? + } else { + vec![] + }; + let limit = if self.parse_keyword(Keyword::LIMIT) { + self.parse_limit()? + } else { + None + }; Ok(Statement::Delete { tables, @@ -5319,6 +5328,8 @@ impl<'a> Parser<'a> { using, selection, returning, + order_by, + limit, }) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 46503c7f2..d73061f79 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -525,6 +525,7 @@ fn parse_where_delete_statement() { using, selection, returning, + .. } => { assert_eq!( TableFactor::Table { @@ -565,6 +566,7 @@ fn parse_where_delete_with_alias_statement() { using, selection, returning, + .. } => { assert_eq!( TableFactor::Table { diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 80ef9f981..f1a054bfb 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1315,6 +1315,38 @@ fn parse_update_with_joins() { } } +#[test] +fn parse_delete_with_order_by() { + let sql = "DELETE FROM customers ORDER BY id DESC"; + match mysql().verified_stmt(sql) { + Statement::Delete { order_by, .. } => { + assert_eq!( + vec![OrderByExpr { + expr: Expr::Identifier(Ident { + value: "id".to_owned(), + quote_style: None + }), + asc: Some(false), + nulls_first: None, + }], + order_by + ); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_delete_with_limit() { + let sql = "DELETE FROM customers LIMIT 100"; + match mysql().verified_stmt(sql) { + Statement::Delete { limit, .. 
} => { + assert_eq!(Some(Expr::Value(number("100"))), limit); + } + _ => unreachable!(), + } +} + #[test] fn parse_alter_table_drop_primary_key() { assert_matches!( From 993769ec0267e8684a034ebeb268ae0360785822 Mon Sep 17 00:00:00 2001 From: Ifeanyi Ubah Date: Mon, 2 Oct 2023 14:48:51 +0200 Subject: [PATCH 031/735] Add support for mixed BigQuery table name quoting (#971) Co-authored-by: ifeanyi --- src/parser/mod.rs | 21 ++++++++ src/test_utils.rs | 18 +++++++ tests/sqlparser_bigquery.rs | 95 +++++++++++++++++++++++++++++++++++-- 3 files changed, 129 insertions(+), 5 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a3ebcc475..a388a9137 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5041,6 +5041,27 @@ impl<'a> Parser<'a> { break; } } + + // BigQuery accepts any number of quoted identifiers of a table name. + // https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_identifiers + if dialect_of!(self is BigQueryDialect) + && idents.iter().any(|ident| ident.value.contains('.')) + { + idents = idents + .into_iter() + .flat_map(|ident| { + ident + .value + .split('.') + .map(|value| Ident { + value: value.into(), + quote_style: ident.quote_style, + }) + .collect::>() + }) + .collect() + } + Ok(ObjectName(idents)) } diff --git a/src/test_utils.rs b/src/test_utils.rs index 8c64bfacd..f0c5e425a 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -162,6 +162,24 @@ impl TestedDialects { } } + /// Ensures that `sql` parses as a single [`Select`], and that additionally: + /// + /// 1. parsing `sql` results in the same [`Statement`] as parsing + /// `canonical`. + /// + /// 2. 
re-serializing the result of parsing `sql` produces the same + /// `canonical` sql string + pub fn verified_only_select_with_canonical(&self, query: &str, canonical: &str) -> Select { + let q = match self.one_statement_parses_to(query, canonical) { + Statement::Query(query) => *query, + _ => panic!("Expected Query"), + }; + match *q.body { + SetExpr::Select(s) => *s, + _ => panic!("Expected SetExpr::Select"), + } + } + /// Ensures that `sql` parses as an [`Expr`], and that /// re-serializing the parse result produces the same `sql` /// string (is not modified after a serialization round-trip). diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 3502d7dfa..e05581d5f 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -13,6 +13,8 @@ #[macro_use] mod test_utils; +use std::ops::Deref; + use sqlparser::ast::*; use sqlparser::dialect::{BigQueryDialect, GenericDialect}; use test_utils::*; @@ -84,9 +86,24 @@ fn parse_raw_literal() { #[test] fn parse_table_identifiers() { - fn test_table_ident(ident: &str, expected: Vec) { + /// Parses a table identifier ident and verifies that re-serializing the + /// parsed identifier produces the original ident string. + /// + /// In some cases, re-serializing the result of the parsed ident is not + /// expected to produce the original ident string. canonical is provided + /// instead as the canonical representation of the identifier for comparison. 
+ /// For example, re-serializing the result of ident `foo.bar` produces + /// the equivalent canonical representation `foo`.`bar` + fn test_table_ident(ident: &str, canonical: Option<&str>, expected: Vec) { let sql = format!("SELECT 1 FROM {ident}"); - let select = bigquery().verified_only_select(&sql); + let canonical = canonical.map(|ident| format!("SELECT 1 FROM {ident}")); + + let select = if let Some(canonical) = canonical { + bigquery().verified_only_select_with_canonical(&sql, canonical.deref()) + } else { + bigquery().verified_only_select(&sql) + }; + assert_eq!( select.from, vec![TableWithJoins { @@ -102,26 +119,30 @@ fn parse_table_identifiers() { },] ); } + fn test_table_ident_err(ident: &str) { let sql = format!("SELECT 1 FROM {ident}"); assert!(bigquery().parse_sql_statements(&sql).is_err()); } - test_table_ident("da-sh-es", vec![Ident::new("da-sh-es")]); + test_table_ident("da-sh-es", None, vec![Ident::new("da-sh-es")]); - test_table_ident("`spa ce`", vec![Ident::with_quote('`', "spa ce")]); + test_table_ident("`spa ce`", None, vec![Ident::with_quote('`', "spa ce")]); test_table_ident( "`!@#$%^&*()-=_+`", + None, vec![Ident::with_quote('`', "!@#$%^&*()-=_+")], ); test_table_ident( "_5abc.dataField", + None, vec![Ident::new("_5abc"), Ident::new("dataField")], ); test_table_ident( "`5abc`.dataField", + None, vec![Ident::with_quote('`', "5abc"), Ident::new("dataField")], ); @@ -129,6 +150,7 @@ fn parse_table_identifiers() { test_table_ident( "abc5.dataField", + None, vec![Ident::new("abc5"), Ident::new("dataField")], ); @@ -136,13 +158,76 @@ fn parse_table_identifiers() { test_table_ident( "`GROUP`.dataField", + None, vec![Ident::with_quote('`', "GROUP"), Ident::new("dataField")], ); // TODO: this should be error // test_table_ident_err("GROUP.dataField"); - test_table_ident("abc5.GROUP", vec![Ident::new("abc5"), Ident::new("GROUP")]); + test_table_ident( + "abc5.GROUP", + None, + vec![Ident::new("abc5"), Ident::new("GROUP")], + ); + + test_table_ident( 
+ "`foo.bar.baz`", + Some("`foo`.`bar`.`baz`"), + vec![ + Ident::with_quote('`', "foo"), + Ident::with_quote('`', "bar"), + Ident::with_quote('`', "baz"), + ], + ); + + test_table_ident( + "`foo.bar`.`baz`", + Some("`foo`.`bar`.`baz`"), + vec![ + Ident::with_quote('`', "foo"), + Ident::with_quote('`', "bar"), + Ident::with_quote('`', "baz"), + ], + ); + + test_table_ident( + "`foo`.`bar.baz`", + Some("`foo`.`bar`.`baz`"), + vec![ + Ident::with_quote('`', "foo"), + Ident::with_quote('`', "bar"), + Ident::with_quote('`', "baz"), + ], + ); + + test_table_ident( + "`foo`.`bar`.`baz`", + Some("`foo`.`bar`.`baz`"), + vec![ + Ident::with_quote('`', "foo"), + Ident::with_quote('`', "bar"), + Ident::with_quote('`', "baz"), + ], + ); + + test_table_ident( + "`5abc.dataField`", + Some("`5abc`.`dataField`"), + vec![ + Ident::with_quote('`', "5abc"), + Ident::with_quote('`', "dataField"), + ], + ); + + test_table_ident( + "`_5abc.da-sh-es`", + Some("`_5abc`.`da-sh-es`"), + vec![ + Ident::with_quote('`', "_5abc"), + Ident::with_quote('`', "da-sh-es"), + ], + ); } #[test] From 2786c7eaf1d0b420ca5a5f338b45b6cc0fbd68be Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Mon, 2 Oct 2023 17:53:32 +0200 Subject: [PATCH 032/735] clickhouse: add support for LIMIT BY (#977) --- src/ast/query.rs | 7 +++++++ src/parser/mod.rs | 13 ++++++++++++- tests/sqlparser_clickhouse.rs | 18 ++++++++++++++++++ tests/sqlparser_common.rs | 5 +++++ tests/sqlparser_mssql.rs | 3 +++ tests/sqlparser_mysql.rs | 9 +++++++++ tests/sqlparser_postgres.rs | 2 ++ 7 files changed, 56 insertions(+), 1 deletion(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index af35c37a3..d5f170791 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -35,6 +35,10 @@ pub struct Query { pub order_by: Vec, /// `LIMIT { | ALL }` pub limit: Option, + + /// `LIMIT { } BY { ,,... 
} }` + pub limit_by: Vec, + /// `OFFSET [ { ROW | ROWS } ]` pub offset: Option, /// `FETCH { FIRST | NEXT } [ PERCENT ] { ROW | ROWS } | { ONLY | WITH TIES }` @@ -58,6 +62,9 @@ impl fmt::Display for Query { if let Some(ref offset) = self.offset { write!(f, " {offset}")?; } + if !self.limit_by.is_empty() { + write!(f, " BY {}", display_separated(&self.limit_by, ", "))?; + } if let Some(ref fetch) = self.fetch { write!(f, " {fetch}")?; } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a388a9137..dcd731a65 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5431,6 +5431,7 @@ impl<'a> Parser<'a> { with, body: Box::new(SetExpr::Insert(insert)), limit: None, + limit_by: vec![], order_by: vec![], offset: None, fetch: None, @@ -5442,6 +5443,7 @@ impl<'a> Parser<'a> { with, body: Box::new(SetExpr::Update(update)), limit: None, + limit_by: vec![], order_by: vec![], offset: None, fetch: None, @@ -5468,7 +5470,7 @@ impl<'a> Parser<'a> { offset = Some(self.parse_offset()?) } - if dialect_of!(self is GenericDialect | MySqlDialect) + if dialect_of!(self is GenericDialect | MySqlDialect | ClickHouseDialect) && limit.is_some() && offset.is_none() && self.consume_token(&Token::Comma) @@ -5483,6 +5485,14 @@ impl<'a> Parser<'a> { } } + let limit_by = if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keyword(Keyword::BY) + { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + let fetch = if self.parse_keyword(Keyword::FETCH) { Some(self.parse_fetch()?) 
} else { @@ -5499,6 +5509,7 @@ impl<'a> Parser<'a> { body, order_by, limit, + limit_by, offset, fetch, locks, diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 936b0799a..9efe4a368 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -25,6 +25,7 @@ use sqlparser::ast::TableFactor::Table; use sqlparser::ast::*; use sqlparser::dialect::ClickHouseDialect; +use sqlparser::dialect::GenericDialect; #[test] fn parse_map_access_expr() { @@ -344,9 +345,26 @@ fn parse_double_equal() { ); } +#[test] +fn parse_limit_by() { + clickhouse_and_generic().verified_stmt( + r#"SELECT * FROM default.last_asset_runs_mv ORDER BY created_at DESC LIMIT 1 BY asset"#, + ); + clickhouse_and_generic().verified_stmt( + r#"SELECT * FROM default.last_asset_runs_mv ORDER BY created_at DESC LIMIT 1 BY asset, toStartOfDay(created_at)"#, + ); +} + fn clickhouse() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {})], options: None, } } + +fn clickhouse_and_generic() -> TestedDialects { + TestedDialects { + dialects: vec![Box::new(ClickHouseDialect {}), Box::new(GenericDialect {})], + options: None, + } +} diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index d73061f79..80e4cdf02 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -261,6 +261,7 @@ fn parse_update_set_from() { }))), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -2662,6 +2663,7 @@ fn parse_create_table_as_table() { }))), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -2685,6 +2687,7 @@ fn parse_create_table_as_table() { }))), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -3976,6 +3979,7 @@ fn parse_interval_and_or_xor() { }))), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -6392,6 +6396,7 @@ fn parse_merge() { 
}))), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 135e5d138..f9eb4d8fb 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -92,6 +92,7 @@ fn parse_create_procedure() { body: vec![Statement::Query(Box::new(Query { with: None, limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -493,6 +494,7 @@ fn parse_substring_in_select() { assert_eq!( Box::new(Query { with: None, + body: Box::new(SetExpr::Select(Box::new(Select { distinct: Some(Distinct::Distinct), top: None, @@ -532,6 +534,7 @@ fn parse_substring_in_select() { }))), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index f1a054bfb..80b9dcfd8 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -562,6 +562,7 @@ fn parse_escaped_quote_identifiers_with_escape() { }))), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -604,6 +605,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() { }))), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -643,6 +645,7 @@ fn parse_escaped_backticks_with_escape() { }))), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -682,6 +685,7 @@ fn parse_escaped_backticks_with_no_escape() { }))), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -956,6 +960,7 @@ fn parse_simple_insert() { })), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -991,6 +996,7 @@ fn parse_empty_row_insert() { })), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -1049,6 +1055,7 @@ fn parse_insert_with_on_duplicate_update() { })), order_by: vec![], 
limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -1428,6 +1435,7 @@ fn parse_substring_in_select() { }))), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -1708,6 +1716,7 @@ fn parse_hex_string_introducer() { }))), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index bb3857817..fe336bda7 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1000,6 +1000,7 @@ fn parse_copy_to() { }))), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], @@ -2046,6 +2047,7 @@ fn parse_array_subquery_expr() { }), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], From 40e2ecbdf34f0068863ed74b0ae3a8eb410c6401 Mon Sep 17 00:00:00 2001 From: Joey Hain Date: Mon, 2 Oct 2023 10:28:13 -0700 Subject: [PATCH 033/735] snowflake: support for UNPIVOT and a fix for chained PIVOTs (#983) --- src/ast/query.rs | 67 ++++++++++------ src/keywords.rs | 2 + src/parser/mod.rs | 54 +++++++++---- tests/sqlparser_common.rs | 155 +++++++++++++++++++++++++++++++++++--- 4 files changed, 231 insertions(+), 47 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index d5f170791..88b0931de 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -720,13 +720,28 @@ pub enum TableFactor { /// For example `FROM monthly_sales PIVOT(sum(amount) FOR MONTH IN ('JAN', 'FEB'))` /// See Pivot { - #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] - name: ObjectName, - table_alias: Option, + #[cfg_attr(feature = "visitor", visit(with = "visit_table_factor"))] + table: Box, aggregate_function: Expr, // Function expression value_column: Vec, pivot_values: Vec, - pivot_alias: Option, + alias: Option, + }, + /// An UNPIVOT operation on a table. 
+ /// + /// Syntax: + /// ```sql + /// table UNPIVOT(value FOR name IN (column1, [ column2, ... ])) [ alias ] + /// ``` + /// + /// See . + Unpivot { + #[cfg_attr(feature = "visitor", visit(with = "visit_table_factor"))] + table: Box, + value: Ident, + name: Ident, + columns: Vec, + alias: Option, }, } @@ -810,32 +825,42 @@ impl fmt::Display for TableFactor { Ok(()) } TableFactor::Pivot { - name, - table_alias, + table, aggregate_function, value_column, pivot_values, - pivot_alias, + alias, } => { - write!(f, "{}", name)?; - if table_alias.is_some() { - write!(f, " AS {}", table_alias.as_ref().unwrap())?; - } write!( f, - " PIVOT({} FOR {} IN (", + "{} PIVOT({} FOR {} IN ({}))", + table, aggregate_function, - Expr::CompoundIdentifier(value_column.to_vec()) + Expr::CompoundIdentifier(value_column.to_vec()), + display_comma_separated(pivot_values) )?; - for value in pivot_values { - write!(f, "{}", value)?; - if !value.eq(pivot_values.last().unwrap()) { - write!(f, ", ")?; - } + if alias.is_some() { + write!(f, " AS {}", alias.as_ref().unwrap())?; } - write!(f, "))")?; - if pivot_alias.is_some() { - write!(f, " AS {}", pivot_alias.as_ref().unwrap())?; + Ok(()) + } + TableFactor::Unpivot { + table, + value, + name, + columns, + alias, + } => { + write!( + f, + "{} UNPIVOT({} FOR {} IN ({}))", + table, + value, + name, + display_comma_separated(columns) + )?; + if alias.is_some() { + write!(f, " AS {}", alias.as_ref().unwrap())?; } Ok(()) } diff --git a/src/keywords.rs b/src/keywords.rs index 6fb74a8e0..d85708032 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -635,6 +635,7 @@ define_keywords!( UNKNOWN, UNLOGGED, UNNEST, + UNPIVOT, UNSIGNED, UNTIL, UPDATE, @@ -693,6 +694,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::HAVING, Keyword::ORDER, Keyword::PIVOT, + Keyword::UNPIVOT, Keyword::TOP, Keyword::LATERAL, Keyword::VIEW, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index dcd731a65..45600f42d 100644 --- a/src/parser/mod.rs +++ 
b/src/parser/mod.rs @@ -6276,9 +6276,8 @@ impl<'a> Parser<'a> { | TableFactor::Table { alias, .. } | TableFactor::UNNEST { alias, .. } | TableFactor::TableFunction { alias, .. } - | TableFactor::Pivot { - pivot_alias: alias, .. - } + | TableFactor::Pivot { alias, .. } + | TableFactor::Unpivot { alias, .. } | TableFactor::NestedJoin { alias, .. } => { // but not `FROM (mytable AS alias1) AS alias2`. if let Some(inner_alias) = alias { @@ -6357,11 +6356,6 @@ impl<'a> Parser<'a> { let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; - // Pivot - if self.parse_keyword(Keyword::PIVOT) { - return self.parse_pivot_table_factor(name, alias); - } - // MSSQL-specific table hints: let mut with_hints = vec![]; if self.parse_keyword(Keyword::WITH) { @@ -6373,14 +6367,25 @@ impl<'a> Parser<'a> { self.prev_token(); } }; - Ok(TableFactor::Table { + + let mut table = TableFactor::Table { name, alias, args, with_hints, version, partitions, - }) + }; + + while let Some(kw) = self.parse_one_of_keywords(&[Keyword::PIVOT, Keyword::UNPIVOT]) { + table = match kw { + Keyword::PIVOT => self.parse_pivot_table_factor(table)?, + Keyword::UNPIVOT => self.parse_unpivot_table_factor(table)?, + _ => unreachable!(), + } + } + + Ok(table) } } @@ -6417,8 +6422,7 @@ impl<'a> Parser<'a> { pub fn parse_pivot_table_factor( &mut self, - name: ObjectName, - table_alias: Option, + table: TableFactor, ) -> Result { self.expect_token(&Token::LParen)?; let function_name = match self.next_token().token { @@ -6435,12 +6439,32 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; Ok(TableFactor::Pivot { - name, - table_alias, + table: Box::new(table), aggregate_function: function, value_column, pivot_values, - pivot_alias: alias, + alias, + }) + } + + pub fn parse_unpivot_table_factor( + &mut self, + table: TableFactor, + ) -> Result { + self.expect_token(&Token::LParen)?; + let value = 
self.parse_identifier()?; + self.expect_keyword(Keyword::FOR)?; + let name = self.parse_identifier()?; + self.expect_keyword(Keyword::IN)?; + let columns = self.parse_parenthesized_column_list(Mandatory, false)?; + self.expect_token(&Token::RParen)?; + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + Ok(TableFactor::Unpivot { + table: Box::new(table), + value, + name, + columns, + alias, }) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 80e4cdf02..3c4a2d9ea 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -20,7 +20,7 @@ use matches::assert_matches; use sqlparser::ast::SelectItem::UnnamedExpr; -use sqlparser::ast::TableFactor::Pivot; +use sqlparser::ast::TableFactor::{Pivot, Unpivot}; use sqlparser::ast::*; use sqlparser::dialect::{ AnsiDialect, BigQueryDialect, ClickHouseDialect, DuckDbDialect, GenericDialect, HiveDialect, @@ -7257,10 +7257,16 @@ fn parse_pivot_table() { assert_eq!( verified_only_select(sql).from[0].relation, Pivot { - name: ObjectName(vec![Ident::new("monthly_sales")]), - table_alias: Some(TableAlias { - name: Ident::new("a"), - columns: vec![] + table: Box::new(TableFactor::Table { + name: ObjectName(vec![Ident::new("monthly_sales")]), + alias: Some(TableAlias { + name: Ident::new("a"), + columns: vec![] + }), + args: None, + with_hints: vec![], + version: None, + partitions: vec![], }), aggregate_function: Expr::Function(Function { name: ObjectName(vec![Ident::new("SUM")]), @@ -7279,7 +7285,7 @@ fn parse_pivot_table() { Value::SingleQuotedString("MAR".to_string()), Value::SingleQuotedString("APR".to_string()), ], - pivot_alias: Some(TableAlias { + alias: Some(TableAlias { name: Ident { value: "p".to_string(), quote_style: None @@ -7290,17 +7296,15 @@ fn parse_pivot_table() { ); assert_eq!(verified_stmt(sql).to_string(), sql); + // parsing should succeed with empty alias let sql_without_table_alias = concat!( "SELECT * FROM monthly_sales ", 
"PIVOT(SUM(a.amount) FOR a.MONTH IN ('JAN', 'FEB', 'MAR', 'APR')) AS p (c, d) ", "ORDER BY EMPID" ); assert_matches!( - verified_only_select(sql_without_table_alias).from[0].relation, - Pivot { - table_alias: None, // parsing should succeed with empty alias - .. - } + &verified_only_select(sql_without_table_alias).from[0].relation, + Pivot { table, .. } if matches!(&**table, TableFactor::Table { alias: None, .. }) ); assert_eq!( verified_stmt(sql_without_table_alias).to_string(), @@ -7308,6 +7312,135 @@ fn parse_pivot_table() { ); } +#[test] +fn parse_unpivot_table() { + let sql = concat!( + "SELECT * FROM sales AS s ", + "UNPIVOT(quantity FOR quarter IN (Q1, Q2, Q3, Q4)) AS u (product, quarter, quantity)" + ); + + pretty_assertions::assert_eq!( + verified_only_select(sql).from[0].relation, + Unpivot { + table: Box::new(TableFactor::Table { + name: ObjectName(vec![Ident::new("sales")]), + alias: Some(TableAlias { + name: Ident::new("s"), + columns: vec![] + }), + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + }), + value: Ident { + value: "quantity".to_string(), + quote_style: None + }, + + name: Ident { + value: "quarter".to_string(), + quote_style: None + }, + columns: ["Q1", "Q2", "Q3", "Q4"] + .into_iter() + .map(Ident::new) + .collect(), + alias: Some(TableAlias { + name: Ident::new("u"), + columns: ["product", "quarter", "quantity"] + .into_iter() + .map(Ident::new) + .collect() + }), + } + ); + assert_eq!(verified_stmt(sql).to_string(), sql); + + let sql_without_aliases = concat!( + "SELECT * FROM sales ", + "UNPIVOT(quantity FOR quarter IN (Q1, Q2, Q3, Q4))" + ); + + assert_matches!( + &verified_only_select(sql_without_aliases).from[0].relation, + Unpivot { + table, + alias: None, + .. + } if matches!(&**table, TableFactor::Table { alias: None, .. 
}) + ); + assert_eq!( + verified_stmt(sql_without_aliases).to_string(), + sql_without_aliases + ); +} + +#[test] +fn parse_pivot_unpivot_table() { + let sql = concat!( + "SELECT * FROM census AS c ", + "UNPIVOT(population FOR year IN (population_2000, population_2010)) AS u ", + "PIVOT(sum(population) FOR year IN ('population_2000', 'population_2010')) AS p" + ); + + pretty_assertions::assert_eq!( + verified_only_select(sql).from[0].relation, + Pivot { + table: Box::new(Unpivot { + table: Box::new(TableFactor::Table { + name: ObjectName(vec![Ident::new("census")]), + alias: Some(TableAlias { + name: Ident::new("c"), + columns: vec![] + }), + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + }), + value: Ident { + value: "population".to_string(), + quote_style: None + }, + + name: Ident { + value: "year".to_string(), + quote_style: None + }, + columns: ["population_2000", "population_2010"] + .into_iter() + .map(Ident::new) + .collect(), + alias: Some(TableAlias { + name: Ident::new("u"), + columns: vec![] + }), + }), + aggregate_function: Expr::Function(Function { + name: ObjectName(vec![Ident::new("sum")]), + args: (vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( + Expr::Identifier(Ident::new("population")) + ))]), + over: None, + distinct: false, + special: false, + order_by: vec![], + }), + value_column: vec![Ident::new("year")], + pivot_values: vec![ + Value::SingleQuotedString("population_2000".to_string()), + Value::SingleQuotedString("population_2010".to_string()) + ], + alias: Some(TableAlias { + name: Ident::new("p"), + columns: vec![] + }), + } + ); + assert_eq!(verified_stmt(sql).to_string(), sql); +} + /// Makes a predicate that looks like ((user_id = $id) OR user_id = $2...) 
fn make_where_clause(num: usize) -> String { use std::fmt::Write; From c811e2260505e2651b04e85e886ec09d237a0602 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Mon, 2 Oct 2023 19:42:01 +0200 Subject: [PATCH 034/735] =?UTF-8?q?redshift:=20add=20support=20for=20CREAT?= =?UTF-8?q?E=20VIEW=20=E2=80=A6=20WITH=20NO=20SCHEMA=20BINDING=20(#979)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/ast/mod.rs | 11 +++++++++-- src/keywords.rs | 1 + src/parser/mod.rs | 10 ++++++++++ tests/sqlparser_common.rs | 12 ++++++++++++ tests/sqlparser_redshift.rs | 14 ++++++++++++++ 5 files changed, 46 insertions(+), 2 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index f2dbb8899..6f9d32c8d 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -577,7 +577,7 @@ pub enum Expr { /// /// Syntax: /// ```sql - /// MARCH (, , ...) AGAINST ( []) + /// MATCH (, , ...) AGAINST ( []) /// /// = CompoundIdentifier /// = String literal @@ -1316,6 +1316,8 @@ pub enum Statement { query: Box, with_options: Vec, cluster_by: Vec, + /// if true, has RedShift [`WITH NO SCHEMA BINDING`] clause + with_no_schema_binding: bool, }, /// CREATE TABLE CreateTable { @@ -2271,6 +2273,7 @@ impl fmt::Display for Statement { materialized, with_options, cluster_by, + with_no_schema_binding, } => { write!( f, @@ -2288,7 +2291,11 @@ impl fmt::Display for Statement { if !cluster_by.is_empty() { write!(f, " CLUSTER BY ({})", display_comma_separated(cluster_by))?; } - write!(f, " AS {query}") + write!(f, " AS {query}")?; + if *with_no_schema_binding { + write!(f, " WITH NO SCHEMA BINDING")?; + } + Ok(()) } Statement::CreateTable { name, diff --git a/src/keywords.rs b/src/keywords.rs index d85708032..e1bbf44ae 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -110,6 +110,7 @@ define_keywords!( BIGINT, BIGNUMERIC, BINARY, + BINDING, BLOB, BLOOMFILTER, BOOL, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 45600f42d..5f6788696 100644 --- 
a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2974,6 +2974,15 @@ impl<'a> Parser<'a> { self.expect_keyword(Keyword::AS)?; let query = Box::new(self.parse_query()?); // Optional `WITH [ CASCADED | LOCAL ] CHECK OPTION` is widely supported here. + + let with_no_schema_binding = dialect_of!(self is RedshiftSqlDialect | GenericDialect) + && self.parse_keywords(&[ + Keyword::WITH, + Keyword::NO, + Keyword::SCHEMA, + Keyword::BINDING, + ]); + Ok(Statement::CreateView { name, columns, @@ -2982,6 +2991,7 @@ impl<'a> Parser<'a> { or_replace, with_options, cluster_by, + with_no_schema_binding, }) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 3c4a2d9ea..027dc312f 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -5320,6 +5320,7 @@ fn parse_create_view() { materialized, with_options, cluster_by, + with_no_schema_binding: late_binding, } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -5328,6 +5329,7 @@ fn parse_create_view() { assert!(!or_replace); assert_eq!(with_options, vec![]); assert_eq!(cluster_by, vec![]); + assert!(!late_binding); } _ => unreachable!(), } @@ -5368,6 +5370,7 @@ fn parse_create_view_with_columns() { query, materialized, cluster_by, + with_no_schema_binding: late_binding, } => { assert_eq!("v", name.to_string()); assert_eq!(columns, vec![Ident::new("has"), Ident::new("cols")]); @@ -5376,6 +5379,7 @@ fn parse_create_view_with_columns() { assert!(!materialized); assert!(!or_replace); assert_eq!(cluster_by, vec![]); + assert!(!late_binding); } _ => unreachable!(), } @@ -5393,6 +5397,7 @@ fn parse_create_or_replace_view() { query, materialized, cluster_by, + with_no_schema_binding: late_binding, } => { assert_eq!("v", name.to_string()); assert_eq!(columns, vec![]); @@ -5401,6 +5406,7 @@ fn parse_create_or_replace_view() { assert!(!materialized); assert!(or_replace); assert_eq!(cluster_by, vec![]); + assert!(!late_binding); } _ => unreachable!(), } @@ -5422,6 
+5428,7 @@ fn parse_create_or_replace_materialized_view() { query, materialized, cluster_by, + with_no_schema_binding: late_binding, } => { assert_eq!("v", name.to_string()); assert_eq!(columns, vec![]); @@ -5430,6 +5437,7 @@ fn parse_create_or_replace_materialized_view() { assert!(materialized); assert!(or_replace); assert_eq!(cluster_by, vec![]); + assert!(!late_binding); } _ => unreachable!(), } @@ -5447,6 +5455,7 @@ fn parse_create_materialized_view() { materialized, with_options, cluster_by, + with_no_schema_binding: late_binding, } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -5455,6 +5464,7 @@ fn parse_create_materialized_view() { assert_eq!(with_options, vec![]); assert!(!or_replace); assert_eq!(cluster_by, vec![]); + assert!(!late_binding); } _ => unreachable!(), } @@ -5472,6 +5482,7 @@ fn parse_create_materialized_view_with_cluster_by() { materialized, with_options, cluster_by, + with_no_schema_binding: late_binding, } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -5480,6 +5491,7 @@ fn parse_create_materialized_view_with_cluster_by() { assert_eq!(with_options, vec![]); assert!(!or_replace); assert_eq!(cluster_by, vec![Ident::new("foo")]); + assert!(!late_binding); } _ => unreachable!(), } diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index f17ca5841..5ae539b3c 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -16,6 +16,7 @@ mod test_utils; use test_utils::*; use sqlparser::ast::*; +use sqlparser::dialect::GenericDialect; use sqlparser::dialect::RedshiftSqlDialect; #[test] @@ -272,6 +273,13 @@ fn redshift() -> TestedDialects { } } +fn redshift_and_generic() -> TestedDialects { + TestedDialects { + dialects: vec![Box::new(RedshiftSqlDialect {}), Box::new(GenericDialect {})], + options: None, + } +} + #[test] fn test_sharp() { let sql = "SELECT #_of_values"; @@ -281,3 +289,9 @@ fn test_sharp() { 
select.projection[0] ); } + +#[test] +fn test_create_view_with_no_schema_binding() { + redshift_and_generic() + .verified_stmt("CREATE VIEW myevent AS SELECT eventname FROM event WITH NO SCHEMA BINDING"); +} From 02f3d78a920ad90a91d318fbb3b08890c53e1a28 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 5 Oct 2023 15:25:03 -0400 Subject: [PATCH 035/735] Fix for clippy 1.73 (#995) --- src/ast/mod.rs | 2 +- src/ast/visitor.rs | 2 +- src/parser/mod.rs | 12 ++++++++---- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 6f9d32c8d..d4e2f26ea 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -155,7 +155,7 @@ impl fmt::Display for Ident { let escaped = value::escape_quoted_string(&self.value, q); write!(f, "{q}{escaped}{q}") } - Some(q) if q == '[' => write!(f, "[{}]", self.value), + Some('[') => write!(f, "[{}]", self.value), None => f.write_str(&self.value), _ => panic!("unexpected quote style"), } diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index bb7c19678..09cb20a0c 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -490,7 +490,7 @@ where /// /// This demonstrates how to effectively replace an expression with another more complicated one /// that references the original. This example avoids unnecessary allocations by using the -/// [`std::mem`](std::mem) family of functions. +/// [`std::mem`] family of functions. /// /// ``` /// # use sqlparser::parser::Parser; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 5f6788696..d0b3cef78 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4701,7 +4701,11 @@ impl<'a> Parser<'a> { pub fn parse_literal_string(&mut self) -> Result { let next_token = self.next_token(); match next_token.token { - Token::Word(Word { value, keyword, .. }) if keyword == Keyword::NoKeyword => Ok(value), + Token::Word(Word { + value, + keyword: Keyword::NoKeyword, + .. 
+ }) => Ok(value), Token::SingleQuotedString(s) => Ok(s), Token::DoubleQuotedString(s) => Ok(s), Token::EscapedStringLiteral(s) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { @@ -5853,8 +5857,8 @@ impl<'a> Parser<'a> { self.expect_token(&Token::Colon)?; } else if self.parse_keyword(Keyword::ROLE) { let context_modifier = match modifier { - Some(keyword) if keyword == Keyword::LOCAL => ContextModifier::Local, - Some(keyword) if keyword == Keyword::SESSION => ContextModifier::Session, + Some(Keyword::LOCAL) => ContextModifier::Local, + Some(Keyword::SESSION) => ContextModifier::Session, _ => ContextModifier::None, }; @@ -6897,7 +6901,7 @@ impl<'a> Parser<'a> { } } - /// Parse an [`WildcardAdditionalOptions`](WildcardAdditionalOptions) information for wildcard select items. + /// Parse an [`WildcardAdditionalOptions`] information for wildcard select items. /// /// If it is not possible to parse it, will return an option. pub fn parse_wildcard_additional_options( From 5263da68cdaa052dfd4f8989760569eae253253e Mon Sep 17 00:00:00 2001 From: Gabriel Villalonga Simon Date: Thu, 5 Oct 2023 20:32:43 +0100 Subject: [PATCH 036/735] Handle CREATE [TEMPORARY|TEMP] VIEW [IF NOT EXISTS] (#993) --- src/ast/mod.rs | 12 +++++++-- src/parser/mod.rs | 12 +++++++-- tests/sqlparser_common.rs | 55 +++++++++++++++++++++++++++++++++++++++ tests/sqlparser_sqlite.rs | 31 ++++++++++++++++++++++ 4 files changed, 106 insertions(+), 4 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index d4e2f26ea..d048ccc1b 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1318,6 +1318,10 @@ pub enum Statement { cluster_by: Vec, /// if true, has RedShift [`WITH NO SCHEMA BINDING`] clause with_no_schema_binding: bool, + /// if true, has SQLite `IF NOT EXISTS` clause + if_not_exists: bool, + /// if true, has SQLite `TEMP` or `TEMPORARY` clause + temporary: bool, }, /// CREATE TABLE CreateTable { @@ -2274,13 +2278,17 @@ impl fmt::Display for Statement { with_options, cluster_by, 
with_no_schema_binding, + if_not_exists, + temporary, } => { write!( f, - "CREATE {or_replace}{materialized}VIEW {name}", + "CREATE {or_replace}{materialized}{temporary}VIEW {if_not_exists}{name}", or_replace = if *or_replace { "OR REPLACE " } else { "" }, materialized = if *materialized { "MATERIALIZED " } else { "" }, - name = name + name = name, + temporary = if *temporary { "TEMPORARY " } else { "" }, + if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" } )?; if !with_options.is_empty() { write!(f, " WITH ({})", display_comma_separated(with_options))?; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d0b3cef78..922a791f3 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2478,7 +2478,7 @@ impl<'a> Parser<'a> { self.parse_create_table(or_replace, temporary, global, transient) } else if self.parse_keyword(Keyword::MATERIALIZED) || self.parse_keyword(Keyword::VIEW) { self.prev_token(); - self.parse_create_view(or_replace) + self.parse_create_view(or_replace, temporary) } else if self.parse_keyword(Keyword::EXTERNAL) { self.parse_create_external_table(or_replace) } else if self.parse_keyword(Keyword::FUNCTION) { @@ -2955,9 +2955,15 @@ impl<'a> Parser<'a> { } } - pub fn parse_create_view(&mut self, or_replace: bool) -> Result { + pub fn parse_create_view( + &mut self, + or_replace: bool, + temporary: bool, + ) -> Result { let materialized = self.parse_keyword(Keyword::MATERIALIZED); self.expect_keyword(Keyword::VIEW)?; + let if_not_exists = dialect_of!(self is SQLiteDialect|GenericDialect) + && self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); // Many dialects support `OR ALTER` right after `CREATE`, but we don't (yet). // ANSI SQL and Postgres support RECURSIVE here, but we don't support it either. 
let name = self.parse_object_name()?; @@ -2992,6 +2998,8 @@ impl<'a> Parser<'a> { with_options, cluster_by, with_no_schema_binding, + if_not_exists, + temporary, }) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 027dc312f..c0ec456a9 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -5321,6 +5321,8 @@ fn parse_create_view() { with_options, cluster_by, with_no_schema_binding: late_binding, + if_not_exists, + temporary, } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -5330,6 +5332,8 @@ fn parse_create_view() { assert_eq!(with_options, vec![]); assert_eq!(cluster_by, vec![]); assert!(!late_binding); + assert!(!if_not_exists); + assert!(!temporary); } _ => unreachable!(), } @@ -5371,6 +5375,8 @@ fn parse_create_view_with_columns() { materialized, cluster_by, with_no_schema_binding: late_binding, + if_not_exists, + temporary, } => { assert_eq!("v", name.to_string()); assert_eq!(columns, vec![Ident::new("has"), Ident::new("cols")]); @@ -5380,6 +5386,39 @@ fn parse_create_view_with_columns() { assert!(!or_replace); assert_eq!(cluster_by, vec![]); assert!(!late_binding); + assert!(!if_not_exists); + assert!(!temporary); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_create_view_temporary() { + let sql = "CREATE TEMPORARY VIEW myschema.myview AS SELECT foo FROM bar"; + match verified_stmt(sql) { + Statement::CreateView { + name, + columns, + query, + or_replace, + materialized, + with_options, + cluster_by, + with_no_schema_binding: late_binding, + if_not_exists, + temporary, + } => { + assert_eq!("myschema.myview", name.to_string()); + assert_eq!(Vec::::new(), columns); + assert_eq!("SELECT foo FROM bar", query.to_string()); + assert!(!materialized); + assert!(!or_replace); + assert_eq!(with_options, vec![]); + assert_eq!(cluster_by, vec![]); + assert!(!late_binding); + assert!(!if_not_exists); + assert!(temporary); } _ => unreachable!(), } @@ -5398,6 +5437,8 @@ fn 
parse_create_or_replace_view() { materialized, cluster_by, with_no_schema_binding: late_binding, + if_not_exists, + temporary, } => { assert_eq!("v", name.to_string()); assert_eq!(columns, vec![]); @@ -5407,6 +5448,8 @@ fn parse_create_or_replace_view() { assert!(or_replace); assert_eq!(cluster_by, vec![]); assert!(!late_binding); + assert!(!if_not_exists); + assert!(!temporary); } _ => unreachable!(), } @@ -5429,6 +5472,8 @@ fn parse_create_or_replace_materialized_view() { materialized, cluster_by, with_no_schema_binding: late_binding, + if_not_exists, + temporary, } => { assert_eq!("v", name.to_string()); assert_eq!(columns, vec![]); @@ -5438,6 +5483,8 @@ fn parse_create_or_replace_materialized_view() { assert!(or_replace); assert_eq!(cluster_by, vec![]); assert!(!late_binding); + assert!(!if_not_exists); + assert!(!temporary); } _ => unreachable!(), } @@ -5456,6 +5503,8 @@ fn parse_create_materialized_view() { with_options, cluster_by, with_no_schema_binding: late_binding, + if_not_exists, + temporary, } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -5465,6 +5514,8 @@ fn parse_create_materialized_view() { assert!(!or_replace); assert_eq!(cluster_by, vec![]); assert!(!late_binding); + assert!(!if_not_exists); + assert!(!temporary); } _ => unreachable!(), } @@ -5483,6 +5534,8 @@ fn parse_create_materialized_view_with_cluster_by() { with_options, cluster_by, with_no_schema_binding: late_binding, + if_not_exists, + temporary, } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -5492,6 +5545,8 @@ fn parse_create_materialized_view_with_cluster_by() { assert!(!or_replace); assert_eq!(cluster_by, vec![Ident::new("foo")]); assert!(!late_binding); + assert!(!if_not_exists); + assert!(!temporary); } _ => unreachable!(), } diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index c4e69d530..39a82cc8b 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ 
-61,6 +61,37 @@ fn parse_create_virtual_table() { sqlite_and_generic().verified_stmt(sql); } +#[test] +fn parse_create_view_temporary_if_not_exists() { + let sql = "CREATE TEMPORARY VIEW IF NOT EXISTS myschema.myview AS SELECT foo FROM bar"; + match sqlite_and_generic().verified_stmt(sql) { + Statement::CreateView { + name, + columns, + query, + or_replace, + materialized, + with_options, + cluster_by, + with_no_schema_binding: late_binding, + if_not_exists, + temporary, + } => { + assert_eq!("myschema.myview", name.to_string()); + assert_eq!(Vec::::new(), columns); + assert_eq!("SELECT foo FROM bar", query.to_string()); + assert!(!materialized); + assert!(!or_replace); + assert_eq!(with_options, vec![]); + assert_eq!(cluster_by, vec![]); + assert!(!late_binding); + assert!(if_not_exists); + assert!(temporary); + } + _ => unreachable!(), + } +} + #[test] fn double_equality_operator() { // Sqlite supports this operator: https://www.sqlite.org/lang_expr.html#binaryops From 83cb734b3c206502dd73998def455da554c37eef Mon Sep 17 00:00:00 2001 From: Zdenko Nevrala Date: Fri, 6 Oct 2023 20:48:18 +0200 Subject: [PATCH 037/735] Support Snowflake/BigQuery TRIM. 
(#975) --- src/ast/mod.rs | 6 ++++++ src/parser/mod.rs | 14 ++++++++++++++ tests/sqlparser_bigquery.rs | 26 ++++++++++++++++++++++++++ tests/sqlparser_common.rs | 24 ++++++++++++++++++++++++ tests/sqlparser_snowflake.rs | 25 +++++++++++++++++++++++++ 5 files changed, 95 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index d048ccc1b..87f7ebb37 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -496,12 +496,14 @@ pub enum Expr { /// ```sql /// TRIM([BOTH | LEADING | TRAILING] [ FROM] ) /// TRIM() + /// TRIM(, [, characters]) -- only Snowflake or Bigquery /// ``` Trim { expr: Box, // ([BOTH | LEADING | TRAILING] trim_where: Option, trim_what: Option>, + trim_characters: Option>, }, /// ```sql /// OVERLAY( PLACING FROM [ FOR ] @@ -895,6 +897,7 @@ impl fmt::Display for Expr { expr, trim_where, trim_what, + trim_characters, } => { write!(f, "TRIM(")?; if let Some(ident) = trim_where { @@ -905,6 +908,9 @@ impl fmt::Display for Expr { } else { write!(f, "{expr}")?; } + if let Some(characters) = trim_characters { + write!(f, ", {}", display_comma_separated(characters))?; + } write!(f, ")") } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 922a791f3..95f1f8edc 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1315,6 +1315,7 @@ impl<'a> Parser<'a> { /// ```sql /// TRIM ([WHERE] ['text' FROM] 'text') /// TRIM ('text') + /// TRIM(, [, characters]) -- only Snowflake or BigQuery /// ``` pub fn parse_trim_expr(&mut self) -> Result { self.expect_token(&Token::LParen)?; @@ -1336,6 +1337,18 @@ impl<'a> Parser<'a> { expr: Box::new(expr), trim_where, trim_what: Some(trim_what), + trim_characters: None, + }) + } else if self.consume_token(&Token::Comma) + && dialect_of!(self is SnowflakeDialect | BigQueryDialect | GenericDialect) + { + let characters = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + Ok(Expr::Trim { + expr: Box::new(expr), + trim_where: None, + trim_what: None, + trim_characters: Some(characters), }) 
} else { self.expect_token(&Token::RParen)?; @@ -1343,6 +1356,7 @@ impl<'a> Parser<'a> { expr: Box::new(expr), trim_where, trim_what: None, + trim_characters: None, }) } } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index e05581d5f..7a9a8d1c4 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -17,6 +17,7 @@ use std::ops::Deref; use sqlparser::ast::*; use sqlparser::dialect::{BigQueryDialect, GenericDialect}; +use sqlparser::parser::ParserError; use test_utils::*; #[test] @@ -549,3 +550,28 @@ fn parse_map_access_offset() { bigquery().verified_only_select(sql); } } + +#[test] +fn test_bigquery_trim() { + let real_sql = r#"SELECT customer_id, TRIM(item_price_id, '"', "a") AS item_price_id FROM models_staging.subscriptions"#; + assert_eq!(bigquery().verified_stmt(real_sql).to_string(), real_sql); + + let sql_only_select = "SELECT TRIM('xyz', 'a')"; + let select = bigquery().verified_only_select(sql_only_select); + assert_eq!( + &Expr::Trim { + expr: Box::new(Expr::Value(Value::SingleQuotedString("xyz".to_owned()))), + trim_where: None, + trim_what: None, + trim_characters: Some(vec![Expr::Value(Value::SingleQuotedString("a".to_owned()))]), + }, + expr_from_projection(only(&select.projection)) + ); + + // missing comma separation + let error_sql = "SELECT TRIM('xyz' 'a')"; + assert_eq!( + ParserError::ParserError("Expected ), found: 'a'".to_owned()), + bigquery().parse_sql_statements(error_sql).unwrap_err() + ); +} diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index c0ec456a9..1511aa76e 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -5225,6 +5225,30 @@ fn parse_trim() { ParserError::ParserError("Expected ), found: 'xyz'".to_owned()), parse_sql_statements("SELECT TRIM(FOO 'xyz' FROM 'xyzfooxyz')").unwrap_err() ); + + //keep Snowflake/BigQuery TRIM syntax failing + let all_expected_snowflake = TestedDialects { + dialects: vec![ + //Box::new(GenericDialect {}), + 
Box::new(PostgreSqlDialect {}), + Box::new(MsSqlDialect {}), + Box::new(AnsiDialect {}), + //Box::new(SnowflakeDialect {}), + Box::new(HiveDialect {}), + Box::new(RedshiftSqlDialect {}), + Box::new(MySqlDialect {}), + //Box::new(BigQueryDialect {}), + Box::new(SQLiteDialect {}), + Box::new(DuckDbDialect {}), + ], + options: None, + }; + assert_eq!( + ParserError::ParserError("Expected ), found: 'a'".to_owned()), + all_expected_snowflake + .parse_sql_statements("SELECT TRIM('xyz', 'a')") + .unwrap_err() + ); } #[test] diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index e1db7ec61..e92656d0b 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1039,3 +1039,28 @@ fn test_snowflake_stage_object_names() { } } } + +#[test] +fn test_snowflake_trim() { + let real_sql = r#"SELECT customer_id, TRIM(sub_items.value:item_price_id, '"', "a") AS item_price_id FROM models_staging.subscriptions"#; + assert_eq!(snowflake().verified_stmt(real_sql).to_string(), real_sql); + + let sql_only_select = "SELECT TRIM('xyz', 'a')"; + let select = snowflake().verified_only_select(sql_only_select); + assert_eq!( + &Expr::Trim { + expr: Box::new(Expr::Value(Value::SingleQuotedString("xyz".to_owned()))), + trim_where: None, + trim_what: None, + trim_characters: Some(vec![Expr::Value(Value::SingleQuotedString("a".to_owned()))]), + }, + expr_from_projection(only(&select.projection)) + ); + + // missing comma separation + let error_sql = "SELECT TRIM('xyz' 'a')"; + assert_eq!( + ParserError::ParserError("Expected ), found: 'a'".to_owned()), + snowflake().parse_sql_statements(error_sql).unwrap_err() + ); +} From c68e9775a22acf00e54b33542b10ac6d1a8cf887 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Fri, 20 Oct 2023 20:33:12 +0200 Subject: [PATCH 038/735] Support bigquery `CAST AS x [STRING|DATE] FORMAT` syntax (#978) --- src/ast/mod.rs | 64 ++++++++++++++++++++++++++++++++++-- src/parser/mod.rs | 23 +++++++++++++ 
tests/sqlparser_bigquery.rs | 35 ++++++++++++++++++-- tests/sqlparser_common.rs | 10 ++++++ tests/sqlparser_postgres.rs | 3 +- tests/sqlparser_snowflake.rs | 1 + 6 files changed, 130 insertions(+), 6 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 87f7ebb37..fc15efbc4 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -322,6 +322,16 @@ impl fmt::Display for JsonOperator { } } +/// Options for `CAST` / `TRY_CAST` +/// BigQuery: +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum CastFormat { + Value(Value), + ValueAtTimeZone(Value, Value), +} + /// An SQL expression of any type. /// /// The parser does not distinguish between expressions of different types @@ -437,12 +447,18 @@ pub enum Expr { Cast { expr: Box, data_type: DataType, + // Optional CAST(string_expression AS type FORMAT format_string_expression) as used by BigQuery + // https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax + format: Option, }, /// TRY_CAST an expression to a different data type e.g. `TRY_CAST(foo AS VARCHAR(123))` // this differs from CAST in the choice of how to implement invalid conversions TryCast { expr: Box, data_type: DataType, + // Optional CAST(string_expression AS type FORMAT format_string_expression) as used by BigQuery + // https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax + format: Option, }, /// SAFE_CAST an expression to a different data type e.g. 
`SAFE_CAST(foo AS FLOAT64)` // only available for BigQuery: https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#safe_casting @@ -450,6 +466,9 @@ pub enum Expr { SafeCast { expr: Box, data_type: DataType, + // Optional CAST(string_expression AS type FORMAT format_string_expression) as used by BigQuery + // https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax + format: Option, }, /// AT a timestamp to a different timezone e.g. `FROM_UNIXTIME(0) AT TIME ZONE 'UTC-06:00'` AtTimeZone { @@ -597,6 +616,15 @@ pub enum Expr { }, } +impl fmt::Display for CastFormat { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + CastFormat::Value(v) => write!(f, "{v}"), + CastFormat::ValueAtTimeZone(v, tz) => write!(f, "{v} AT TIME ZONE {tz}"), + } + } +} + impl fmt::Display for Expr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { @@ -753,9 +781,39 @@ impl fmt::Display for Expr { write!(f, "{op}{expr}") } } - Expr::Cast { expr, data_type } => write!(f, "CAST({expr} AS {data_type})"), - Expr::TryCast { expr, data_type } => write!(f, "TRY_CAST({expr} AS {data_type})"), - Expr::SafeCast { expr, data_type } => write!(f, "SAFE_CAST({expr} AS {data_type})"), + Expr::Cast { + expr, + data_type, + format, + } => { + if let Some(format) = format { + write!(f, "CAST({expr} AS {data_type} FORMAT {format})") + } else { + write!(f, "CAST({expr} AS {data_type})") + } + } + Expr::TryCast { + expr, + data_type, + format, + } => { + if let Some(format) = format { + write!(f, "TRY_CAST({expr} AS {data_type} FORMAT {format})") + } else { + write!(f, "TRY_CAST({expr} AS {data_type})") + } + } + Expr::SafeCast { + expr, + data_type, + format, + } => { + if let Some(format) = format { + write!(f, "SAFE_CAST({expr} AS {data_type} FORMAT {format})") + } else { + write!(f, "SAFE_CAST({expr} AS {data_type})") + } + } Expr::Extract { field, expr } => write!(f, "EXTRACT({field} FROM {expr})"), 
Expr::Ceil { expr, field } => { if field == &DateTimeField::NoDateTime { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 95f1f8edc..829b299af 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1139,16 +1139,34 @@ impl<'a> Parser<'a> { }) } + pub fn parse_optional_cast_format(&mut self) -> Result, ParserError> { + if self.parse_keyword(Keyword::FORMAT) { + let value = self.parse_value()?; + if self.parse_keywords(&[Keyword::AT, Keyword::TIME, Keyword::ZONE]) { + Ok(Some(CastFormat::ValueAtTimeZone( + value, + self.parse_value()?, + ))) + } else { + Ok(Some(CastFormat::Value(value))) + } + } else { + Ok(None) + } + } + /// Parse a SQL CAST function e.g. `CAST(expr AS FLOAT)` pub fn parse_cast_expr(&mut self) -> Result { self.expect_token(&Token::LParen)?; let expr = self.parse_expr()?; self.expect_keyword(Keyword::AS)?; let data_type = self.parse_data_type()?; + let format = self.parse_optional_cast_format()?; self.expect_token(&Token::RParen)?; Ok(Expr::Cast { expr: Box::new(expr), data_type, + format, }) } @@ -1158,10 +1176,12 @@ impl<'a> Parser<'a> { let expr = self.parse_expr()?; self.expect_keyword(Keyword::AS)?; let data_type = self.parse_data_type()?; + let format = self.parse_optional_cast_format()?; self.expect_token(&Token::RParen)?; Ok(Expr::TryCast { expr: Box::new(expr), data_type, + format, }) } @@ -1171,10 +1191,12 @@ impl<'a> Parser<'a> { let expr = self.parse_expr()?; self.expect_keyword(Keyword::AS)?; let data_type = self.parse_data_type()?; + let format = self.parse_optional_cast_format()?; self.expect_token(&Token::RParen)?; Ok(Expr::SafeCast { expr: Box::new(expr), data_type, + format, }) } @@ -2101,6 +2123,7 @@ impl<'a> Parser<'a> { Ok(Expr::Cast { expr: Box::new(expr), data_type: self.parse_data_type()?, + format: None, }) } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 7a9a8d1c4..b3f683b9a 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -304,8 +304,39 @@ fn 
parse_trailing_comma() { #[test] fn parse_cast_type() { - let sql = r#"SELECT SAFE_CAST(1 AS INT64)"#; - bigquery().verified_only_select(sql); + let sql = r"SELECT SAFE_CAST(1 AS INT64)"; + bigquery_and_generic().verified_only_select(sql); +} + +#[test] +fn parse_cast_date_format() { + let sql = + r"SELECT CAST(date_valid_from AS DATE FORMAT 'YYYY-MM-DD') AS date_valid_from FROM foo"; + bigquery_and_generic().verified_only_select(sql); +} + +#[test] +fn parse_cast_time_format() { + let sql = r"SELECT CAST(TIME '21:30:00' AS STRING FORMAT 'PM') AS date_time_to_string"; + bigquery_and_generic().verified_only_select(sql); +} + +#[test] +fn parse_cast_timestamp_format_tz() { + let sql = r"SELECT CAST(TIMESTAMP '2008-12-25 00:00:00+00:00' AS STRING FORMAT 'TZH' AT TIME ZONE 'Asia/Kolkata') AS date_time_to_string"; + bigquery_and_generic().verified_only_select(sql); +} + +#[test] +fn parse_cast_string_to_bytes_format() { + let sql = r"SELECT CAST('Hello' AS BYTES FORMAT 'ASCII') AS string_to_bytes"; + bigquery_and_generic().verified_only_select(sql); +} + +#[test] +fn parse_cast_bytes_to_string_format() { + let sql = r"SELECT CAST(B'\x48\x65\x6c\x6c\x6f' AS STRING FORMAT 'ASCII') AS bytes_to_string"; + bigquery_and_generic().verified_only_select(sql); } #[test] diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 1511aa76e..ff8bdd7a4 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1934,6 +1934,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::BigInt(None), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -1944,6 +1945,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::TinyInt(None), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -1970,6 +1972,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: 
DataType::Nvarchar(Some(50)), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -1980,6 +1983,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Clob(None), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -1990,6 +1994,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Clob(Some(50)), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -2000,6 +2005,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Binary(Some(50)), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -2010,6 +2016,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Varbinary(Some(50)), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -2020,6 +2027,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Blob(None), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -2030,6 +2038,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Blob(Some(50)), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -2043,6 +2052,7 @@ fn parse_try_cast() { &Expr::TryCast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::BigInt(None), + format: None, }, expr_from_projection(only(&select.projection)) ); diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index fe336bda7..654723668 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1782,7 +1782,8 @@ fn parse_array_index_expr() { })), data_type: DataType::Array(Some(Box::new(DataType::Array(Some(Box::new( DataType::Int(None) - )))))) + )))))), + format: None, }))), indexes: vec![num[1].clone(), num[2].clone()], }, 
diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index e92656d0b..bb988665d 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -167,6 +167,7 @@ fn parse_array() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("a"))), data_type: DataType::Array(None), + format: None, }, expr_from_projection(only(&select.projection)) ); From 88510f662563786a6e3af6b1ed109444bcd332e7 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Fri, 20 Oct 2023 21:49:18 +0200 Subject: [PATCH 039/735] fix column `COLLATE` not displayed (#1012) --- src/ast/ddl.rs | 3 +++ tests/sqlparser_common.rs | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index a4640d557..f1575d979 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -517,6 +517,9 @@ pub struct ColumnDef { impl fmt::Display for ColumnDef { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{} {}", self.name, self.data_type)?; + if let Some(collation) = &self.collation { + write!(f, " COLLATE {collation}")?; + } for option in &self.options { write!(f, " {option}")?; } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index ff8bdd7a4..3b8775e45 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -7635,3 +7635,8 @@ fn parse_create_type() { create_type ); } + +#[test] +fn parse_create_table_collate() { + pg_and_generic().verified_stmt("CREATE TABLE tbl (foo INT, bar TEXT COLLATE \"de_DE\")"); +} From c03586b727a659bb6d22d77910f4d4e9b9d9688c Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Fri, 20 Oct 2023 22:13:22 +0200 Subject: [PATCH 040/735] Support mysql `RLIKE` and `REGEXP` binary operators (#1017) --- src/ast/mod.rs | 21 +++++++++++++++++++++ src/keywords.rs | 2 ++ src/parser/mod.rs | 19 +++++++++++++++++-- src/test_utils.rs | 2 +- tests/sqlparser_mysql.rs | 12 ++++++++++++ 5 files changed, 53 insertions(+), 3 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 
fc15efbc4..3b0030017 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -429,6 +429,14 @@ pub enum Expr { pattern: Box, escape_char: Option, }, + /// MySQL: RLIKE regex or REGEXP regex + RLike { + negated: bool, + expr: Box, + pattern: Box, + // true for REGEXP, false for RLIKE (no difference in semantics) + regexp: bool, + }, /// Any operation e.g. `foo > ANY(bar)`, comparison operator is one of [=, >, <, =>, =<, !=] AnyOp { left: Box, @@ -740,6 +748,19 @@ impl fmt::Display for Expr { pattern ), }, + Expr::RLike { + negated, + expr, + pattern, + regexp, + } => write!( + f, + "{} {}{} {}", + expr, + if *negated { "NOT " } else { "" }, + if *regexp { "REGEXP" } else { "RLIKE" }, + pattern + ), Expr::SimilarTo { negated, expr, diff --git a/src/keywords.rs b/src/keywords.rs index e1bbf44ae..6327ccc84 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -498,6 +498,7 @@ define_keywords!( REFERENCES, REFERENCING, REGCLASS, + REGEXP, REGR_AVGX, REGR_AVGY, REGR_COUNT, @@ -524,6 +525,7 @@ define_keywords!( RETURNS, REVOKE, RIGHT, + RLIKE, ROLE, ROLLBACK, ROLLUP, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 829b299af..0065f7987 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1932,10 +1932,21 @@ impl<'a> Parser<'a> { | Keyword::BETWEEN | Keyword::LIKE | Keyword::ILIKE - | Keyword::SIMILAR => { + | Keyword::SIMILAR + | Keyword::REGEXP + | Keyword::RLIKE => { self.prev_token(); let negated = self.parse_keyword(Keyword::NOT); - if self.parse_keyword(Keyword::IN) { + let regexp = self.parse_keyword(Keyword::REGEXP); + let rlike = self.parse_keyword(Keyword::RLIKE); + if regexp || rlike { + Ok(Expr::RLike { + negated, + expr: Box::new(expr), + pattern: Box::new(self.parse_subexpr(Self::LIKE_PREC)?), + regexp, + }) + } else if self.parse_keyword(Keyword::IN) { self.parse_in(expr, negated) } else if self.parse_keyword(Keyword::BETWEEN) { self.parse_between(expr, negated) @@ -2178,6 +2189,8 @@ impl<'a> Parser<'a> { Token::Word(w) if w.keyword == 
Keyword::BETWEEN => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(Self::LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(Self::LIKE_PREC), _ => Ok(0), }, @@ -2186,6 +2199,8 @@ impl<'a> Parser<'a> { Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(Self::LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::DIV => Ok(Self::MUL_DIV_MOD_OP_PREC), diff --git a/src/test_utils.rs b/src/test_utils.rs index f0c5e425a..76a3e073b 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -111,7 +111,7 @@ impl TestedDialects { /// 2. 
re-serializing the result of parsing `sql` produces the same /// `canonical` sql string pub fn one_statement_parses_to(&self, sql: &str, canonical: &str) -> Statement { - let mut statements = self.parse_sql_statements(sql).unwrap(); + let mut statements = self.parse_sql_statements(sql).expect(sql); assert_eq!(statements.len(), 1); if !canonical.is_empty() && sql != canonical { diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 80b9dcfd8..6e59198d7 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1454,6 +1454,18 @@ fn parse_show_variables() { mysql_and_generic().verified_stmt("SHOW VARIABLES WHERE value = '3306'"); } +#[test] +fn parse_rlike_and_regexp() { + for s in &[ + "SELECT 1 WHERE 'a' RLIKE '^a$'", + "SELECT 1 WHERE 'a' REGEXP '^a$'", + "SELECT 1 WHERE 'a' NOT RLIKE '^a$'", + "SELECT 1 WHERE 'a' NOT REGEXP '^a$'", + ] { + mysql_and_generic().verified_only_select(s); + } +} + #[test] fn parse_kill() { let stmt = mysql_and_generic().verified_stmt("KILL CONNECTION 5"); From 5c10668dbb60bccaf11f224013d333a48e32ec38 Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Tue, 24 Oct 2023 01:37:31 +0400 Subject: [PATCH 041/735] Add support for UNION DISTINCT BY NAME syntax (#997) Co-authored-by: Andrew Lamb --- src/ast/query.rs | 5 +- src/parser/mod.rs | 4 +- tests/sqlparser_duckdb.rs | 232 ++++++++++++++------------------------ 3 files changed, 89 insertions(+), 152 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 88b0931de..824fab1ba 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -120,7 +120,8 @@ impl fmt::Display for SetExpr { SetQuantifier::All | SetQuantifier::Distinct | SetQuantifier::ByName - | SetQuantifier::AllByName => write!(f, " {set_quantifier}")?, + | SetQuantifier::AllByName + | SetQuantifier::DistinctByName => write!(f, " {set_quantifier}")?, SetQuantifier::None => write!(f, "{set_quantifier}")?, } write!(f, " {right}")?; @@ -160,6 +161,7 @@ pub enum SetQuantifier { Distinct, 
ByName, AllByName, + DistinctByName, None, } @@ -170,6 +172,7 @@ impl fmt::Display for SetQuantifier { SetQuantifier::Distinct => write!(f, "DISTINCT"), SetQuantifier::ByName => write!(f, "BY NAME"), SetQuantifier::AllByName => write!(f, "ALL BY NAME"), + SetQuantifier::DistinctByName => write!(f, "DISTINCT BY NAME"), SetQuantifier::None => write!(f, ""), } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 0065f7987..68a8cef1f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5696,7 +5696,9 @@ impl<'a> Parser<'a> { pub fn parse_set_quantifier(&mut self, op: &Option) -> SetQuantifier { match op { Some(SetOperator::Union) => { - if self.parse_keywords(&[Keyword::BY, Keyword::NAME]) { + if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, Keyword::NAME]) { + SetQuantifier::DistinctByName + } else if self.parse_keywords(&[Keyword::BY, Keyword::NAME]) { SetQuantifier::ByName } else if self.parse_keyword(Keyword::ALL) { if self.parse_keywords(&[Keyword::BY, Keyword::NAME]) { diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index b05cc0dd4..db11d1e77 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -132,155 +132,87 @@ fn test_create_table_macro() { #[test] fn test_select_union_by_name() { - let ast = duckdb().verified_query("SELECT * FROM capitals UNION BY NAME SELECT * FROM weather"); - let expected = Box::::new(SetExpr::SetOperation { - op: SetOperator::Union, - set_quantifier: SetQuantifier::ByName, - left: Box::::new(SetExpr::Select(Box::new(Select { - distinct: None, - top: None, - projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { - opt_exclude: None, - opt_except: None, - opt_rename: None, - opt_replace: None, - })], - into: None, - from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "capitals".to_string(), - quote_style: None, - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - }, - joins: 
vec![], - }], - lateral_views: vec![], - selection: None, - group_by: GroupByExpr::Expressions(vec![]), - cluster_by: vec![], - distribute_by: vec![], - sort_by: vec![], - having: None, - named_window: vec![], - qualify: None, - }))), - right: Box::::new(SetExpr::Select(Box::new(Select { - distinct: None, - top: None, - projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { - opt_exclude: None, - opt_except: None, - opt_rename: None, - opt_replace: None, - })], - into: None, - from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "weather".to_string(), - quote_style: None, - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - }, - joins: vec![], - }], - lateral_views: vec![], - selection: None, - group_by: GroupByExpr::Expressions(vec![]), - cluster_by: vec![], - distribute_by: vec![], - sort_by: vec![], - having: None, - named_window: vec![], - qualify: None, - }))), - }); - - assert_eq!(ast.body, expected); + let q1 = "SELECT * FROM capitals UNION BY NAME SELECT * FROM weather"; + let q2 = "SELECT * FROM capitals UNION ALL BY NAME SELECT * FROM weather"; + let q3 = "SELECT * FROM capitals UNION DISTINCT BY NAME SELECT * FROM weather"; - let ast = - duckdb().verified_query("SELECT * FROM capitals UNION ALL BY NAME SELECT * FROM weather"); - let expected = Box::::new(SetExpr::SetOperation { - op: SetOperator::Union, - set_quantifier: SetQuantifier::AllByName, - left: Box::::new(SetExpr::Select(Box::new(Select { - distinct: None, - top: None, - projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { - opt_exclude: None, - opt_except: None, - opt_rename: None, - opt_replace: None, - })], - into: None, - from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "capitals".to_string(), - quote_style: None, - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - }, - 
joins: vec![], - }], - lateral_views: vec![], - selection: None, - group_by: GroupByExpr::Expressions(vec![]), - cluster_by: vec![], - distribute_by: vec![], - sort_by: vec![], - having: None, - named_window: vec![], - qualify: None, - }))), - right: Box::::new(SetExpr::Select(Box::new(Select { - distinct: None, - top: None, - projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { - opt_exclude: None, - opt_except: None, - opt_rename: None, - opt_replace: None, - })], - into: None, - from: vec![TableWithJoins { - relation: TableFactor::Table { - name: ObjectName(vec![Ident { - value: "weather".to_string(), - quote_style: None, - }]), - alias: None, - args: None, - with_hints: vec![], - version: None, - partitions: vec![], - }, - joins: vec![], - }], - lateral_views: vec![], - selection: None, - group_by: GroupByExpr::Expressions(vec![]), - cluster_by: vec![], - distribute_by: vec![], - sort_by: vec![], - having: None, - named_window: vec![], - qualify: None, - }))), - }); - assert_eq!(ast.body, expected); + for (ast, expected_quantifier) in &[ + (duckdb().verified_query(q1), SetQuantifier::ByName), + (duckdb().verified_query(q2), SetQuantifier::AllByName), + (duckdb().verified_query(q3), SetQuantifier::DistinctByName), + ] { + let expected = Box::::new(SetExpr::SetOperation { + op: SetOperator::Union, + set_quantifier: *expected_quantifier, + left: Box::::new(SetExpr::Select(Box::new(Select { + distinct: None, + top: None, + projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { + opt_exclude: None, + opt_except: None, + opt_rename: None, + opt_replace: None, + })], + into: None, + from: vec![TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident { + value: "capitals".to_string(), + quote_style: None, + }]), + alias: None, + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + }, + joins: vec![], + }], + lateral_views: vec![], + selection: None, + group_by: GroupByExpr::Expressions(vec![]), + 
cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + }))), + right: Box::::new(SetExpr::Select(Box::new(Select { + distinct: None, + top: None, + projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions { + opt_exclude: None, + opt_except: None, + opt_rename: None, + opt_replace: None, + })], + into: None, + from: vec![TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident { + value: "weather".to_string(), + quote_style: None, + }]), + alias: None, + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + }, + joins: vec![], + }], + lateral_views: vec![], + selection: None, + group_by: GroupByExpr::Expressions(vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + }))), + }); + assert_eq!(ast.body, expected); + } } From 56f24ce2361bb2f9ee9d7566c3b1ce256ee02d8b Mon Sep 17 00:00:00 2001 From: Joey Hain Date: Mon, 23 Oct 2023 14:50:45 -0700 Subject: [PATCH 042/735] Support subquery as function arg w/o parens in Snowflake dialect (#996) --- src/parser/mod.rs | 20 +++++++++++++++++++- tests/sqlparser_snowflake.rs | 20 ++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 68a8cef1f..1c1d8b23e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1507,7 +1507,7 @@ impl<'a> Parser<'a> { within_group: false, })); } - // Snowflake defines ORDERY BY in within group instead of inside the function like + // Snowflake defines ORDER BY in within group instead of inside the function like // ANSI SQL. 
self.expect_token(&Token::RParen)?; let within_group = if self.parse_keywords(&[Keyword::WITHIN, Keyword::GROUP]) { @@ -6914,6 +6914,24 @@ impl<'a> Parser<'a> { if self.consume_token(&Token::RParen) { Ok((vec![], vec![])) } else { + // Snowflake permits a subquery to be passed as an argument without + // an enclosing set of parens if it's the only argument. + if dialect_of!(self is SnowflakeDialect) + && self + .parse_one_of_keywords(&[Keyword::WITH, Keyword::SELECT]) + .is_some() + { + self.prev_token(); + let subquery = self.parse_query()?; + self.expect_token(&Token::RParen)?; + return Ok(( + vec![FunctionArg::Unnamed(FunctionArgExpr::from( + WildcardExpr::Expr(Expr::Subquery(Box::new(subquery))), + ))], + vec![], + )); + } + let args = self.parse_comma_separated(Parser::parse_function_args)?; let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { self.parse_comma_separated(Parser::parse_order_by_expr)? diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index bb988665d..79c9eb1ea 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1065,3 +1065,23 @@ fn test_snowflake_trim() { snowflake().parse_sql_statements(error_sql).unwrap_err() ); } + +#[test] +fn parse_subquery_function_argument() { + // Snowflake allows passing an unparenthesized subquery as the single + // argument to a function. + snowflake().one_statement_parses_to( + "SELECT parse_json(SELECT '{}')", + "SELECT parse_json((SELECT '{}'))", + ); + + // Subqueries that begin with WITH work too. + snowflake().one_statement_parses_to( + "SELECT parse_json(WITH q AS (SELECT '{}' AS foo) SELECT foo FROM q)", + "SELECT parse_json((WITH q AS (SELECT '{}' AS foo) SELECT foo FROM q))", + ); + + // Commas are parsed as part of the subquery, not additional arguments to + // the function. 
+ snowflake().one_statement_parses_to("SELECT func(SELECT 1, 2)", "SELECT func((SELECT 1, 2))"); +} From e857a452016d82dfc00398a5483ce9551dff9565 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Mon, 23 Oct 2023 23:55:11 +0200 Subject: [PATCH 043/735] Support `SELECT * EXCEPT/REPLACE` syntax from ClickHouse (#1013) --- src/ast/query.rs | 2 ++ src/parser/mod.rs | 37 +++++++++++++++++++++++------------ tests/sqlparser_clickhouse.rs | 18 +++++++++++++++++ 3 files changed, 44 insertions(+), 13 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 824fab1ba..4289b0bde 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -434,11 +434,13 @@ pub struct WildcardAdditionalOptions { /// `[EXCLUDE...]`. pub opt_exclude: Option, /// `[EXCEPT...]`. + /// Clickhouse syntax: pub opt_except: Option, /// `[RENAME ...]`. pub opt_rename: Option, /// `[REPLACE]` /// BigQuery syntax: + /// Clickhouse syntax: pub opt_replace: Option, } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 1c1d8b23e..9e0d595cb 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6993,7 +6993,8 @@ impl<'a> Parser<'a> { } else { None }; - let opt_except = if dialect_of!(self is GenericDialect | BigQueryDialect) { + let opt_except = if dialect_of!(self is GenericDialect | BigQueryDialect | ClickHouseDialect) + { self.parse_optional_select_item_except()? } else { None @@ -7004,7 +7005,8 @@ impl<'a> Parser<'a> { None }; - let opt_replace = if dialect_of!(self is GenericDialect | BigQueryDialect) { + let opt_replace = if dialect_of!(self is GenericDialect | BigQueryDialect | ClickHouseDialect) + { self.parse_optional_select_item_replace()? } else { None @@ -7047,18 +7049,27 @@ impl<'a> Parser<'a> { &mut self, ) -> Result, ParserError> { let opt_except = if self.parse_keyword(Keyword::EXCEPT) { - let idents = self.parse_parenthesized_column_list(Mandatory, false)?; - match &idents[..] 
{ - [] => { - return self.expected( - "at least one column should be parsed by the expect clause", - self.peek_token(), - )?; + if self.peek_token().token == Token::LParen { + let idents = self.parse_parenthesized_column_list(Mandatory, false)?; + match &idents[..] { + [] => { + return self.expected( + "at least one column should be parsed by the expect clause", + self.peek_token(), + )?; + } + [first, idents @ ..] => Some(ExceptSelectItem { + first_element: first.clone(), + additional_elements: idents.to_vec(), + }), } - [first, idents @ ..] => Some(ExceptSelectItem { - first_element: first.clone(), - additional_elements: idents.to_vec(), - }), + } else { + // Clickhouse allows EXCEPT column_name + let ident = self.parse_identifier()?; + Some(ExceptSelectItem { + first_element: ident, + additional_elements: vec![], + }) } } else { None diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 9efe4a368..8cca0da0b 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -355,6 +355,24 @@ fn parse_limit_by() { ); } +#[test] +fn parse_select_star_except() { + clickhouse().verified_stmt("SELECT * EXCEPT (prev_status) FROM anomalies"); +} + +#[test] +fn parse_select_star_except_no_parens() { + clickhouse().one_statement_parses_to( + "SELECT * EXCEPT prev_status FROM anomalies", + "SELECT * EXCEPT (prev_status) FROM anomalies", + ); +} + +#[test] +fn parse_select_star_replace() { + clickhouse().verified_stmt("SELECT * REPLACE (i + 1 AS i) FROM columns_transformers"); +} + fn clickhouse() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {})], From ce62fe6d274d354fef34fad919b58f6ba16c61a3 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Tue, 24 Oct 2023 00:06:39 +0200 Subject: [PATCH 044/735] Support `FILTER` in over clause (#1007) Co-authored-by: Andrew Lamb --- README.md | 2 +- src/ast/mod.rs | 9 +++++++++ src/ast/visitor.rs | 2 +- src/dialect/sqlite.rs | 4 ++++ src/parser/mod.rs | 14 
++++++++++++++ tests/sqlparser_bigquery.rs | 1 + tests/sqlparser_clickhouse.rs | 4 ++++ tests/sqlparser_common.rs | 19 +++++++++++++++++++ tests/sqlparser_hive.rs | 1 + tests/sqlparser_mssql.rs | 1 + tests/sqlparser_mysql.rs | 6 ++++++ tests/sqlparser_postgres.rs | 6 ++++++ tests/sqlparser_redshift.rs | 1 + tests/sqlparser_snowflake.rs | 1 + tests/sqlparser_sqlite.rs | 33 +++++++++++++++++++++++++++++++++ 15 files changed, 102 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 454ea6c29..e987c2a21 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ println!("AST: {:?}", ast); This outputs ```rust -AST: [Query(Query { ctes: [], body: Select(Select { distinct: false, projection: [UnnamedExpr(Identifier("a")), UnnamedExpr(Identifier("b")), UnnamedExpr(Value(Long(123))), UnnamedExpr(Function(Function { name: ObjectName(["myfunc"]), args: [Identifier("b")], over: None, distinct: false }))], from: [TableWithJoins { relation: Table { name: ObjectName(["table_1"]), alias: None, args: [], with_hints: [] }, joins: [] }], selection: Some(BinaryOp { left: BinaryOp { left: Identifier("a"), op: Gt, right: Identifier("b") }, op: And, right: BinaryOp { left: Identifier("b"), op: Lt, right: Value(Long(100)) } }), group_by: [], having: None }), order_by: [OrderByExpr { expr: Identifier("a"), asc: Some(false) }, OrderByExpr { expr: Identifier("b"), asc: None }], limit: None, offset: None, fetch: None })] +AST: [Query(Query { ctes: [], body: Select(Select { distinct: false, projection: [UnnamedExpr(Identifier("a")), UnnamedExpr(Identifier("b")), UnnamedExpr(Value(Long(123))), UnnamedExpr(Function(Function { name: ObjectName(["myfunc"]), args: [Identifier("b")], filter: None, over: None, distinct: false }))], from: [TableWithJoins { relation: Table { name: ObjectName(["table_1"]), alias: None, args: [], with_hints: [] }, joins: [] }], selection: Some(BinaryOp { left: BinaryOp { left: Identifier("a"), op: Gt, right: Identifier("b") }, op: And, right: BinaryOp { 
left: Identifier("b"), op: Lt, right: Value(Long(100)) } }), group_by: [], having: None }), order_by: [OrderByExpr { expr: Identifier("a"), asc: Some(false) }, OrderByExpr { expr: Identifier("b"), asc: None }], limit: None, offset: None, fetch: None })] ``` diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 3b0030017..11ce9b810 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1070,8 +1070,11 @@ impl Display for WindowType { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct WindowSpec { + /// `OVER (PARTITION BY ...)` pub partition_by: Vec, + /// `OVER (ORDER BY ...)` pub order_by: Vec, + /// `OVER (window frame)` pub window_frame: Option, } @@ -3729,6 +3732,8 @@ impl fmt::Display for CloseCursor { pub struct Function { pub name: ObjectName, pub args: Vec, + /// e.g. `x > 5` in `COUNT(x) FILTER (WHERE x > 5)` + pub filter: Option>, pub over: Option, // aggregate functions may specify eg `COUNT(DISTINCT x)` pub distinct: bool, @@ -3777,6 +3782,10 @@ impl fmt::Display for Function { display_comma_separated(&self.order_by), )?; + if let Some(filter_cond) = &self.filter { + write!(f, " FILTER (WHERE {filter_cond})")?; + } + if let Some(o) = &self.over { write!(f, " OVER {o}")?; } diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index 09cb20a0c..4e025f962 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -506,7 +506,7 @@ where /// *expr = Expr::Function(Function { /// name: ObjectName(vec![Ident::new("f")]), /// args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(old_expr))], -/// over: None, distinct: false, special: false, order_by: vec![], +/// filter: None, over: None, distinct: false, special: false, order_by: vec![], /// }); /// } /// ControlFlow::<()>::Continue(()) diff --git a/src/dialect/sqlite.rs b/src/dialect/sqlite.rs index fa21224f6..37c7c7fa7 100644 --- a/src/dialect/sqlite.rs +++ b/src/dialect/sqlite.rs @@ -35,6 +35,10 @@ impl Dialect for SQLiteDialect { 
|| ('\u{007f}'..='\u{ffff}').contains(&ch) } + fn supports_filter_during_aggregation(&self) -> bool { + true + } + fn is_identifier_part(&self, ch: char) -> bool { self.is_identifier_start(ch) || ch.is_ascii_digit() } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 9e0d595cb..3bf5228c4 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -772,6 +772,7 @@ impl<'a> Parser<'a> { Ok(Expr::Function(Function { name: ObjectName(vec![w.to_ident()]), args: vec![], + filter: None, over: None, distinct: false, special: true, @@ -957,6 +958,17 @@ impl<'a> Parser<'a> { self.expect_token(&Token::LParen)?; let distinct = self.parse_all_or_distinct()?.is_some(); let (args, order_by) = self.parse_optional_args_with_orderby()?; + let filter = if self.dialect.supports_filter_during_aggregation() + && self.parse_keyword(Keyword::FILTER) + && self.consume_token(&Token::LParen) + && self.parse_keyword(Keyword::WHERE) + { + let filter = Some(Box::new(self.parse_expr()?)); + self.expect_token(&Token::RParen)?; + filter + } else { + None + }; let over = if self.parse_keyword(Keyword::OVER) { if self.consume_token(&Token::LParen) { let window_spec = self.parse_window_spec()?; @@ -970,6 +982,7 @@ impl<'a> Parser<'a> { Ok(Expr::Function(Function { name, args, + filter, over, distinct, special: false, @@ -987,6 +1000,7 @@ impl<'a> Parser<'a> { Ok(Expr::Function(Function { name, args, + filter: None, over: None, distinct: false, special, diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index b3f683b9a..fe95b1873 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -564,6 +564,7 @@ fn parse_map_access_offset() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( number("0") ))),], + filter: None, over: None, distinct: false, special: false, diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 8cca0da0b..7d9cb0309 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs 
@@ -50,6 +50,7 @@ fn parse_map_access_expr() { Value::SingleQuotedString("endpoint".to_string()) ))), ], + filter: None, over: None, distinct: false, special: false, @@ -89,6 +90,7 @@ fn parse_map_access_expr() { Value::SingleQuotedString("app".to_string()) ))), ], + filter: None, over: None, distinct: false, special: false, @@ -138,6 +140,7 @@ fn parse_array_fn() { FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(Ident::new("x1")))), FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(Ident::new("x2")))), ], + filter: None, over: None, distinct: false, special: false, @@ -196,6 +199,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + filter: None, over: None, distinct: false, special: false, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 3b8775e45..9eb52f6ec 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -875,6 +875,7 @@ fn parse_select_count_wildcard() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("COUNT")]), args: vec![FunctionArg::Unnamed(FunctionArgExpr::Wildcard)], + filter: None, over: None, distinct: false, special: false, @@ -895,6 +896,7 @@ fn parse_select_count_distinct() { op: UnaryOperator::Plus, expr: Box::new(Expr::Identifier(Ident::new("x"))), }))], + filter: None, over: None, distinct: true, special: false, @@ -1862,6 +1864,7 @@ fn parse_select_having() { left: Box::new(Expr::Function(Function { name: ObjectName(vec![Ident::new("COUNT")]), args: vec![FunctionArg::Unnamed(FunctionArgExpr::Wildcard)], + filter: None, over: None, distinct: false, special: false, @@ -1887,6 +1890,7 @@ fn parse_select_qualify() { left: Box::new(Expr::Function(Function { name: ObjectName(vec![Ident::new("ROW_NUMBER")]), args: vec![], + filter: None, over: Some(WindowType::WindowSpec(WindowSpec { partition_by: vec![Expr::Identifier(Ident::new("p"))], order_by: vec![OrderByExpr { @@ -3342,6 +3346,7 
@@ fn parse_scalar_function_in_projection() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("id")) ))], + filter: None, over: None, distinct: false, special: false, @@ -3461,6 +3466,7 @@ fn parse_named_argument_function() { ))), }, ], + filter: None, over: None, distinct: false, special: false, @@ -3492,6 +3498,7 @@ fn parse_window_functions() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("row_number")]), args: vec![], + filter: None, over: Some(WindowType::WindowSpec(WindowSpec { partition_by: vec![], order_by: vec![OrderByExpr { @@ -3535,6 +3542,7 @@ fn test_parse_named_window() { quote_style: None, }), ))], + filter: None, over: Some(WindowType::NamedWindow(Ident { value: "window1".to_string(), quote_style: None, @@ -3560,6 +3568,7 @@ fn test_parse_named_window() { quote_style: None, }), ))], + filter: None, over: Some(WindowType::NamedWindow(Ident { value: "window2".to_string(), quote_style: None, @@ -4029,6 +4038,7 @@ fn parse_at_timezone() { quote_style: None, }]), args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(zero.clone()))], + filter: None, over: None, distinct: false, special: false, @@ -4056,6 +4066,7 @@ fn parse_at_timezone() { quote_style: None, },],), args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(zero))], + filter: None, over: None, distinct: false, special: false, @@ -4067,6 +4078,7 @@ fn parse_at_timezone() { Value::SingleQuotedString("%Y-%m-%dT%H".to_string()), ),),), ], + filter: None, over: None, distinct: false, special: false, @@ -4225,6 +4237,7 @@ fn parse_table_function() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( Value::SingleQuotedString("1".to_owned()), )))], + filter: None, over: None, distinct: false, special: false, @@ -4376,6 +4389,7 @@ fn parse_unnest_in_from_clause() { FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("2")))), FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("3")))), ], + filter: None, over: None, 
distinct: false, special: false, @@ -4405,6 +4419,7 @@ fn parse_unnest_in_from_clause() { FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("2")))), FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("3")))), ], + filter: None, over: None, distinct: false, special: false, @@ -4416,6 +4431,7 @@ fn parse_unnest_in_from_clause() { FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("5")))), FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("6")))), ], + filter: None, over: None, distinct: false, special: false, @@ -6888,6 +6904,7 @@ fn parse_time_functions() { let select_localtime_func_call_ast = Function { name: ObjectName(vec![Ident::new(func_name)]), args: vec![], + filter: None, over: None, distinct: false, special: false, @@ -7374,6 +7391,7 @@ fn parse_pivot_table() { args: (vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::CompoundIdentifier(vec![Ident::new("a"), Ident::new("amount"),]) ))]), + filter: None, over: None, distinct: false, special: false, @@ -7523,6 +7541,7 @@ fn parse_pivot_unpivot_table() { args: (vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("population")) ))]), + filter: None, over: None, distinct: false, special: false, diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 6ca47e12c..6f3a8f994 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -346,6 +346,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + filter: None, over: None, distinct: false, special: false, diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index f9eb4d8fb..ebadf95f2 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -334,6 +334,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + filter: None, over: None, distinct: false, special: false, diff --git 
a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 6e59198d7..3bcb84439 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1071,6 +1071,7 @@ fn parse_insert_with_on_duplicate_update() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("description")) ))], + filter: None, over: None, distinct: false, special: false, @@ -1084,6 +1085,7 @@ fn parse_insert_with_on_duplicate_update() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("perm_create")) ))], + filter: None, over: None, distinct: false, special: false, @@ -1097,6 +1099,7 @@ fn parse_insert_with_on_duplicate_update() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("perm_read")) ))], + filter: None, over: None, distinct: false, special: false, @@ -1110,6 +1113,7 @@ fn parse_insert_with_on_duplicate_update() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("perm_update")) ))], + filter: None, over: None, distinct: false, special: false, @@ -1123,6 +1127,7 @@ fn parse_insert_with_on_duplicate_update() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("perm_delete")) ))], + filter: None, over: None, distinct: false, special: false, @@ -1512,6 +1517,7 @@ fn parse_table_colum_option_on_update() { option: ColumnOption::OnUpdate(Expr::Function(Function { name: ObjectName(vec![Ident::new("CURRENT_TIMESTAMP")]), args: vec![], + filter: None, over: None, distinct: false, special: false, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 654723668..0256579db 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2275,6 +2275,7 @@ fn test_composite_value() { named: true } )))], + filter: None, over: None, distinct: false, special: false, @@ -2436,6 +2437,7 @@ fn parse_current_functions() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("CURRENT_CATALOG")]), 
args: vec![], + filter: None, over: None, distinct: false, special: true, @@ -2447,6 +2449,7 @@ fn parse_current_functions() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("CURRENT_USER")]), args: vec![], + filter: None, over: None, distinct: false, special: true, @@ -2458,6 +2461,7 @@ fn parse_current_functions() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("SESSION_USER")]), args: vec![], + filter: None, over: None, distinct: false, special: true, @@ -2469,6 +2473,7 @@ fn parse_current_functions() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("USER")]), args: vec![], + filter: None, over: None, distinct: false, special: true, @@ -2919,6 +2924,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + filter: None, over: None, distinct: false, special: false, diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 5ae539b3c..6238d1eca 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -137,6 +137,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + filter: None, over: None, distinct: false, special: false, diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 79c9eb1ea..3319af7b9 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -248,6 +248,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + filter: None, over: None, distinct: false, special: false, diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 39a82cc8b..8d7ccf315 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -290,6 +290,39 @@ fn parse_create_table_with_strict() { } } +#[test] +fn parse_window_function_with_filter() { + for func_name in [ + "row_number", + 
"rank", + "max", + "count", + "user_defined_function", + ] { + let sql = format!("SELECT {}(x) FILTER (WHERE y) OVER () FROM t", func_name); + let select = sqlite().verified_only_select(&sql); + assert_eq!(select.to_string(), sql); + assert_eq!( + select.projection, + vec![SelectItem::UnnamedExpr(Expr::Function(Function { + name: ObjectName(vec![Ident::new(func_name)]), + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( + Expr::Identifier(Ident::new("x")) + ))], + over: Some(WindowType::WindowSpec(WindowSpec { + partition_by: vec![], + order_by: vec![], + window_frame: None, + })), + filter: Some(Box::new(Expr::Identifier(Ident::new("y")))), + distinct: false, + special: false, + order_by: vec![] + }))] + ); + } +} + #[test] fn parse_attach_database() { let sql = "ATTACH DATABASE 'test.db' AS test"; From 2798b65b42c529bd089742a2028e94d59d82e493 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 23 Oct 2023 18:07:00 -0400 Subject: [PATCH 045/735] snowflake/generic: `position` can be the name of a column (#1022) Co-authored-by: Lukasz Stefaniak --- src/parser/mod.rs | 4 +++- tests/sqlparser_snowflake.rs | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3bf5228c4..e79f31bac 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -794,7 +794,9 @@ impl<'a> Parser<'a> { Keyword::EXTRACT => self.parse_extract_expr(), Keyword::CEIL => self.parse_ceil_floor_expr(true), Keyword::FLOOR => self.parse_ceil_floor_expr(false), - Keyword::POSITION => self.parse_position_expr(), + Keyword::POSITION if self.peek_token().token == Token::LParen => { + self.parse_position_expr() + } Keyword::SUBSTRING => self.parse_substring_expr(), Keyword::OVERLAY => self.parse_overlay_expr(), Keyword::TRIM => self.parse_trim_expr(), diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 3319af7b9..7e6f18138 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1067,6 
+1067,12 @@ fn test_snowflake_trim() { ); } +#[test] +fn parse_position_not_function_columns() { + snowflake_and_generic() + .verified_stmt("SELECT position FROM tbl1 WHERE position NOT IN ('first', 'last')"); +} + #[test] fn parse_subquery_function_argument() { // Snowflake allows passing an unparenthesized subquery as the single From 8b2a248d7b90edce93e8c443d31a790d553fc0c2 Mon Sep 17 00:00:00 2001 From: Ilya Date: Tue, 24 Oct 2023 01:07:39 +0300 Subject: [PATCH 046/735] parse SQLite pragma statement (#969) Co-authored-by: Andrew Lamb --- src/ast/mod.rs | 18 ++++++++++++++++ src/keywords.rs | 1 + src/parser/mod.rs | 28 ++++++++++++++++++++++++ tests/sqlparser_sqlite.rs | 45 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 92 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 11ce9b810..5aa42c96f 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1914,6 +1914,12 @@ pub enum Statement { name: ObjectName, representation: UserDefinedTypeRepresentation, }, + // PRAGMA . 
= + Pragma { + name: ObjectName, + value: Option, + is_eq: bool, + }, } impl fmt::Display for Statement { @@ -3276,6 +3282,18 @@ impl fmt::Display for Statement { } => { write!(f, "CREATE TYPE {name} AS {representation}") } + Statement::Pragma { name, value, is_eq } => { + write!(f, "PRAGMA {name}")?; + if value.is_some() { + let val = value.as_ref().unwrap(); + if *is_eq { + write!(f, " = {val}")?; + } else { + write!(f, "({val})")?; + } + } + Ok(()) + } } } } diff --git a/src/keywords.rs b/src/keywords.rs index 6327ccc84..405203601 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -472,6 +472,7 @@ define_keywords!( POSITION, POSITION_REGEX, POWER, + PRAGMA, PRECEDES, PRECEDING, PRECISION, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e79f31bac..f83f019ea 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -491,6 +491,8 @@ impl<'a> Parser<'a> { Keyword::EXECUTE => Ok(self.parse_execute()?), Keyword::PREPARE => Ok(self.parse_prepare()?), Keyword::MERGE => Ok(self.parse_merge()?), + // `PRAGMA` is sqlite specific https://www.sqlite.org/pragma.html + Keyword::PRAGMA => Ok(self.parse_pragma()?), _ => self.expected("an SQL statement", next_token), }, Token::LParen => { @@ -7502,6 +7504,32 @@ impl<'a> Parser<'a> { }) } + // PRAGMA [schema-name '.'] pragma-name [('=' pragma-value) | '(' pragma-value ')'] + pub fn parse_pragma(&mut self) -> Result { + let name = self.parse_object_name()?; + if self.consume_token(&Token::LParen) { + let value = self.parse_number_value()?; + self.expect_token(&Token::RParen)?; + Ok(Statement::Pragma { + name, + value: Some(value), + is_eq: false, + }) + } else if self.consume_token(&Token::Eq) { + Ok(Statement::Pragma { + name, + value: Some(self.parse_number_value()?), + is_eq: true, + }) + } else { + Ok(Statement::Pragma { + name, + value: None, + is_eq: false, + }) + } + } + /// ```sql /// CREATE [ { TEMPORARY | TEMP } ] SEQUENCE [ IF NOT EXISTS ] /// ``` diff --git a/tests/sqlparser_sqlite.rs 
b/tests/sqlparser_sqlite.rs index 8d7ccf315..2fdd4e3de 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -24,6 +24,51 @@ use sqlparser::ast::*; use sqlparser::dialect::{GenericDialect, SQLiteDialect}; use sqlparser::tokenizer::Token; +#[test] +fn pragma_no_value() { + let sql = "PRAGMA cache_size"; + match sqlite_and_generic().verified_stmt(sql) { + Statement::Pragma { + name, + value: None, + is_eq: false, + } => { + assert_eq!("cache_size", name.to_string()); + } + _ => unreachable!(), + } +} +#[test] +fn pragma_eq_style() { + let sql = "PRAGMA cache_size = 10"; + match sqlite_and_generic().verified_stmt(sql) { + Statement::Pragma { + name, + value: Some(val), + is_eq: true, + } => { + assert_eq!("cache_size", name.to_string()); + assert_eq!("10", val.to_string()); + } + _ => unreachable!(), + } +} +#[test] +fn pragma_funciton_style() { + let sql = "PRAGMA cache_size(10)"; + match sqlite_and_generic().verified_stmt(sql) { + Statement::Pragma { + name, + value: Some(val), + is_eq: false, + } => { + assert_eq!("cache_size", name.to_string()); + assert_eq!("10", val.to_string()); + } + _ => unreachable!(), + } +} + #[test] fn parse_create_table_without_rowid() { let sql = "CREATE TABLE t (a INT) WITHOUT ROWID"; From 6739d377bd2c5acfbc4d4631651ee7a857caefec Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 23 Oct 2023 18:09:02 -0400 Subject: [PATCH 047/735] Add docstrings for `Dialect`s, update README (#1016) --- README.md | 31 +++++++++++++++++++++---------- src/dialect/ansi.rs | 1 + src/dialect/bigquery.rs | 1 + src/dialect/clickhouse.rs | 1 + src/dialect/duckdb.rs | 1 + src/dialect/generic.rs | 2 ++ src/dialect/hive.rs | 1 + src/dialect/mod.rs | 3 +++ src/dialect/mssql.rs | 2 +- src/dialect/mysql.rs | 2 +- src/dialect/postgresql.rs | 1 + src/dialect/redshift.rs | 1 + src/dialect/snowflake.rs | 1 + src/dialect/sqlite.rs | 1 + 14 files changed, 37 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index e987c2a21..58f5b8d48 
100644 --- a/README.md +++ b/README.md @@ -124,28 +124,36 @@ parser](docs/custom_sql_parser.md). ## Contributing Contributions are highly encouraged! However, the bandwidth we have to -maintain this crate is fairly limited. +maintain this crate is limited. Please read the following sections carefully. -Pull requests that add support for or fix a bug in a feature in the -SQL standard, or a feature in a popular RDBMS, like Microsoft SQL +### New Syntax + +The most commonly accepted PRs add support for or fix a bug in a feature in the +SQL standard, or a a popular RDBMS, such as Microsoft SQL Server or PostgreSQL, will likely be accepted after a brief -review. +review. Any SQL feature that is dialect specific should be parsed by *both* the relevant [`Dialect`] +as well as [`GenericDialect`]. + +### Major API Changes The current maintainers do not plan for any substantial changes to -this crate's API at this time. And thus, PRs proposing major refactors +this crate's API. PRs proposing major refactors are not likely to be accepted. -Please be aware that, while we hope to review PRs in a reasonably -timely fashion, it may take a while. In order to speed the process, +### Testing + +While we hope to review PRs in a reasonably +timely fashion, it may take a week or more. In order to speed the process, please make sure the PR passes all CI checks, and includes tests demonstrating your code works as intended (and to avoid regressions). Remember to also test error paths. PRs without tests will not be reviewed or merged. Since the CI ensures that `cargo test`, `cargo fmt`, and `cargo clippy`, pass you -will likely want to run all three commands locally before submitting +should likely to run all three commands locally before submitting your PR. +### Filing Issues If you are unable to submit a patch, feel free to file an issue instead. 
Please try to include: @@ -156,8 +164,9 @@ try to include: * links to documentation for the feature for a few of the most popular databases that support it. -If you need support for a feature, you will likely need to implement -it yourself. Our goal as maintainers is to facilitate the integration +Unfortunately, if you need support for a feature, you will likely need to implement +it yourself, or file a well enough described ticket that another member of the community can do so. +Our goal as maintainers is to facilitate the integration of various features from various contributors, but not to provide the implementations ourselves, as we simply don't have the resources. @@ -183,3 +192,5 @@ licensed as above, without any additional terms or conditions. [Pratt Parser]: https://tdop.github.io/ [sql-2016-grammar]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html [sql-standard]: https://en.wikipedia.org/wiki/ISO/IEC_9075 +[`Dialect`]: https://docs.rs/sqlparser/latest/sqlparser/dialect/trait.Dialect.html +[`GenericDialect`]: https://docs.rs/sqlparser/latest/sqlparser/dialect/struct.GenericDialect.html diff --git a/src/dialect/ansi.rs b/src/dialect/ansi.rs index 14c83ae16..d07bc07eb 100644 --- a/src/dialect/ansi.rs +++ b/src/dialect/ansi.rs @@ -12,6 +12,7 @@ use crate::dialect::Dialect; +/// A [`Dialect`] for [ANSI SQL](https://en.wikipedia.org/wiki/SQL:2011). 
#[derive(Debug)] pub struct AnsiDialect {} diff --git a/src/dialect/bigquery.rs b/src/dialect/bigquery.rs index 8266a32f0..46f27fea4 100644 --- a/src/dialect/bigquery.rs +++ b/src/dialect/bigquery.rs @@ -12,6 +12,7 @@ use crate::dialect::Dialect; +/// A [`Dialect`] for [Google Bigquery](https://cloud.google.com/bigquery/) #[derive(Debug, Default)] pub struct BigQueryDialect; diff --git a/src/dialect/clickhouse.rs b/src/dialect/clickhouse.rs index 395116f9c..50fbde99e 100644 --- a/src/dialect/clickhouse.rs +++ b/src/dialect/clickhouse.rs @@ -12,6 +12,7 @@ use crate::dialect::Dialect; +// A [`Dialect`] for [ClickHouse](https://clickhouse.com/). #[derive(Debug)] pub struct ClickHouseDialect {} diff --git a/src/dialect/duckdb.rs b/src/dialect/duckdb.rs index 4e6e9d9a4..a4f9309e6 100644 --- a/src/dialect/duckdb.rs +++ b/src/dialect/duckdb.rs @@ -12,6 +12,7 @@ use crate::dialect::Dialect; +/// A [`Dialect`] for [DuckDB](https://duckdb.org/) #[derive(Debug, Default)] pub struct DuckDbDialect; diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 8310954cd..4be4b9e23 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -12,6 +12,8 @@ use crate::dialect::Dialect; +/// A permissive, general purpose [`Dialect`], which parses a wide variety of SQL +/// statements, from many different dialects. #[derive(Debug, Default)] pub struct GenericDialect; diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 96cefb1d9..20800c1d3 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -12,6 +12,7 @@ use crate::dialect::Dialect; +/// A [`Dialect`] for [Hive](https://hive.apache.org/). #[derive(Debug)] pub struct HiveDialect {} diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index e174528b0..625f9ce0a 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -64,6 +64,9 @@ macro_rules! dialect_of { /// custom extensions or various historical reasons. This trait /// encapsulates the parsing differences between dialects. 
/// +/// [`GenericDialect`] is the most permissive dialect, and parses the union of +/// all the other dialects, when there is no ambiguity. +/// /// # Examples /// Most users create a [`Dialect`] directly, as shown on the [module /// level documentation]: diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs index f04398100..26ecd4782 100644 --- a/src/dialect/mssql.rs +++ b/src/dialect/mssql.rs @@ -12,7 +12,7 @@ use crate::dialect::Dialect; -// [Microsoft SQL Server](https://www.microsoft.com/en-us/sql-server/) dialect +/// A [`Dialect`] for [Microsoft SQL Server](https://www.microsoft.com/en-us/sql-server/) #[derive(Debug)] pub struct MsSqlDialect {} diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index 0f914ed02..8c3de74b7 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -19,7 +19,7 @@ use crate::{ keywords::Keyword, }; -/// [MySQL](https://www.mysql.com/) +/// A [`Dialect`] for [MySQL](https://www.mysql.com/) #[derive(Debug)] pub struct MySqlDialect {} diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index d131ff9c6..a0b192c85 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -16,6 +16,7 @@ use crate::keywords::Keyword; use crate::parser::{Parser, ParserError}; use crate::tokenizer::Token; +/// A [`Dialect`] for [PostgreSQL](https://www.postgresql.org/) #[derive(Debug)] pub struct PostgreSqlDialect {} diff --git a/src/dialect/redshift.rs b/src/dialect/redshift.rs index c85f3dc20..73457ab30 100644 --- a/src/dialect/redshift.rs +++ b/src/dialect/redshift.rs @@ -16,6 +16,7 @@ use core::str::Chars; use super::PostgreSqlDialect; +/// A [`Dialect`] for [RedShift](https://aws.amazon.com/redshift/) #[derive(Debug)] pub struct RedshiftSqlDialect {} diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 713394a1e..33425e846 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -28,6 +28,7 @@ use alloc::vec::Vec; #[cfg(not(feature = "std"))] use alloc::{format, vec}; 
+/// A [`Dialect`] for [Snowflake](https://www.snowflake.com/) #[derive(Debug, Default)] pub struct SnowflakeDialect; diff --git a/src/dialect/sqlite.rs b/src/dialect/sqlite.rs index 37c7c7fa7..68515d24f 100644 --- a/src/dialect/sqlite.rs +++ b/src/dialect/sqlite.rs @@ -15,6 +15,7 @@ use crate::dialect::Dialect; use crate::keywords::Keyword; use crate::parser::{Parser, ParserError}; +/// A [`Dialect`] for [SQLite](https://www.sqlite.org) #[derive(Debug)] pub struct SQLiteDialect {} From 86aa1b96be1c1fbf56cbe7cb04e12370df53605c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mehmet=20Emin=20KARAKA=C5=9E?= Date: Tue, 24 Oct 2023 12:45:25 +0300 Subject: [PATCH 048/735] Support `INSERT IGNORE` in `MySql` and `GenericDialect` (#1004) Co-authored-by: Andrew Lamb --- src/ast/mod.rs | 6 +++++- src/parser/mod.rs | 4 ++++ tests/sqlparser_mysql.rs | 41 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 5aa42c96f..17f6d3a04 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1296,6 +1296,8 @@ pub enum Statement { Insert { /// Only for Sqlite or: Option, + /// Only for mysql + ignore: bool, /// INTO - optional keyword into: bool, /// TABLE @@ -2126,6 +2128,7 @@ impl fmt::Display for Statement { } Statement::Insert { or, + ignore, into, table_name, overwrite, @@ -2142,8 +2145,9 @@ impl fmt::Display for Statement { } else { write!( f, - "INSERT{over}{int}{tbl} {table_name} ", + "INSERT{ignore}{over}{int}{tbl} {table_name} ", table_name = table_name, + ignore = if *ignore { " IGNORE" } else { "" }, over = if *overwrite { " OVERWRITE" } else { "" }, int = if *into { " INTO" } else { "" }, tbl = if *table { " TABLE" } else { "" } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f83f019ea..d0b11ffea 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6755,6 +6755,9 @@ impl<'a> Parser<'a> { None }; + let ignore = dialect_of!(self is MySqlDialect | GenericDialect) + && 
self.parse_keyword(Keyword::IGNORE); + let action = self.parse_one_of_keywords(&[Keyword::INTO, Keyword::OVERWRITE]); let into = action == Some(Keyword::INTO); let overwrite = action == Some(Keyword::OVERWRITE); @@ -6852,6 +6855,7 @@ impl<'a> Parser<'a> { Ok(Statement::Insert { or, table_name, + ignore, into, overwrite, partitioned, diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 3bcb84439..8391bbadb 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -972,6 +972,47 @@ fn parse_simple_insert() { } } +#[test] +fn parse_ignore_insert() { + let sql = r"INSERT IGNORE INTO tasks (title, priority) VALUES ('Test Some Inserts', 1)"; + + match mysql_and_generic().verified_stmt(sql) { + Statement::Insert { + table_name, + columns, + source, + on, + ignore, + .. + } => { + assert_eq!(ObjectName(vec![Ident::new("tasks")]), table_name); + assert_eq!(vec![Ident::new("title"), Ident::new("priority")], columns); + assert!(on.is_none()); + assert!(ignore); + assert_eq!( + Box::new(Query { + with: None, + body: Box::new(SetExpr::Values(Values { + explicit_row: false, + rows: vec![vec![ + Expr::Value(Value::SingleQuotedString("Test Some Inserts".to_string())), + Expr::Value(number("1")) + ]] + })), + order_by: vec![], + limit: None, + limit_by: vec![], + offset: None, + fetch: None, + locks: vec![] + }), + source + ); + } + _ => unreachable!(), + } +} + #[test] fn parse_empty_row_insert() { let sql = "INSERT INTO tb () VALUES (), ()"; From 57090537f0b2984681ff9333c57f8a8ce7c995cb Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Tue, 24 Oct 2023 12:30:05 +0200 Subject: [PATCH 049/735] Test that `regexp` can be used as an identifier in postgres (#1018) --- tests/sqlparser_postgres.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 0256579db..64fcbd38a 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -3390,6 +3390,13 @@ fn parse_truncate() { 
); } +#[test] +fn parse_select_regexp_as_column_name() { + pg_and_generic().verified_only_select( + "SELECT REGEXP.REGEXP AS REGEXP FROM REGEXP AS REGEXP WHERE REGEXP.REGEXP", + ); +} + #[test] fn parse_create_table_with_alias() { let sql = "CREATE TABLE public.datatype_aliases From 9832adb37651da83483263cd652ff6ab01a7060f Mon Sep 17 00:00:00 2001 From: Chris A Date: Tue, 24 Oct 2023 05:33:51 -0500 Subject: [PATCH 050/735] Support "with" identifiers surrounded by backticks in `GenericDialect` (#1010) --- src/dialect/mod.rs | 2 +- tests/sqlparser_hive.rs | 23 ++++++++++++++++------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 625f9ce0a..856cfe1c9 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -95,7 +95,7 @@ pub trait Dialect: Debug + Any { /// MySQL, MS SQL, and sqlite). You can accept one of characters listed /// in `Word::matching_end_quote` here fn is_delimited_identifier_start(&self, ch: char) -> bool { - ch == '"' + ch == '"' || ch == '`' } /// Determine if quoted characters are proper for identifier fn is_proper_identifier_inside_quotes(&self, mut _chars: Peekable>) -> bool { diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 6f3a8f994..f63b9cef9 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -20,7 +20,7 @@ use sqlparser::ast::{ SelectItem, Statement, TableFactor, UnaryOperator, Value, }; use sqlparser::dialect::{GenericDialect, HiveDialect}; -use sqlparser::parser::ParserError; +use sqlparser::parser::{ParserError, ParserOptions}; use sqlparser::test_utils::*; #[test] @@ -32,6 +32,20 @@ fn parse_table_create() { hive().verified_stmt(iof); } +fn generic(options: Option) -> TestedDialects { + TestedDialects { + dialects: vec![Box::new(GenericDialect {})], + options, + } +} + +#[test] +fn parse_describe() { + let describe = r#"DESCRIBE namespace.`table`"#; + hive().verified_stmt(describe); + generic(None).verified_stmt(describe); +} + #[test] fn 
parse_insert_overwrite() { let insert_partitions = r#"INSERT OVERWRITE TABLE db.new_table PARTITION (a = '1', b) SELECT a, b, c FROM db.table"#; @@ -265,13 +279,8 @@ fn parse_create_function() { _ => unreachable!(), } - let generic = TestedDialects { - dialects: vec![Box::new(GenericDialect {})], - options: None, - }; - assert_eq!( - generic.parse_sql_statements(sql).unwrap_err(), + generic(None).parse_sql_statements(sql).unwrap_err(), ParserError::ParserError( "Expected an object type after CREATE, found: FUNCTION".to_string() ) From 004a8dc5ddbbbfc0935c09fb572cd6161af33525 Mon Sep 17 00:00:00 2001 From: Chris A Date: Tue, 24 Oct 2023 06:19:01 -0500 Subject: [PATCH 051/735] Support multiple `PARTITION` statements in `ALTER TABLE ADD` statement (#1011) Co-authored-by: Chris A Co-authored-by: Andrew Lamb --- src/ast/ddl.rs | 24 +++++++++++++++++++++--- src/ast/mod.rs | 5 +++-- src/dialect/generic.rs | 4 ++++ src/parser/mod.rs | 22 +++++++++++++++++----- tests/sqlparser_hive.rs | 6 ++++++ 5 files changed, 51 insertions(+), 10 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index f1575d979..da2c8c9e4 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -69,7 +69,7 @@ pub enum AlterTableOperation { /// Add Partitions AddPartitions { if_not_exists: bool, - new_partitions: Vec, + new_partitions: Vec, }, DropPartitions { partitions: Vec, @@ -119,8 +119,8 @@ impl fmt::Display for AlterTableOperation { new_partitions, } => write!( f, - "ADD{ine} PARTITION ({})", - display_comma_separated(new_partitions), + "ADD{ine} {}", + display_separated(new_partitions, " "), ine = if *if_not_exists { " IF NOT EXISTS" } else { "" } ), AlterTableOperation::AddConstraint(c) => write!(f, "ADD {c}"), @@ -771,3 +771,21 @@ impl fmt::Display for UserDefinedTypeCompositeAttributeDef { Ok(()) } } + +/// PARTITION statement used in ALTER TABLE et al. 
such as in Hive SQL +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Partition { + pub partitions: Vec, +} + +impl fmt::Display for Partition { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "PARTITION ({})", + display_comma_separated(&self.partitions) + ) + } +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 17f6d3a04..4c69d3ed0 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -31,8 +31,9 @@ pub use self::data_type::{ pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue}; pub use self::ddl::{ AlterColumnOperation, AlterIndexOperation, AlterTableOperation, ColumnDef, ColumnOption, - ColumnOptionDef, GeneratedAs, IndexType, KeyOrIndexDisplay, ProcedureParam, ReferentialAction, - TableConstraint, UserDefinedTypeCompositeAttributeDef, UserDefinedTypeRepresentation, + ColumnOptionDef, GeneratedAs, IndexType, KeyOrIndexDisplay, Partition, ProcedureParam, + ReferentialAction, TableConstraint, UserDefinedTypeCompositeAttributeDef, + UserDefinedTypeRepresentation, }; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 4be4b9e23..ea5cc6c34 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -18,6 +18,10 @@ use crate::dialect::Dialect; pub struct GenericDialect; impl Dialect for GenericDialect { + fn is_delimited_identifier_start(&self, ch: char) -> bool { + ch == '"' || ch == '`' + } + fn is_identifier_start(&self, ch: char) -> bool { ch.is_alphabetic() || ch == '_' || ch == '#' || ch == '@' } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d0b11ffea..8930b0f49 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4195,6 +4195,13 @@ impl<'a> Parser<'a> { Ok(SqlOption { name, value }) } + pub fn parse_partition(&mut self) -> Result { + 
self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + Ok(Partition { partitions }) + } + pub fn parse_alter_table_operation(&mut self) -> Result { let operation = if self.parse_keyword(Keyword::ADD) { if let Some(constraint) = self.parse_optional_table_constraint()? { @@ -4202,13 +4209,18 @@ impl<'a> Parser<'a> { } else { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - if self.parse_keyword(Keyword::PARTITION) { - self.expect_token(&Token::LParen)?; - let partitions = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; + let mut new_partitions = vec![]; + loop { + if self.parse_keyword(Keyword::PARTITION) { + new_partitions.push(self.parse_partition()?); + } else { + break; + } + } + if !new_partitions.is_empty() { AlterTableOperation::AddPartitions { if_not_exists, - new_partitions: partitions, + new_partitions, } } else { let column_keyword = self.parse_keyword(Keyword::COLUMN); diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index f63b9cef9..534a224ea 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -128,6 +128,12 @@ fn test_add_partition() { hive().verified_stmt(add); } +#[test] +fn test_add_multiple_partitions() { + let add = "ALTER TABLE db.table ADD IF NOT EXISTS PARTITION (`a` = 'asdf', `b` = 2) PARTITION (`a` = 'asdh', `b` = 3)"; + hive().verified_stmt(add); +} + #[test] fn test_drop_partition() { let drop = "ALTER TABLE db.table DROP PARTITION (a = 1)"; From c5a7d6ccb97292ace1399f24f88dbb1027c0987f Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Tue, 24 Oct 2023 13:20:12 +0200 Subject: [PATCH 052/735] Support for single-quoted identifiers (#1021) Co-authored-by: Andrew Lamb --- src/parser/mod.rs | 38 ++++++++++++++++++++++++++------------ tests/sqlparser_sqlite.rs | 5 +++++ 2 files changed, 31 insertions(+), 12 deletions(-) diff --git 
a/src/parser/mod.rs b/src/parser/mod.rs index 8930b0f49..eb7c4a008 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -620,18 +620,29 @@ impl<'a> Parser<'a> { let next_token = self.next_token(); match next_token.token { - Token::Word(w) if self.peek_token().token == Token::Period => { - let mut id_parts: Vec = vec![w.to_ident()]; - - while self.consume_token(&Token::Period) { - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) => id_parts.push(w.to_ident()), - Token::Mul => { - return Ok(WildcardExpr::QualifiedWildcard(ObjectName(id_parts))); - } - _ => { - return self.expected("an identifier or a '*' after '.'", next_token); + t @ (Token::Word(_) | Token::SingleQuotedString(_)) => { + if self.peek_token().token == Token::Period { + let mut id_parts: Vec = vec![match t { + Token::Word(w) => w.to_ident(), + Token::SingleQuotedString(s) => Ident::with_quote('\'', s), + _ => unreachable!(), // We matched above + }]; + + while self.consume_token(&Token::Period) { + let next_token = self.next_token(); + match next_token.token { + Token::Word(w) => id_parts.push(w.to_ident()), + Token::SingleQuotedString(s) => { + // SQLite has single-quoted identifiers + id_parts.push(Ident::with_quote('\'', s)) + } + Token::Mul => { + return Ok(WildcardExpr::QualifiedWildcard(ObjectName(id_parts))); + } + _ => { + return self + .expected("an identifier or a '*' after '.'", next_token); + } } } } @@ -830,6 +841,9 @@ impl<'a> Parser<'a> { let next_token = self.next_token(); match next_token.token { Token::Word(w) => id_parts.push(w.to_ident()), + Token::SingleQuotedString(s) => { + id_parts.push(Ident::with_quote('\'', s)) + } _ => { return self .expected("an identifier or a '*' after '.'", next_token); diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 2fdd4e3de..b657acddf 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -335,6 +335,11 @@ fn parse_create_table_with_strict() { } } +#[test] +fn 
parse_single_quoted_identified() { + sqlite().verified_only_select("SELECT 't'.*, t.'x' FROM 't'"); + // TODO: add support for select 't'.x +} #[test] fn parse_window_function_with_filter() { for func_name in [ From 8262abcd311e2b129cfba369e7332efb833db188 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 24 Oct 2023 07:35:59 -0400 Subject: [PATCH 053/735] Improve documentation on Parser::consume_token and friends (#994) --- src/parser/mod.rs | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index eb7c4a008..a1323f7a8 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2346,7 +2346,7 @@ impl<'a> Parser<'a> { } } - /// Report unexpected token + /// Report `found` was encountered instead of `expected` pub fn expected(&self, expected: &str, found: TokenWithLocation) -> Result { parser_err!( format!("Expected {expected}, found: {found}"), @@ -2354,7 +2354,8 @@ impl<'a> Parser<'a> { ) } - /// Look for an expected keyword and consume it if it exists + /// If the current token is the `expected` keyword, consume it and returns + /// true. Otherwise, no tokens are consumed and returns false. #[must_use] pub fn parse_keyword(&mut self, expected: Keyword) -> bool { match self.peek_token().token { @@ -2366,7 +2367,9 @@ impl<'a> Parser<'a> { } } - /// Look for an expected sequence of keywords and consume them if they exist + /// If the current and subsequent tokens exactly match the `keywords` + /// sequence, consume them and returns true. Otherwise, no tokens are + /// consumed and returns false #[must_use] pub fn parse_keywords(&mut self, keywords: &[Keyword]) -> bool { let index = self.index; @@ -2381,7 +2384,9 @@ impl<'a> Parser<'a> { true } - /// Look for one of the given keywords and return the one that matches. + /// If the current token is one of the given `keywords`, consume the token + /// and return the keyword that matches. 
Otherwise, no tokens are consumed + /// and returns `None`. #[must_use] pub fn parse_one_of_keywords(&mut self, keywords: &[Keyword]) -> Option { match self.peek_token().token { @@ -2398,7 +2403,8 @@ impl<'a> Parser<'a> { } } - /// Bail out if the current token is not one of the expected keywords, or consume it if it is + /// If the current token is one of the expected keywords, consume the token + /// and return the keyword that matches. Otherwise, return an error. pub fn expect_one_of_keywords(&mut self, keywords: &[Keyword]) -> Result { if let Some(keyword) = self.parse_one_of_keywords(keywords) { Ok(keyword) @@ -2411,7 +2417,8 @@ impl<'a> Parser<'a> { } } - /// Bail out if the current token is not an expected keyword, or consume it if it is + /// If the current token is the `expected` keyword, consume the token. + /// Otherwise return an error. pub fn expect_keyword(&mut self, expected: Keyword) -> Result<(), ParserError> { if self.parse_keyword(expected) { Ok(()) @@ -2420,8 +2427,8 @@ impl<'a> Parser<'a> { } } - /// Bail out if the following tokens are not the expected sequence of - /// keywords, or consume them if they are. + /// If the current and subsequent tokens exactly match the `keywords` + /// sequence, consume them and returns Ok. Otherwise, return an Error. 
pub fn expect_keywords(&mut self, expected: &[Keyword]) -> Result<(), ParserError> { for &kw in expected { self.expect_keyword(kw)?; From b89edaa98b6f8cbea105b59fdc455c300d28a828 Mon Sep 17 00:00:00 2001 From: yuval-illumex <85674443+yuval-illumex@users.noreply.github.com> Date: Tue, 24 Oct 2023 16:45:59 +0300 Subject: [PATCH 054/735] Support `IGNORE|RESPECT` NULLs clause in window functions (#998) Co-authored-by: Andrew Lamb --- src/ast/mod.rs | 26 ++++++++++++++++ src/ast/visitor.rs | 1 + src/keywords.rs | 1 + src/parser/mod.rs | 16 ++++++++++ tests/sqlparser_bigquery.rs | 1 + tests/sqlparser_clickhouse.rs | 4 +++ tests/sqlparser_common.rs | 58 +++++++++++++++++++++++++++++++++++ tests/sqlparser_hive.rs | 1 + tests/sqlparser_mssql.rs | 1 + tests/sqlparser_mysql.rs | 6 ++++ tests/sqlparser_postgres.rs | 6 ++++ tests/sqlparser_redshift.rs | 1 + tests/sqlparser_snowflake.rs | 1 + tests/sqlparser_sqlite.rs | 1 + 14 files changed, 124 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 4c69d3ed0..b52bdf846 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1161,6 +1161,26 @@ impl fmt::Display for WindowFrameUnits { } } +/// Specifies Ignore / Respect NULL within window functions. 
+/// For example +/// `FIRST_VALUE(column2) IGNORE NULLS OVER (PARTITION BY column1)` +#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum NullTreatment { + IgnoreNulls, + RespectNulls, +} + +impl fmt::Display for NullTreatment { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(match self { + NullTreatment::IgnoreNulls => "IGNORE NULLS", + NullTreatment::RespectNulls => "RESPECT NULLS", + }) + } +} + /// Specifies [WindowFrame]'s `start_bound` and `end_bound` #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -3757,6 +3777,8 @@ pub struct Function { pub args: Vec, /// e.g. `x > 5` in `COUNT(x) FILTER (WHERE x > 5)` pub filter: Option>, + // Snowflake/MSSQL supports different options for null treatment in rank functions + pub null_treatment: Option, pub over: Option, // aggregate functions may specify eg `COUNT(DISTINCT x)` pub distinct: bool, @@ -3809,6 +3831,10 @@ impl fmt::Display for Function { write!(f, " FILTER (WHERE {filter_cond})")?; } + if let Some(o) = &self.null_treatment { + write!(f, " {o}")?; + } + if let Some(o) = &self.over { write!(f, " OVER {o}")?; } diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index 4e025f962..99db16107 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -506,6 +506,7 @@ where /// *expr = Expr::Function(Function { /// name: ObjectName(vec![Ident::new("f")]), /// args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(old_expr))], +/// null_treatment: None, /// filter: None, over: None, distinct: false, special: false, order_by: vec![], /// }); /// } diff --git a/src/keywords.rs b/src/keywords.rs index 405203601..dec324cfb 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -518,6 +518,7 @@ define_keywords!( REPLACE, REPLICATION, RESET, + RESPECT, RESTRICT, RESULT, RETAIN,
diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a1323f7a8..4a465ec99 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -785,6 +785,7 @@ impl<'a> Parser<'a> { Ok(Expr::Function(Function { name: ObjectName(vec![w.to_ident()]), args: vec![], + null_treatment: None, filter: None, over: None, distinct: false, @@ -987,6 +988,19 @@ impl<'a> Parser<'a> { } else { None }; + let null_treatment = match self.parse_one_of_keywords(&[Keyword::RESPECT, Keyword::IGNORE]) + { + Some(keyword) => { + self.expect_keyword(Keyword::NULLS)?; + + match keyword { + Keyword::RESPECT => Some(NullTreatment::RespectNulls), + Keyword::IGNORE => Some(NullTreatment::IgnoreNulls), + _ => None, + } + } + None => None, + }; let over = if self.parse_keyword(Keyword::OVER) { if self.consume_token(&Token::LParen) { let window_spec = self.parse_window_spec()?; @@ -1000,6 +1014,7 @@ impl<'a> Parser<'a> { Ok(Expr::Function(Function { name, args, + null_treatment, filter, over, distinct, @@ -1018,6 +1033,7 @@ impl<'a> Parser<'a> { Ok(Expr::Function(Function { name, args, + null_treatment: None, filter: None, over: None, distinct: false, diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index fe95b1873..e72a99b49 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -564,6 +564,7 @@ fn parse_map_access_offset() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( number("0") ))),], + null_treatment: None, filter: None, over: None, distinct: false, diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 7d9cb0309..e7c85c2a3 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -50,6 +50,7 @@ fn parse_map_access_expr() { Value::SingleQuotedString("endpoint".to_string()) ))), ], + null_treatment: None, filter: None, over: None, distinct: false, @@ -90,6 +91,7 @@ fn parse_map_access_expr() { Value::SingleQuotedString("app".to_string()) ))), ], + null_treatment: None, filter: None, 
over: None, distinct: false, @@ -140,6 +142,7 @@ fn parse_array_fn() { FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(Ident::new("x1")))), FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(Ident::new("x2")))), ], + null_treatment: None, filter: None, over: None, distinct: false, @@ -199,6 +202,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + null_treatment: None, filter: None, over: None, distinct: false, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 9eb52f6ec..5eb70b09b 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -875,6 +875,7 @@ fn parse_select_count_wildcard() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("COUNT")]), args: vec![FunctionArg::Unnamed(FunctionArgExpr::Wildcard)], + null_treatment: None, filter: None, over: None, distinct: false, @@ -896,6 +897,7 @@ fn parse_select_count_distinct() { op: UnaryOperator::Plus, expr: Box::new(Expr::Identifier(Ident::new("x"))), }))], + null_treatment: None, filter: None, over: None, distinct: true, @@ -1864,6 +1866,7 @@ fn parse_select_having() { left: Box::new(Expr::Function(Function { name: ObjectName(vec![Ident::new("COUNT")]), args: vec![FunctionArg::Unnamed(FunctionArgExpr::Wildcard)], + null_treatment: None, filter: None, over: None, distinct: false, @@ -1890,6 +1893,7 @@ fn parse_select_qualify() { left: Box::new(Expr::Function(Function { name: ObjectName(vec![Ident::new("ROW_NUMBER")]), args: vec![], + null_treatment: None, filter: None, over: Some(WindowType::WindowSpec(WindowSpec { partition_by: vec![Expr::Identifier(Ident::new("p"))], @@ -2287,6 +2291,45 @@ fn parse_agg_with_order_by() { } } +#[test] +fn parse_window_rank_function() { + let supported_dialects = TestedDialects { + dialects: vec![ + Box::new(GenericDialect {}), + Box::new(PostgreSqlDialect {}), + Box::new(MsSqlDialect {}), + Box::new(AnsiDialect {}), + 
Box::new(HiveDialect {}), + Box::new(SnowflakeDialect {}), + ], + options: None, + }; + + for sql in [ + "SELECT column1, column2, FIRST_VALUE(column2) OVER (PARTITION BY column1 ORDER BY column2 NULLS LAST) AS column2_first FROM t1", + "SELECT column1, column2, FIRST_VALUE(column2) OVER (ORDER BY column2 NULLS LAST) AS column2_first FROM t1", + "SELECT col_1, col_2, LAG(col_2) OVER (ORDER BY col_1) FROM t1", + "SELECT LAG(col_2, 1, 0) OVER (ORDER BY col_1) FROM t1", + "SELECT LAG(col_2, 1, 0) OVER (PARTITION BY col_3 ORDER BY col_1)", + ] { + supported_dialects.verified_stmt(sql); + } + + let supported_dialects_nulls = TestedDialects { + dialects: vec![Box::new(MsSqlDialect {}), Box::new(SnowflakeDialect {})], + options: None, + }; + + for sql in [ + "SELECT column1, column2, FIRST_VALUE(column2) IGNORE NULLS OVER (PARTITION BY column1 ORDER BY column2 NULLS LAST) AS column2_first FROM t1", + "SELECT column1, column2, FIRST_VALUE(column2) RESPECT NULLS OVER (PARTITION BY column1 ORDER BY column2 NULLS LAST) AS column2_first FROM t1", + "SELECT LAG(col_2, 1, 0) IGNORE NULLS OVER (ORDER BY col_1) FROM t1", + "SELECT LAG(col_2, 1, 0) RESPECT NULLS OVER (ORDER BY col_1) FROM t1", + ] { + supported_dialects_nulls.verified_stmt(sql); + } +} + #[test] fn parse_create_table() { let sql = "CREATE TABLE uk_cities (\ @@ -3346,6 +3389,7 @@ fn parse_scalar_function_in_projection() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("id")) ))], + null_treatment: None, filter: None, over: None, distinct: false, @@ -3466,6 +3510,7 @@ fn parse_named_argument_function() { ))), }, ], + null_treatment: None, filter: None, over: None, distinct: false, @@ -3498,6 +3543,7 @@ fn parse_window_functions() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("row_number")]), args: vec![], + null_treatment: None, filter: None, over: Some(WindowType::WindowSpec(WindowSpec { partition_by: vec![], @@ -3542,6 +3588,7 @@ fn test_parse_named_window() { 
quote_style: None, }), ))], + null_treatment: None, filter: None, over: Some(WindowType::NamedWindow(Ident { value: "window1".to_string(), @@ -3568,6 +3615,7 @@ fn test_parse_named_window() { quote_style: None, }), ))], + null_treatment: None, filter: None, over: Some(WindowType::NamedWindow(Ident { value: "window2".to_string(), @@ -4038,6 +4086,7 @@ fn parse_at_timezone() { quote_style: None, }]), args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(zero.clone()))], + null_treatment: None, filter: None, over: None, distinct: false, @@ -4066,6 +4115,7 @@ fn parse_at_timezone() { quote_style: None, },],), args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(zero))], + null_treatment: None, filter: None, over: None, distinct: false, @@ -4078,6 +4128,7 @@ fn parse_at_timezone() { Value::SingleQuotedString("%Y-%m-%dT%H".to_string()), ),),), ], + null_treatment: None, filter: None, over: None, distinct: false, @@ -4237,6 +4288,7 @@ fn parse_table_function() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( Value::SingleQuotedString("1".to_owned()), )))], + null_treatment: None, filter: None, over: None, distinct: false, @@ -4389,6 +4441,7 @@ fn parse_unnest_in_from_clause() { FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("2")))), FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("3")))), ], + null_treatment: None, filter: None, over: None, distinct: false, @@ -4419,6 +4472,7 @@ fn parse_unnest_in_from_clause() { FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("2")))), FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("3")))), ], + null_treatment: None, filter: None, over: None, distinct: false, @@ -4431,6 +4485,7 @@ fn parse_unnest_in_from_clause() { FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("5")))), FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(number("6")))), ], + null_treatment: None, filter: None, over: None, distinct: false, @@ -6904,6 +6959,7 @@ fn 
parse_time_functions() { let select_localtime_func_call_ast = Function { name: ObjectName(vec![Ident::new(func_name)]), args: vec![], + null_treatment: None, filter: None, over: None, distinct: false, @@ -7391,6 +7447,7 @@ fn parse_pivot_table() { args: (vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::CompoundIdentifier(vec![Ident::new("a"), Ident::new("amount"),]) ))]), + null_treatment: None, filter: None, over: None, distinct: false, @@ -7541,6 +7598,7 @@ fn parse_pivot_unpivot_table() { args: (vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("population")) ))]), + null_treatment: None, filter: None, over: None, distinct: false, diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 534a224ea..66eef09e1 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -361,6 +361,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + null_treatment: None, filter: None, over: None, distinct: false, diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index ebadf95f2..4aa993fa0 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -334,6 +334,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + null_treatment: None, filter: None, over: None, distinct: false, diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 8391bbadb..2788dfabe 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1112,6 +1112,7 @@ fn parse_insert_with_on_duplicate_update() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("description")) ))], + null_treatment: None, filter: None, over: None, distinct: false, @@ -1126,6 +1127,7 @@ fn parse_insert_with_on_duplicate_update() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("perm_create")) 
))], + null_treatment: None, filter: None, over: None, distinct: false, @@ -1140,6 +1142,7 @@ fn parse_insert_with_on_duplicate_update() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("perm_read")) ))], + null_treatment: None, filter: None, over: None, distinct: false, @@ -1154,6 +1157,7 @@ fn parse_insert_with_on_duplicate_update() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("perm_update")) ))], + null_treatment: None, filter: None, over: None, distinct: false, @@ -1168,6 +1172,7 @@ fn parse_insert_with_on_duplicate_update() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("perm_delete")) ))], + null_treatment: None, filter: None, over: None, distinct: false, @@ -1558,6 +1563,7 @@ fn parse_table_colum_option_on_update() { option: ColumnOption::OnUpdate(Expr::Function(Function { name: ObjectName(vec![Ident::new("CURRENT_TIMESTAMP")]), args: vec![], + null_treatment: None, filter: None, over: None, distinct: false, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 64fcbd38a..18b5fe6f7 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2275,6 +2275,7 @@ fn test_composite_value() { named: true } )))], + null_treatment: None, filter: None, over: None, distinct: false, @@ -2437,6 +2438,7 @@ fn parse_current_functions() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("CURRENT_CATALOG")]), args: vec![], + null_treatment: None, filter: None, over: None, distinct: false, @@ -2449,6 +2451,7 @@ fn parse_current_functions() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("CURRENT_USER")]), args: vec![], + null_treatment: None, filter: None, over: None, distinct: false, @@ -2461,6 +2464,7 @@ fn parse_current_functions() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("SESSION_USER")]), args: vec![], + null_treatment: None, filter: None, over: None, distinct: false, @@ 
-2473,6 +2477,7 @@ fn parse_current_functions() { &Expr::Function(Function { name: ObjectName(vec![Ident::new("USER")]), args: vec![], + null_treatment: None, filter: None, over: None, distinct: false, @@ -2924,6 +2929,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + null_treatment: None, filter: None, over: None, distinct: false, diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 6238d1eca..6fa647d38 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -137,6 +137,7 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], + null_treatment: None, filter: None, over: None, distinct: false, diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 7e6f18138..19a62b61d 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -249,6 +249,7 @@ fn parse_delimited_identifiers() { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), args: vec![], filter: None, + null_treatment: None, over: None, distinct: false, special: false, diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index b657acddf..4935f1f50 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -359,6 +359,7 @@ fn parse_window_function_with_filter() { args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( Expr::Identifier(Ident::new("x")) ))], + null_treatment: None, over: Some(WindowType::WindowSpec(WindowSpec { partition_by: vec![], order_by: vec![], From 79933846861551a05afc00979516fe8111796492 Mon Sep 17 00:00:00 2001 From: yuval-illumex <85674443+yuval-illumex@users.noreply.github.com> Date: Tue, 24 Oct 2023 23:05:43 +0300 Subject: [PATCH 055/735] Support `date` 'key' when using semi structured data (#1023) --- src/parser/mod.rs | 2 +- tests/sqlparser_snowflake.rs | 11 +++++++++++ 2 files changed, 12 
insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 4a465ec99..1a586514c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4726,7 +4726,7 @@ impl<'a> Parser<'a> { )?, }, // Case when Snowflake Semi-structured data like key:value - Keyword::NoKeyword | Keyword::LOCATION | Keyword::TYPE if dialect_of!(self is SnowflakeDialect | GenericDialect) => { + Keyword::NoKeyword | Keyword::LOCATION | Keyword::TYPE | Keyword::DATE if dialect_of!(self is SnowflakeDialect | GenericDialect) => { Ok(Value::UnQuotedString(w.value)) } _ => self.expected( diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 19a62b61d..54d6b5542 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -208,6 +208,17 @@ fn parse_json_using_colon() { select.projection[0] ); + let sql = "SELECT a:date FROM t"; + let select = snowflake().verified_only_select(sql); + assert_eq!( + SelectItem::UnnamedExpr(Expr::JsonAccess { + left: Box::new(Expr::Identifier(Ident::new("a"))), + operator: JsonOperator::Colon, + right: Box::new(Expr::Value(Value::UnQuotedString("date".to_string()))), + }), + select.projection[0] + ); + snowflake().one_statement_parses_to("SELECT a:b::int FROM t", "SELECT CAST(a:b AS INT) FROM t"); } From 65317edcb9ac1cf58badf336bcd55d52fd6cecca Mon Sep 17 00:00:00 2001 From: yuval-illumex <85674443+yuval-illumex@users.noreply.github.com> Date: Wed, 25 Oct 2023 19:53:09 +0300 Subject: [PATCH 056/735] Support Snowflake - allow number as placeholder (e.g. 
`:1`) (#1001) --- src/parser/mod.rs | 9 ++++++++- tests/sqlparser_snowflake.rs | 14 ++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 1a586514c..45ce81ac0 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4759,7 +4759,14 @@ impl<'a> Parser<'a> { Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())), Token::Placeholder(ref s) => Ok(Value::Placeholder(s.to_string())), tok @ Token::Colon | tok @ Token::AtSign => { - let ident = self.parse_identifier()?; + // Not calling self.parse_identifier()? because only in placeholder we want to check numbers as identifiers + // This is because snowflake allows numbers as placeholders + let next_token = self.next_token(); + let ident = match next_token.token { + Token::Word(w) => Ok(w.to_ident()), + Token::Number(w, false) => Ok(Ident::new(w)), + _ => self.expected("placeholder", next_token), + }?; let placeholder = tok.to_string() + &ident.value; Ok(Value::Placeholder(placeholder)) } diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 54d6b5542..a959a4a4e 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1079,6 +1079,20 @@ fn test_snowflake_trim() { ); } +#[test] +fn test_number_placeholder() { + let sql_only_select = "SELECT :1"; + let select = snowflake().verified_only_select(sql_only_select); + assert_eq!( + &Expr::Value(Value::Placeholder(":1".into())), + expr_from_projection(only(&select.projection)) + ); + + snowflake() + .parse_sql_statements("alter role 1 with name = 'foo'") + .expect_err("should have failed"); +} + #[test] fn parse_position_not_function_columns() { snowflake_and_generic() From 2f437db2a68724e4ae709df22f53999d24804ac7 Mon Sep 17 00:00:00 2001 From: Ifeanyi Ubah Date: Wed, 25 Oct 2023 18:57:33 +0200 Subject: [PATCH 057/735] Support for BigQuery `struct`, `array` and `bytes` , `int64`, `float64` datatypes (#1003) --- src/ast/data_type.rs | 67 ++++- 
src/ast/mod.rs | 58 ++++- src/keywords.rs | 5 + src/parser/mod.rs | 236 ++++++++++++++++- tests/sqlparser_bigquery.rs | 489 +++++++++++++++++++++++++++++++++++ tests/sqlparser_common.rs | 101 +++++--- tests/sqlparser_postgres.rs | 8 +- tests/sqlparser_snowflake.rs | 2 +- 8 files changed, 901 insertions(+), 65 deletions(-) diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index 2a6a004f4..506de815d 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -20,7 +20,7 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "visitor")] use sqlparser_derive::{Visit, VisitMut}; -use crate::ast::ObjectName; +use crate::ast::{display_comma_separated, ObjectName, StructField}; use super::value::escape_single_quote_string; @@ -71,6 +71,10 @@ pub enum DataType { /// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#binary-large-object-string-type /// [Oracle]: https://docs.oracle.com/javadb/10.8.3.0/ref/rrefblob.html Blob(Option), + /// Variable-length binary data with optional length. + /// + /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#bytes_type + Bytes(Option), /// Numeric type with optional precision and scale e.g. NUMERIC(10,2), [standard][1] /// /// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#exact-numeric-type @@ -125,6 +129,10 @@ pub enum DataType { /// /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html Int4(Option), + /// Integer type in [bigquery] + /// + /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types + Int64, /// Integer with optional display width e.g. INTEGER or INTEGER(11) Integer(Option), /// Unsigned int with optional display width e.g. 
INT UNSIGNED or INT(11) UNSIGNED @@ -149,6 +157,10 @@ pub enum DataType { /// /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html Float4, + /// Floating point in [bigquery] + /// + /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types + Float64, /// Floating point e.g. REAL Real, /// Float8 as alias for Double in [postgresql] @@ -190,18 +202,23 @@ pub enum DataType { Regclass, /// Text Text, - /// String - String, + /// String with optional length. + String(Option), /// Bytea Bytea, /// Custom type such as enums Custom(ObjectName, Vec), /// Arrays - Array(Option>), + Array(ArrayElemTypeDef), /// Enums Enum(Vec), /// Set Set(Vec), + /// Struct + /// + /// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html + /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type + Struct(Vec), } impl fmt::Display for DataType { @@ -231,6 +248,7 @@ impl fmt::Display for DataType { format_type_with_optional_length(f, "VARBINARY", size, false) } DataType::Blob(size) => format_type_with_optional_length(f, "BLOB", size, false), + DataType::Bytes(size) => format_type_with_optional_length(f, "BYTES", size, false), DataType::Numeric(info) => { write!(f, "NUMERIC{info}") } @@ -274,6 +292,9 @@ impl fmt::Display for DataType { DataType::Int4(zerofill) => { format_type_with_optional_length(f, "INT4", zerofill, false) } + DataType::Int64 => { + write!(f, "INT64") + } DataType::UnsignedInt4(zerofill) => { format_type_with_optional_length(f, "INT4", zerofill, true) } @@ -297,6 +318,7 @@ impl fmt::Display for DataType { } DataType::Real => write!(f, "REAL"), DataType::Float4 => write!(f, "FLOAT4"), + DataType::Float64 => write!(f, "FLOAT64"), DataType::Double => write!(f, "DOUBLE"), DataType::Float8 => write!(f, "FLOAT8"), DataType::DoublePrecision => write!(f, "DOUBLE PRECISION"), @@ -316,15 +338,13 @@ impl fmt::Display for DataType { 
DataType::JSON => write!(f, "JSON"), DataType::Regclass => write!(f, "REGCLASS"), DataType::Text => write!(f, "TEXT"), - DataType::String => write!(f, "STRING"), + DataType::String(size) => format_type_with_optional_length(f, "STRING", size, false), DataType::Bytea => write!(f, "BYTEA"), - DataType::Array(ty) => { - if let Some(t) = &ty { - write!(f, "{t}[]") - } else { - write!(f, "ARRAY") - } - } + DataType::Array(ty) => match ty { + ArrayElemTypeDef::None => write!(f, "ARRAY"), + ArrayElemTypeDef::SquareBracket(t) => write!(f, "{t}[]"), + ArrayElemTypeDef::AngleBracket(t) => write!(f, "ARRAY<{t}>"), + }, DataType::Custom(ty, modifiers) => { if modifiers.is_empty() { write!(f, "{ty}") @@ -352,6 +372,13 @@ impl fmt::Display for DataType { } write!(f, ")") } + DataType::Struct(fields) => { + if !fields.is_empty() { + write!(f, "STRUCT<{}>", display_comma_separated(fields)) + } else { + write!(f, "STRUCT") + } + } } } } @@ -533,3 +560,19 @@ impl fmt::Display for CharLengthUnits { } } } + +/// Represents the data type of the elements in an array (if any) as well as +/// the syntax used to declare the array. +/// +/// For example: Bigquery/Hive use `ARRAY` whereas snowflake uses ARRAY. 
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum ArrayElemTypeDef { + /// `ARRAY` + None, + /// `ARRAY` + AngleBracket(Box), + /// `[]INT` + SquareBracket(Box), +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs index b52bdf846..ab917dc4c 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -26,7 +26,7 @@ use serde::{Deserialize, Serialize}; use sqlparser_derive::{Visit, VisitMut}; pub use self::data_type::{ - CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo, + ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo, }; pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue}; pub use self::ddl::{ @@ -323,6 +323,27 @@ impl fmt::Display for JsonOperator { } } +/// A field definition within a struct. +/// +/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct StructField { + pub field_name: Option, + pub field_type: DataType, +} + +impl fmt::Display for StructField { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(name) = &self.field_name { + write!(f, "{name} {}", self.field_type) + } else { + write!(f, "{}", self.field_type) + } + } +} + /// Options for `CAST` / `TRY_CAST` /// BigQuery: #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -597,6 +618,26 @@ pub enum Expr { Rollup(Vec>), /// ROW / TUPLE a single value, such as `SELECT (1, 2)` Tuple(Vec), + /// `BigQuery` specific `Struct` literal expression [1] + /// Syntax: + /// ```sql + /// STRUCT<[field_name] field_type, ...>( expr1 [, ... 
]) + /// ``` + /// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type + Struct { + /// Struct values. + values: Vec, + /// Struct field definitions. + fields: Vec, + }, + /// `BigQuery` specific: An named expression in a typeless struct [1] + /// + /// Syntax + /// ```sql + /// 1 AS A + /// ``` + /// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type + Named { expr: Box, name: Ident }, /// An array index expression e.g. `(ARRAY[1, 2])[1]` or `(current_schemas(FALSE))[1]` ArrayIndex { obj: Box, indexes: Vec }, /// An array expression e.g. `ARRAY[1, 2]` @@ -997,6 +1038,21 @@ impl fmt::Display for Expr { Expr::Tuple(exprs) => { write!(f, "({})", display_comma_separated(exprs)) } + Expr::Struct { values, fields } => { + if !fields.is_empty() { + write!( + f, + "STRUCT<{}>({})", + display_comma_separated(fields), + display_comma_separated(values) + ) + } else { + write!(f, "STRUCT({})", display_comma_separated(values)) + } + } + Expr::Named { expr, name } => { + write!(f, "{} AS {}", expr, name) + } Expr::ArrayIndex { obj, indexes } => { write!(f, "{obj}")?; for i in indexes { diff --git a/src/keywords.rs b/src/keywords.rs index dec324cfb..2941c8176 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -120,6 +120,7 @@ define_keywords!( BY, BYPASSRLS, BYTEA, + BYTES, CACHE, CALL, CALLED, @@ -270,6 +271,7 @@ define_keywords!( FIRST_VALUE, FLOAT, FLOAT4, + FLOAT64, FLOAT8, FLOOR, FOLLOWING, @@ -293,6 +295,7 @@ define_keywords!( FUSION, GENERATE, GENERATED, + GEOGRAPHY, GET, GLOBAL, GRANT, @@ -328,6 +331,7 @@ define_keywords!( INT, INT2, INT4, + INT64, INT8, INTEGER, INTERSECT, @@ -584,6 +588,7 @@ define_keywords!( STORED, STRICT, STRING, + STRUCT, SUBMULTISET, SUBSTRING, SUBSTRING_REGEX, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 45ce81ac0..eb4ef68a6 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -30,7 +30,7 @@ use IsOptional::*; use 
crate::ast::helpers::stmt_create_table::CreateTableBuilder; use crate::ast::*; use crate::dialect::*; -use crate::keywords::{self, Keyword}; +use crate::keywords::{self, Keyword, ALL_KEYWORDS}; use crate::tokenizer::*; mod alter; @@ -197,6 +197,26 @@ impl std::error::Error for ParserError {} // By default, allow expressions up to this deep before erroring const DEFAULT_REMAINING_DEPTH: usize = 50; +/// Composite types declarations using angle brackets syntax can be arbitrary +/// nested such that the following declaration is possible: +/// `ARRAY>` +/// But the tokenizer recognizes the `>>` as a ShiftRight token. +/// We work-around that limitation when parsing a data type by accepting +/// either a `>` or `>>` token in such cases, remembering which variant we +/// matched. +/// In the latter case having matched a `>>`, the parent type will not look to +/// match its closing `>` as a result since that will have taken place at the +/// child type. +/// +/// See [Parser::parse_data_type] for details +struct MatchedTrailingBracket(bool); + +impl From for MatchedTrailingBracket { + fn from(value: bool) -> Self { + Self(value) + } +} + /// Options that control how the [`Parser`] parses SQL text #[derive(Debug, Clone, PartialEq, Eq)] pub struct ParserOptions { @@ -833,6 +853,10 @@ impl<'a> Parser<'a> { Keyword::MATCH if dialect_of!(self is MySqlDialect | GenericDialect) => { self.parse_match_against() } + Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => { + self.prev_token(); + self.parse_bigquery_struct_literal() + } // Here `w` is a word, check if it's a part of a multi-part // identifier, a function call, or a simple identifier: _ => match self.peek_token().token { @@ -1798,6 +1822,172 @@ impl<'a> Parser<'a> { })) } + /// Bigquery specific: Parse a struct literal + /// Syntax + /// ```sql + /// -- typed + /// STRUCT<[field_name] field_type, ...>( expr1 [, ... ]) + /// -- typeless + /// STRUCT( expr1 [AS field_name] [, ... 
]) + /// ``` + fn parse_bigquery_struct_literal(&mut self) -> Result { + let (fields, trailing_bracket) = + self.parse_struct_type_def(Self::parse_big_query_struct_field_def)?; + if trailing_bracket.0 { + return parser_err!("unmatched > in STRUCT literal", self.peek_token().location); + } + + self.expect_token(&Token::LParen)?; + let values = self + .parse_comma_separated(|parser| parser.parse_struct_field_expr(!fields.is_empty()))?; + self.expect_token(&Token::RParen)?; + + Ok(Expr::Struct { values, fields }) + } + + /// Parse an expression value for a bigquery struct [1] + /// Syntax + /// ```sql + /// expr [AS name] + /// ``` + /// + /// Parameter typed_syntax is set to true if the expression + /// is to be parsed as a field expression declared using typed + /// struct syntax [2], and false if using typeless struct syntax [3]. + /// + /// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#constructing_a_struct + /// [2]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typed_struct_syntax + /// [3]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typeless_struct_syntax + fn parse_struct_field_expr(&mut self, typed_syntax: bool) -> Result { + let expr = self.parse_expr()?; + if self.parse_keyword(Keyword::AS) { + if typed_syntax { + return parser_err!("Typed syntax does not allow AS", { + self.prev_token(); + self.peek_token().location + }); + } + let field_name = self.parse_identifier()?; + Ok(Expr::Named { + expr: expr.into(), + name: field_name, + }) + } else { + Ok(expr) + } + } + + /// Parse a Struct type definition as a sequence of field-value pairs. + /// The syntax of the Struct elem differs by dialect so it is customised + /// by the `elem_parser` argument. 
+ /// + /// Syntax + /// ```sql + /// Hive: + /// STRUCT + /// + /// BigQuery: + /// STRUCT<[field_name] field_type> + /// ``` + fn parse_struct_type_def( + &mut self, + mut elem_parser: F, + ) -> Result<(Vec, MatchedTrailingBracket), ParserError> + where + F: FnMut(&mut Parser<'a>) -> Result<(StructField, MatchedTrailingBracket), ParserError>, + { + let start_token = self.peek_token(); + self.expect_keyword(Keyword::STRUCT)?; + + // Nothing to do if we have no type information. + if Token::Lt != self.peek_token() { + return Ok((Default::default(), false.into())); + } + self.next_token(); + + let mut field_defs = vec![]; + let trailing_bracket = loop { + let (def, trailing_bracket) = elem_parser(self)?; + field_defs.push(def); + if !self.consume_token(&Token::Comma) { + break trailing_bracket; + } + + // Angle brackets are balanced so we only expect the trailing `>>` after + // we've matched all field types for the current struct. + // e.g. this is invalid syntax `STRUCT>>, INT>(NULL)` + if trailing_bracket.0 { + return parser_err!("unmatched > in STRUCT definition", start_token.location); + } + }; + + Ok(( + field_defs, + self.expect_closing_angle_bracket(trailing_bracket)?, + )) + } + + /// Parse a field definition in a BigQuery struct. + /// Syntax: + /// + /// ```sql + /// [field_name] field_type + /// ``` + fn parse_big_query_struct_field_def( + &mut self, + ) -> Result<(StructField, MatchedTrailingBracket), ParserError> { + let is_anonymous_field = if let Token::Word(w) = self.peek_token().token { + ALL_KEYWORDS + .binary_search(&w.value.to_uppercase().as_str()) + .is_ok() + } else { + false + }; + + let field_name = if is_anonymous_field { + None + } else { + Some(self.parse_identifier()?) 
+ }; + + let (field_type, trailing_bracket) = self.parse_data_type_helper()?; + + Ok(( + StructField { + field_name, + field_type, + }, + trailing_bracket, + )) + } + + /// For nested types that use the angle bracket syntax, this matches either + /// `>`, `>>` or nothing depending on which variant is expected (specified by the previously + /// matched `trailing_bracket` argument). It returns whether there is a trailing + /// left to be matched - (i.e. if '>>' was matched). + fn expect_closing_angle_bracket( + &mut self, + trailing_bracket: MatchedTrailingBracket, + ) -> Result { + let trailing_bracket = if !trailing_bracket.0 { + match self.peek_token().token { + Token::Gt => { + self.next_token(); + false.into() + } + Token::ShiftRight => { + self.next_token(); + true.into() + } + _ => return self.expected(">", self.peek_token()), + } + } else { + false.into() + }; + + Ok(trailing_bracket) + } + /// Parse an operator following an expression pub fn parse_infix(&mut self, expr: Expr, precedence: u8) -> Result { // allow the dialect to override infix parsing @@ -4876,7 +5066,22 @@ impl<'a> Parser<'a> { /// Parse a SQL datatype (in the context of a CREATE TABLE statement for example) pub fn parse_data_type(&mut self) -> Result { + let (ty, trailing_bracket) = self.parse_data_type_helper()?; + if trailing_bracket.0 { + return parser_err!( + format!("unmatched > after parsing data type {ty}"), + self.peek_token() + ); + } + + Ok(ty) + } + + fn parse_data_type_helper( + &mut self, + ) -> Result<(DataType, MatchedTrailingBracket), ParserError> { let next_token = self.next_token(); + let mut trailing_bracket = false.into(); let mut data = match next_token.token { Token::Word(w) => match w.keyword { Keyword::BOOLEAN => Ok(DataType::Boolean), @@ -4884,6 +5089,7 @@ impl<'a> Parser<'a> { Keyword::FLOAT => Ok(DataType::Float(self.parse_optional_precision()?)), Keyword::REAL => Ok(DataType::Real), Keyword::FLOAT4 => Ok(DataType::Float4), + Keyword::FLOAT64 => 
Ok(DataType::Float64), Keyword::FLOAT8 => Ok(DataType::Float8), Keyword::DOUBLE => { if self.parse_keyword(Keyword::PRECISION) { @@ -4940,6 +5146,7 @@ impl<'a> Parser<'a> { Ok(DataType::Int4(optional_precision?)) } } + Keyword::INT64 => Ok(DataType::Int64), Keyword::INTEGER => { let optional_precision = self.parse_optional_precision(); if self.parse_keyword(Keyword::UNSIGNED) { @@ -4994,6 +5201,7 @@ impl<'a> Parser<'a> { Keyword::BINARY => Ok(DataType::Binary(self.parse_optional_precision()?)), Keyword::VARBINARY => Ok(DataType::Varbinary(self.parse_optional_precision()?)), Keyword::BLOB => Ok(DataType::Blob(self.parse_optional_precision()?)), + Keyword::BYTES => Ok(DataType::Bytes(self.parse_optional_precision()?)), Keyword::UUID => Ok(DataType::Uuid), Keyword::DATE => Ok(DataType::Date), Keyword::DATETIME => Ok(DataType::Datetime(self.parse_optional_precision()?)), @@ -5037,7 +5245,7 @@ impl<'a> Parser<'a> { Keyword::INTERVAL => Ok(DataType::Interval), Keyword::JSON => Ok(DataType::JSON), Keyword::REGCLASS => Ok(DataType::Regclass), - Keyword::STRING => Ok(DataType::String), + Keyword::STRING => Ok(DataType::String(self.parse_optional_precision()?)), Keyword::TEXT => Ok(DataType::Text), Keyword::BYTEA => Ok(DataType::Bytea), Keyword::NUMERIC => Ok(DataType::Numeric( @@ -5059,17 +5267,23 @@ impl<'a> Parser<'a> { Keyword::SET => Ok(DataType::Set(self.parse_string_values()?)), Keyword::ARRAY => { if dialect_of!(self is SnowflakeDialect) { - Ok(DataType::Array(None)) + Ok(DataType::Array(ArrayElemTypeDef::None)) } else { - // Hive array syntax. 
Note that nesting arrays - or other Hive syntax - // that ends with > will fail due to "C++" problem - >> is parsed as - // Token::ShiftRight self.expect_token(&Token::Lt)?; - let inside_type = self.parse_data_type()?; - self.expect_token(&Token::Gt)?; - Ok(DataType::Array(Some(Box::new(inside_type)))) + let (inside_type, _trailing_bracket) = self.parse_data_type_helper()?; + trailing_bracket = self.expect_closing_angle_bracket(_trailing_bracket)?; + Ok(DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new( + inside_type, + )))) } } + Keyword::STRUCT if dialect_of!(self is BigQueryDialect) => { + self.prev_token(); + let (field_defs, _trailing_bracket) = + self.parse_struct_type_def(Self::parse_big_query_struct_field_def)?; + trailing_bracket = _trailing_bracket; + Ok(DataType::Struct(field_defs)) + } _ => { self.prev_token(); let type_name = self.parse_object_name()?; @@ -5087,9 +5301,9 @@ impl<'a> Parser<'a> { // Keyword::ARRAY syntax from above while self.consume_token(&Token::LBracket) { self.expect_token(&Token::RBracket)?; - data = DataType::Array(Some(Box::new(data))) + data = DataType::Array(ArrayElemTypeDef::SquareBracket(Box::new(data))) } - Ok(data) + Ok((data, trailing_bracket)) } pub fn parse_string_values(&mut self) -> Result, ParserError> { diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index e72a99b49..006927e46 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -13,6 +13,7 @@ #[macro_use] mod test_utils; +use sqlparser::ast; use std::ops::Deref; use sqlparser::ast::*; @@ -85,6 +86,494 @@ fn parse_raw_literal() { panic!("invalid query") } +#[test] +fn parse_nested_data_types() { + let sql = "CREATE TABLE table (x STRUCT, b BYTES(42)>, y ARRAY>)"; + match bigquery().one_statement_parses_to(sql, sql) { + Statement::CreateTable { name, columns, .. 
} => { + assert_eq!(name, ObjectName(vec!["table".into()])); + assert_eq!( + columns, + vec![ + ColumnDef { + name: Ident::new("x"), + data_type: DataType::Struct(vec![ + StructField { + field_name: Some("a".into()), + field_type: DataType::Array(ArrayElemTypeDef::AngleBracket( + Box::new(DataType::Int64,) + )) + }, + StructField { + field_name: Some("b".into()), + field_type: DataType::Bytes(Some(42)) + }, + ]), + collation: None, + options: vec![], + }, + ColumnDef { + name: Ident::new("y"), + data_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new( + DataType::Struct(vec![StructField { + field_name: None, + field_type: DataType::Int64, + }]), + ))), + collation: None, + options: vec![], + }, + ] + ); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_invalid_brackets() { + let sql = "SELECT STRUCT>(NULL)"; + assert_eq!( + bigquery().parse_sql_statements(sql).unwrap_err(), + ParserError::ParserError("unmatched > in STRUCT literal".to_string()) + ); + + let sql = "SELECT STRUCT>>(NULL)"; + assert_eq!( + bigquery().parse_sql_statements(sql).unwrap_err(), + ParserError::ParserError("Expected (, found: >".to_string()) + ); + + let sql = "CREATE TABLE table (x STRUCT>>)"; + assert_eq!( + bigquery().parse_sql_statements(sql).unwrap_err(), + ParserError::ParserError( + "Expected ',' or ')' after column definition, found: >".to_string() + ) + ); +} + +#[test] +fn parse_tuple_struct_literal() { + // tuple syntax: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#tuple_syntax + // syntax: (expr1, expr2 [, ... 
]) + let sql = "SELECT (1, 2, 3), (1, 1.0, '123', true)"; + let select = bigquery().verified_only_select(sql); + assert_eq!(2, select.projection.len()); + assert_eq!( + &Expr::Tuple(vec![ + Expr::Value(number("1")), + Expr::Value(number("2")), + Expr::Value(number("3")), + ]), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Tuple(vec![ + Expr::Value(number("1")), + Expr::Value(number("1.0")), + Expr::Value(Value::SingleQuotedString("123".to_string())), + Expr::Value(Value::Boolean(true)) + ]), + expr_from_projection(&select.projection[1]) + ); +} + +#[test] +fn parse_typeless_struct_syntax() { + // typeless struct syntax https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typeless_struct_syntax + // syntax: STRUCT( expr1 [AS field_name] [, ... ]) + let sql = "SELECT STRUCT(1, 2, 3), STRUCT('abc'), STRUCT(1, t.str_col), STRUCT(1 AS a, 'abc' AS b), STRUCT(str_col AS abc)"; + let select = bigquery().verified_only_select(sql); + assert_eq!(5, select.projection.len()); + assert_eq!( + &Expr::Struct { + values: vec![ + Expr::Value(number("1")), + Expr::Value(number("2")), + Expr::Value(number("3")), + ], + fields: Default::default() + }, + expr_from_projection(&select.projection[0]) + ); + + assert_eq!( + &Expr::Struct { + values: vec![Expr::Value(Value::SingleQuotedString("abc".to_string())),], + fields: Default::default() + }, + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::Struct { + values: vec![ + Expr::Value(number("1")), + Expr::CompoundIdentifier(vec![Ident::from("t"), Ident::from("str_col")]), + ], + fields: Default::default() + }, + expr_from_projection(&select.projection[2]) + ); + assert_eq!( + &Expr::Struct { + values: vec![ + Expr::Named { + expr: Expr::Value(number("1")).into(), + name: Ident::from("a") + }, + Expr::Named { + expr: Expr::Value(Value::SingleQuotedString("abc".to_string())).into(), + name: Ident::from("b") + }, + ], + fields: Default::default() + }, + 
expr_from_projection(&select.projection[3]) + ); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Named { + expr: Expr::Identifier(Ident::from("str_col")).into(), + name: Ident::from("abc") + }], + fields: Default::default() + }, + expr_from_projection(&select.projection[4]) + ); +} + +#[test] +fn parse_typed_struct_syntax() { + // typed struct syntax https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typed_struct_syntax + // syntax: STRUCT<[field_name] field_type, ...>( expr1 [, ... ]) + + let sql = r#"SELECT STRUCT(5), STRUCT(1, t.str_col), STRUCT, str STRUCT>(nested_col)"#; + let select = bigquery().verified_only_select(sql); + assert_eq!(3, select.projection.len()); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Value(number("5")),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Int64, + }] + }, + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Struct { + values: vec![ + Expr::Value(number("1")), + Expr::CompoundIdentifier(vec![ + Ident { + value: "t".into(), + quote_style: None, + }, + Ident { + value: "str_col".into(), + quote_style: None, + }, + ]), + ], + fields: vec![ + StructField { + field_name: Some(Ident { + value: "x".into(), + quote_style: None, + }), + field_type: DataType::Int64 + }, + StructField { + field_name: Some(Ident { + value: "y".into(), + quote_style: None, + }), + field_type: DataType::String(None) + }, + ] + }, + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Identifier(Ident { + value: "nested_col".into(), + quote_style: None, + }),], + fields: vec![ + StructField { + field_name: Some("arr".into()), + field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new( + DataType::Float64 + ))) + }, + StructField { + field_name: Some("str".into()), + field_type: DataType::Struct(vec![StructField { + field_name: None, + field_type: DataType::Bool + }]) + }, + ] + }, + 
expr_from_projection(&select.projection[2]) + ); + + let sql = r#"SELECT STRUCT>(nested_col)"#; + let select = bigquery().verified_only_select(sql); + assert_eq!(1, select.projection.len()); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Identifier(Ident { + value: "nested_col".into(), + quote_style: None, + }),], + fields: vec![ + StructField { + field_name: Some("x".into()), + field_type: DataType::Struct(Default::default()) + }, + StructField { + field_name: Some("y".into()), + field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new( + DataType::Struct(Default::default()) + ))) + }, + ] + }, + expr_from_projection(&select.projection[0]) + ); + + let sql = r#"SELECT STRUCT(true), STRUCT(B'abc')"#; + let select = bigquery().verified_only_select(sql); + assert_eq!(2, select.projection.len()); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Value(Value::Boolean(true)),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Bool + }] + }, + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Value(Value::SingleQuotedByteStringLiteral( + "abc".into() + )),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Bytes(Some(42)) + }] + }, + expr_from_projection(&select.projection[1]) + ); + + let sql = r#"SELECT STRUCT("2011-05-05"), STRUCT(DATETIME '1999-01-01 01:23:34.45'), STRUCT(5.0), STRUCT(1)"#; + let select = bigquery().verified_only_select(sql); + assert_eq!(4, select.projection.len()); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Value(Value::DoubleQuotedString( + "2011-05-05".to_string() + )),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Date + }] + }, + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Struct { + values: vec![Expr::TypedString { + data_type: DataType::Datetime(None), + value: "1999-01-01 01:23:34.45".to_string() + },], + fields: vec![StructField { + field_name: None, + 
field_type: DataType::Datetime(None) + }] + }, + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Value(number("5.0")),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Float64 + }] + }, + expr_from_projection(&select.projection[2]) + ); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Value(number("1")),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Int64 + }] + }, + expr_from_projection(&select.projection[3]) + ); + + let sql = r#"SELECT STRUCT(INTERVAL '1-2 3 4:5:6.789999'), STRUCT(JSON '{"class" : {"students" : [{"name" : "Jane"}]}}')"#; + let select = bigquery().verified_only_select(sql); + assert_eq!(2, select.projection.len()); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Interval(ast::Interval { + value: Box::new(Expr::Value(Value::SingleQuotedString( + "1-2 3 4:5:6.789999".to_string() + ))), + leading_field: None, + leading_precision: None, + last_field: None, + fractional_seconds_precision: None + }),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Interval + }] + }, + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Struct { + values: vec![Expr::TypedString { + data_type: DataType::JSON, + value: r#"{"class" : {"students" : [{"name" : "Jane"}]}}"#.to_string() + },], + fields: vec![StructField { + field_name: None, + field_type: DataType::JSON + }] + }, + expr_from_projection(&select.projection[1]) + ); + + let sql = r#"SELECT STRUCT("foo"), STRUCT(TIMESTAMP '2008-12-25 15:30:00 America/Los_Angeles'), STRUCT, b BYTES(42)>, y ARRAY>)"; - match bigquery().one_statement_parses_to(sql, sql) { + match bigquery_and_generic().one_statement_parses_to(sql, sql) { Statement::CreateTable { name, columns, .. 
} => { assert_eq!(name, ObjectName(vec!["table".into()])); assert_eq!( @@ -395,19 +395,25 @@ fn parse_nested_data_types() { fn parse_invalid_brackets() { let sql = "SELECT STRUCT>(NULL)"; assert_eq!( - bigquery().parse_sql_statements(sql).unwrap_err(), + bigquery_and_generic() + .parse_sql_statements(sql) + .unwrap_err(), ParserError::ParserError("unmatched > in STRUCT literal".to_string()) ); let sql = "SELECT STRUCT>>(NULL)"; assert_eq!( - bigquery().parse_sql_statements(sql).unwrap_err(), + bigquery_and_generic() + .parse_sql_statements(sql) + .unwrap_err(), ParserError::ParserError("Expected (, found: >".to_string()) ); let sql = "CREATE TABLE table (x STRUCT>>)"; assert_eq!( - bigquery().parse_sql_statements(sql).unwrap_err(), + bigquery_and_generic() + .parse_sql_statements(sql) + .unwrap_err(), ParserError::ParserError( "Expected ',' or ')' after column definition, found: >".to_string() ) @@ -445,7 +451,7 @@ fn parse_typeless_struct_syntax() { // typeless struct syntax https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typeless_struct_syntax // syntax: STRUCT( expr1 [AS field_name] [, ... ]) let sql = "SELECT STRUCT(1, 2, 3), STRUCT('abc'), STRUCT(1, t.str_col), STRUCT(1 AS a, 'abc' AS b), STRUCT(str_col AS abc)"; - let select = bigquery().verified_only_select(sql); + let select = bigquery_and_generic().verified_only_select(sql); assert_eq!(5, select.projection.len()); assert_eq!( &Expr::Struct { @@ -505,7 +511,7 @@ fn parse_typeless_struct_syntax() { } #[test] -fn parse_typed_struct_syntax() { +fn parse_typed_struct_syntax_bigquery() { // typed struct syntax https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typed_struct_syntax // syntax: STRUCT<[field_name] field_type, ...>( expr1 [, ... 
]) @@ -789,7 +795,291 @@ fn parse_typed_struct_syntax() { } #[test] -fn parse_typed_struct_with_field_name() { +fn parse_typed_struct_syntax_bigquery_and_generic() { + // typed struct syntax https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typed_struct_syntax + // syntax: STRUCT<[field_name] field_type, ...>( expr1 [, ... ]) + + let sql = r#"SELECT STRUCT(5), STRUCT(1, t.str_col), STRUCT, str STRUCT>(nested_col)"#; + let select = bigquery_and_generic().verified_only_select(sql); + assert_eq!(3, select.projection.len()); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Value(number("5")),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Int64, + }] + }, + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Struct { + values: vec![ + Expr::Value(number("1")), + Expr::CompoundIdentifier(vec![ + Ident { + value: "t".into(), + quote_style: None, + }, + Ident { + value: "str_col".into(), + quote_style: None, + }, + ]), + ], + fields: vec![ + StructField { + field_name: Some(Ident { + value: "x".into(), + quote_style: None, + }), + field_type: DataType::Int64 + }, + StructField { + field_name: Some(Ident { + value: "y".into(), + quote_style: None, + }), + field_type: DataType::String(None) + }, + ] + }, + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Identifier(Ident { + value: "nested_col".into(), + quote_style: None, + }),], + fields: vec![ + StructField { + field_name: Some("arr".into()), + field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new( + DataType::Float64 + ))) + }, + StructField { + field_name: Some("str".into()), + field_type: DataType::Struct(vec![StructField { + field_name: None, + field_type: DataType::Bool + }]) + }, + ] + }, + expr_from_projection(&select.projection[2]) + ); + + let sql = r#"SELECT STRUCT>(nested_col)"#; + let select = bigquery_and_generic().verified_only_select(sql); + assert_eq!(1, 
select.projection.len()); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Identifier(Ident { + value: "nested_col".into(), + quote_style: None, + }),], + fields: vec![ + StructField { + field_name: Some("x".into()), + field_type: DataType::Struct(Default::default()) + }, + StructField { + field_name: Some("y".into()), + field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new( + DataType::Struct(Default::default()) + ))) + }, + ] + }, + expr_from_projection(&select.projection[0]) + ); + + let sql = r#"SELECT STRUCT(true), STRUCT(B'abc')"#; + let select = bigquery_and_generic().verified_only_select(sql); + assert_eq!(2, select.projection.len()); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Value(Value::Boolean(true)),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Bool + }] + }, + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Value(Value::SingleQuotedByteStringLiteral( + "abc".into() + )),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Bytes(Some(42)) + }] + }, + expr_from_projection(&select.projection[1]) + ); + + let sql = r#"SELECT STRUCT('2011-05-05'), STRUCT(DATETIME '1999-01-01 01:23:34.45'), STRUCT(5.0), STRUCT(1)"#; + let select = bigquery_and_generic().verified_only_select(sql); + assert_eq!(4, select.projection.len()); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Value(Value::SingleQuotedString( + "2011-05-05".to_string() + )),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Date + }] + }, + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Struct { + values: vec![Expr::TypedString { + data_type: DataType::Datetime(None), + value: "1999-01-01 01:23:34.45".to_string() + },], + fields: vec![StructField { + field_name: None, + field_type: DataType::Datetime(None) + }] + }, + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::Struct { + values: 
vec![Expr::Value(number("5.0")),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Float64 + }] + }, + expr_from_projection(&select.projection[2]) + ); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Value(number("1")),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Int64 + }] + }, + expr_from_projection(&select.projection[3]) + ); + + let sql = r#"SELECT STRUCT(INTERVAL '1-2 3 4:5:6.789999'), STRUCT(JSON '{"class" : {"students" : [{"name" : "Jane"}]}}')"#; + let select = bigquery_and_generic().verified_only_select(sql); + assert_eq!(2, select.projection.len()); + assert_eq!( + &Expr::Struct { + values: vec![Expr::Interval(ast::Interval { + value: Box::new(Expr::Value(Value::SingleQuotedString( + "1-2 3 4:5:6.789999".to_string() + ))), + leading_field: None, + leading_precision: None, + last_field: None, + fractional_seconds_precision: None + }),], + fields: vec![StructField { + field_name: None, + field_type: DataType::Interval + }] + }, + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Struct { + values: vec![Expr::TypedString { + data_type: DataType::JSON, + value: r#"{"class" : {"students" : [{"name" : "Jane"}]}}"#.to_string() + },], + fields: vec![StructField { + field_name: None, + field_type: DataType::JSON + }] + }, + expr_from_projection(&select.projection[1]) + ); + + let sql = r#"SELECT STRUCT('foo'), STRUCT(TIMESTAMP '2008-12-25 15:30:00 America/Los_Angeles'), STRUCT