From 9f058c8ae8a28597dc73dd3586b67362ebdf6d7a Mon Sep 17 00:00:00 2001 From: abcpro1 Date: Mon, 26 Jun 2023 17:41:42 +0000 Subject: [PATCH] Make a special dialect for dozer `DozerDialect` is based on `AnsiDialect` with a dozer specific extension, which is parsing UDFs in the format `function_name<return_type>(arguments)`. --- src/ast/mod.rs | 9 +++- src/ast/visitor.rs | 2 +- src/dialect/dozer.rs | 29 ++++++++++++ src/dialect/mod.rs | 2 + src/parser.rs | 40 ++++++++++++++-- src/test_utils.rs | 1 + tests/sqlparser_bigquery.rs | 1 + tests/sqlparser_clickhouse.rs | 4 ++ tests/sqlparser_common.rs | 19 ++++++++ tests/sqlparser_dozer.rs | 86 +++++++++++++++++++++++++++++++++++ tests/sqlparser_hive.rs | 1 + tests/sqlparser_mssql.rs | 1 + tests/sqlparser_mysql.rs | 6 +++ tests/sqlparser_postgres.rs | 6 +++ tests/sqlparser_redshift.rs | 1 + tests/sqlparser_snowflake.rs | 1 + 16 files changed, 201 insertions(+), 8 deletions(-) create mode 100644 src/dialect/dozer.rs create mode 100644 tests/sqlparser_dozer.rs diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 5652cce19..e0a0e3c13 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -3472,6 +3472,8 @@ pub struct Function { pub special: bool, // Required ordering for the function (if empty, there is no requirement). 
pub order_by: Vec, + // Optional return type used in dozer dialect as `function_name(arguments)` + pub return_type: Option, } #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -3503,10 +3505,13 @@ impl fmt::Display for Function { } else { "" }; + write!(f, "{}", self.name)?; + if let Some(ident) = &self.return_type { + write!(f, "<{ident}>")?; + } write!( f, - "{}({}{}{order_by}{})", - self.name, + "({}{}{order_by}{})", if self.distinct { "DISTINCT " } else { "" }, display_comma_separated(&self.args), display_comma_separated(&self.order_by), diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index 81343220a..ffea6d783 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -480,7 +480,7 @@ where /// *expr = Expr::Function(Function { /// name: ObjectName(vec![Ident::new("f")]), /// args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(old_expr))], -/// over: None, distinct: false, special: false, order_by: vec![], +/// over: None, distinct: false, special: false, order_by: vec![], return_type: None, /// }); /// } /// ControlFlow::<()>::Continue(()) diff --git a/src/dialect/dozer.rs b/src/dialect/dozer.rs new file mode 100644 index 000000000..00484d508 --- /dev/null +++ b/src/dialect/dozer.rs @@ -0,0 +1,29 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use crate::dialect::Dialect; + +#[derive(Debug)] +pub struct DozerDialect {} + +impl Dialect for DozerDialect { + fn is_identifier_start(&self, ch: char) -> bool { + ('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) + } + + fn is_identifier_part(&self, ch: char) -> bool { + ('a'..='z').contains(&ch) + || ('A'..='Z').contains(&ch) + || ('0'..='9').contains(&ch) + || ch == '_' + } +} diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 8b3a58888..fc2dc7ed9 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -13,6 +13,7 @@ mod ansi; mod bigquery; mod clickhouse; +mod dozer; mod duckdb; mod generic; mod hive; @@ -32,6 +33,7 @@ use core::str::Chars; pub use self::ansi::AnsiDialect; pub use self::bigquery::BigQueryDialect; pub use self::clickhouse::ClickHouseDialect; +pub use self::dozer::DozerDialect; pub use self::duckdb::DuckDbDialect; pub use self::generic::GenericDialect; pub use self::hive::HiveDialect; diff --git a/src/parser.rs b/src/parser.rs index 0b1421463..d10248e86 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -712,6 +712,7 @@ impl<'a> Parser<'a> { distinct: false, special: true, order_by: vec![], + return_type: None, })) } Keyword::CURRENT_TIMESTAMP @@ -770,7 +771,7 @@ impl<'a> Parser<'a> { if self.consume_token(&Token::LParen) { self.prev_token(); - self.parse_function(ObjectName(id_parts)) + self.parse_function(ObjectName(id_parts), None) } else { Ok(Expr::CompoundIdentifier(id_parts)) } @@ -786,6 +787,15 @@ impl<'a> Parser<'a> { value: self.parse_introduced_string_value()?, }) } + // function_name(arguments) + Token::Lt + if dialect_of!(self is DozerDialect) + && matches!(self.peek_nth_token(1).token, Token::Word(_)) + && self.peek_nth_token(2) == Token::Gt + && self.peek_nth_token(3) == Token::LParen => + { + self.parse_function_with_return_type(ObjectName(vec![w.to_ident()])) + } _ => Ok(Expr::Identifier(w.to_ident())), }, }, // End of Token::Word @@ -884,7 +894,18 @@ impl<'a> Parser<'a> { } } - pub fn parse_function(&mut 
self, name: ObjectName) -> Result { + pub fn parse_function_with_return_type(&mut self, name: ObjectName) -> Result { + self.expect_token(&Token::Lt)?; + let next_token = self.next_token(); + let return_type = match next_token.token { + Token::Word(return_type) => return_type.to_ident(), + _ => self.expected("a return type identifier", next_token)?, + }; + self.expect_token(&Token::Gt)?; + self.parse_function(name, Some(return_type)) + } + + pub fn parse_function(&mut self, name: ObjectName, return_type: Option) -> Result { self.expect_token(&Token::LParen)?; let distinct = self.parse_all_or_distinct()?.is_some(); let (args, order_by) = self.parse_optional_args_with_orderby()?; @@ -905,6 +926,7 @@ impl<'a> Parser<'a> { distinct, special: false, order_by, + return_type, })) } @@ -921,6 +943,7 @@ impl<'a> Parser<'a> { distinct: false, special: false, order_by, + return_type: None, })) } @@ -4465,11 +4488,18 @@ impl<'a> Parser<'a> { Token::Word(Word { value, keyword, .. }) if (dialect_of!(self is BigQueryDialect) && keyword == Keyword::OFFSET) => { - self.parse_function(ObjectName(vec![Ident::new(value)])) + self.parse_function(ObjectName(vec![Ident::new(value)]), None) } Token::Word(Word { value, keyword, .. 
}) if (keyword == Keyword::NoKeyword) => { if self.peek_token() == Token::LParen { - return self.parse_function(ObjectName(vec![Ident::new(value)])); + return self.parse_function(ObjectName(vec![Ident::new(value)]), None); + } else if dialect_of!(self is DozerDialect) + && self.peek_token() == Token::Lt + && matches!(self.peek_nth_token(1).token, Token::Word(_)) + && self.peek_nth_token(2) == Token::Gt + && self.peek_nth_token(3) == Token::LParen + { + return self.parse_function_with_return_type(ObjectName(vec![Ident::new(value)])); } Ok(Expr::Value(Value::SingleQuotedString(value))) } @@ -6074,7 +6104,7 @@ impl<'a> Parser<'a> { Token::Word(w) => Ok(w.value), _ => self.expected("an aggregate function name", self.peek_token()), }?; - let function = self.parse_function(ObjectName(vec![Ident::new(function_name)]))?; + let function = self.parse_function(ObjectName(vec![Ident::new(function_name)]), None)?; self.expect_keyword(Keyword::FOR)?; let value_column = self.parse_object_name()?.0; self.expect_keyword(Keyword::IN)?; diff --git a/src/test_utils.rs b/src/test_utils.rs index 57b21e1c9..6c232792d 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -169,6 +169,7 @@ pub fn all_dialects() -> TestedDialects { Box::new(BigQueryDialect {}), Box::new(SQLiteDialect {}), Box::new(DuckDbDialect {}), + Box::new(DozerDialect {}), ], options: None, } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 11998ae6d..d8c04a874 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -438,6 +438,7 @@ fn parse_map_access_offset() { distinct: false, special: false, order_by: vec![], + return_type: None, })], }) ); diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 24c641561..0253198bd 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -53,6 +53,7 @@ fn parse_map_access_expr() { distinct: false, special: false, order_by: vec![], + return_type: None, })], })], into: None, @@ 
-90,6 +91,7 @@ fn parse_map_access_expr() { distinct: false, special: false, order_by: vec![], + return_type: None, })] }), op: BinaryOperator::NotEq, @@ -139,6 +141,7 @@ fn parse_array_fn() { distinct: false, special: false, order_by: vec![], + return_type: None, }), expr_from_projection(only(&select.projection)) ); @@ -194,6 +197,7 @@ fn parse_delimited_identifiers() { distinct: false, special: false, order_by: vec![], + return_type: None, }), expr_from_projection(&select.projection[1]), ); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 917a89d8d..48fe284a4 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -841,6 +841,7 @@ fn parse_select_count_wildcard() { distinct: false, special: false, order_by: vec![], + return_type: None, }), expr_from_projection(only(&select.projection)) ); @@ -861,6 +862,7 @@ fn parse_select_count_distinct() { distinct: true, special: false, order_by: vec![], + return_type: None, }), expr_from_projection(only(&select.projection)) ); @@ -1719,6 +1721,7 @@ fn parse_select_having() { distinct: false, special: false, order_by: vec![], + return_type: None, })), op: BinaryOperator::Gt, right: Box::new(Expr::Value(number("1"))), @@ -1753,6 +1756,7 @@ fn parse_select_qualify() { distinct: false, special: false, order_by: vec![], + return_type: None, })), op: BinaryOperator::Eq, right: Box::new(Expr::Value(number("1"))), @@ -3169,6 +3173,7 @@ fn parse_scalar_function_in_projection() { distinct: false, special: false, order_by: vec![], + return_type: None, }), expr_from_projection(only(&select.projection)) ); @@ -3288,6 +3293,7 @@ fn parse_named_argument_function() { distinct: false, special: false, order_by: vec![], + return_type: None, }), expr_from_projection(only(&select.projection)) ); @@ -3327,6 +3333,7 @@ fn parse_window_functions() { distinct: false, special: false, order_by: vec![], + return_type: None, }), expr_from_projection(&select.projection[0]) ); @@ -3365,6 +3372,7 @@ fn 
test_parse_named_window() { distinct: false, special: false, order_by: vec![], + return_type: None, }), alias: Ident { value: "min1".to_string(), @@ -3390,6 +3398,7 @@ fn test_parse_named_window() { distinct: false, special: false, order_by: vec![], + return_type: None, }), alias: Ident { value: "max1".to_string(), @@ -3851,6 +3860,7 @@ fn parse_at_timezone() { distinct: false, special: false, order_by: vec![], + return_type: None, })), time_zone: "UTC-06:00".to_string(), }, @@ -3878,6 +3888,7 @@ fn parse_at_timezone() { distinct: false, special: false, order_by: vec![], + return_type: None, },)), time_zone: "UTC-06:00".to_string(), },),), @@ -3889,6 +3900,7 @@ fn parse_at_timezone() { distinct: false, special: false, order_by: vec![], + return_type: None, },), alias: Ident { value: "hour".to_string(), @@ -4047,6 +4059,7 @@ fn parse_table_function() { distinct: false, special: false, order_by: vec![], + return_type: None, }); assert_eq!(expr, expected_expr); assert_eq!(alias, table_alias("a")) @@ -6473,6 +6486,7 @@ fn parse_time_functions() { distinct: false, special: false, order_by: vec![], + return_type: None, }), expr_from_projection(&select.projection[0]) ); @@ -6490,6 +6504,7 @@ fn parse_time_functions() { distinct: false, special: false, order_by: vec![], + return_type: None, }), expr_from_projection(&select.projection[0]) ); @@ -6507,6 +6522,7 @@ fn parse_time_functions() { distinct: false, special: false, order_by: vec![], + return_type: None, }), expr_from_projection(&select.projection[0]) ); @@ -6524,6 +6540,7 @@ fn parse_time_functions() { distinct: false, special: false, order_by: vec![], + return_type: None, }), expr_from_projection(&select.projection[0]) ); @@ -6541,6 +6558,7 @@ fn parse_time_functions() { distinct: false, special: false, order_by: vec![], + return_type: None, }), expr_from_projection(&select.projection[0]) ); @@ -7006,6 +7024,7 @@ fn parse_pivot_table() { distinct: false, special: false, order_by: vec![], + return_type: None, }), 
value_column: vec![Ident::new("a"), Ident::new("MONTH")], pivot_values: vec![ diff --git a/tests/sqlparser_dozer.rs b/tests/sqlparser_dozer.rs new file mode 100644 index 000000000..f353f6ece --- /dev/null +++ b/tests/sqlparser_dozer.rs @@ -0,0 +1,86 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#![warn(clippy::all)] +//! Test SQL syntax specific to Dozer. + +#[macro_use] +mod test_utils; +use test_utils::*; + +use sqlparser::ast::*; +use sqlparser::dialect::DozerDialect; + +fn dozer() -> TestedDialects { + TestedDialects { + dialects: vec![Box::new(DozerDialect {})], + options: None, + } +} + +#[test] +fn parse_function_with_return_type() { + // check that quoted identifiers in any position remain quoted after serialization + let select = dozer().verified_only_select( + r#"SELECT myfun(0, a) FROM b"#, + ); + assert_eq!( + &Expr::Function(Function { + name: ObjectName(vec![Ident::new("myfun")]), + args: vec![ + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( + number("0") + ))), + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(Ident::new( + "a" + )))), + ], + over: None, + distinct: false, + special: false, + order_by: vec![], + return_type: Some(Ident::new("float")), + }), + expr_from_projection(&select.projection[0]), + ); +} + +#[test] +fn parse_function_with_return_type_in_map_access() { + let sql = r#"SELECT some_map[some_fun(some_arg, 'another_arg')] FROM some_table"#; + let select = dozer().verified_only_select(sql); + 
assert_eq!( + &Expr::MapAccess { + column: Box::new(Expr::Identifier(Ident { + value: "some_map".to_string(), + quote_style: None, + })), + keys: vec![Expr::Function(Function { + name: ObjectName(vec!["some_fun".into()]), + args: vec![ + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(Ident::new( + "some_arg" + )))), + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( + Value::SingleQuotedString("another_arg".to_string()) + ))), + ], + over: None, + distinct: false, + special: false, + order_by: vec![], + return_type: Some(Ident::new("string")), + })], + }, + expr_from_projection(&select.projection[0]), + ); +} diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index ddc5a8ccf..493a521f3 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -347,6 +347,7 @@ fn parse_delimited_identifiers() { distinct: false, special: false, order_by: vec![], + return_type: None, }), expr_from_projection(&select.projection[1]), ); diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 8fc3dcd59..7f4783db8 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -260,6 +260,7 @@ fn parse_delimited_identifiers() { distinct: false, special: false, order_by: vec![], + return_type: None, }), expr_from_projection(&select.projection[1]), ); diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 3e5c810ef..14b61646b 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -794,6 +794,7 @@ fn parse_insert_with_on_duplicate_update() { distinct: false, special: false, order_by: vec![], + return_type: None, }) }, Assignment { @@ -807,6 +808,7 @@ fn parse_insert_with_on_duplicate_update() { distinct: false, special: false, order_by: vec![], + return_type: None, }) }, Assignment { @@ -820,6 +822,7 @@ fn parse_insert_with_on_duplicate_update() { distinct: false, special: false, order_by: vec![], + return_type: None, }) }, Assignment { @@ -833,6 +836,7 @@ fn 
parse_insert_with_on_duplicate_update() { distinct: false, special: false, order_by: vec![], + return_type: None, }) }, Assignment { @@ -846,6 +850,7 @@ fn parse_insert_with_on_duplicate_update() { distinct: false, special: false, order_by: vec![], + return_type: None, }) }, ])), @@ -1193,6 +1198,7 @@ fn parse_table_colum_option_on_update() { distinct: false, special: false, order_by: vec![], + return_type: None, })), },], }], diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 80a4261ee..ba6ca3d5d 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2062,6 +2062,7 @@ fn test_composite_value() { distinct: false, special: false, order_by: vec![], + return_type: None, })))) }), select.projection[0] @@ -2224,6 +2225,7 @@ fn parse_current_functions() { distinct: false, special: true, order_by: vec![], + return_type: None, }), expr_from_projection(&select.projection[0]) ); @@ -2235,6 +2237,7 @@ fn parse_current_functions() { distinct: false, special: true, order_by: vec![], + return_type: None, }), expr_from_projection(&select.projection[1]) ); @@ -2246,6 +2249,7 @@ fn parse_current_functions() { distinct: false, special: true, order_by: vec![], + return_type: None, }), expr_from_projection(&select.projection[2]) ); @@ -2257,6 +2261,7 @@ fn parse_current_functions() { distinct: false, special: true, order_by: vec![], + return_type: None, }), expr_from_projection(&select.projection[3]) ); @@ -2512,6 +2517,7 @@ fn parse_delimited_identifiers() { distinct: false, special: false, order_by: vec![], + return_type: None, }), expr_from_projection(&select.projection[1]), ); diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index c44f6dee4..ee37295bb 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -133,6 +133,7 @@ fn parse_delimited_identifiers() { distinct: false, special: false, order_by: vec![], + return_type: None, }), expr_from_projection(&select.projection[1]), ); diff 
--git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 9a54c89cf..6ad9bbba0 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -250,6 +250,7 @@ fn parse_delimited_identifiers() { distinct: false, special: false, order_by: vec![], + return_type: None, }), expr_from_projection(&select.projection[1]), );