Skip to content

Commit b453068

Browse files
authored
Add support for trailing commas (apache#810)
* Add support for trailing commas * Support trailing commas for brace/bracket * Andrew's comments
1 parent 2285bb4 commit b453068

File tree

2 files changed

+63
-21
lines changed

2 files changed

+63
-21
lines changed

src/parser.rs

Lines changed: 61 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -195,12 +195,20 @@ impl std::error::Error for ParserError {}
195195
// By default, allow expressions up to this deep before erroring
196196
const DEFAULT_REMAINING_DEPTH: usize = 50;
197197

198+
#[derive(Default)]
199+
pub struct ParserOptions {
200+
pub trailing_commas: bool,
201+
}
202+
198203
pub struct Parser<'a> {
199204
tokens: Vec<TokenWithLocation>,
200205
/// The index of the first unprocessed token in `self.tokens`
201206
index: usize,
202207
/// The current dialect to use
203208
dialect: &'a dyn Dialect,
209+
/// Additional options that allow you to mix & match behavior otherwise
210+
/// constrained to certain dialects (e.g. trailing commas)
211+
options: ParserOptions,
204212
/// ensure the stack does not overflow by limiting recursion depth
205213
recursion_counter: RecursionCounter,
206214
}
@@ -227,6 +235,7 @@ impl<'a> Parser<'a> {
227235
index: 0,
228236
dialect,
229237
recursion_counter: RecursionCounter::new(DEFAULT_REMAINING_DEPTH),
238+
options: ParserOptions::default(),
230239
}
231240
}
232241

@@ -255,6 +264,31 @@ impl<'a> Parser<'a> {
255264
self
256265
}
257266

267+
/// Specify additional parser options
268+
///
269+
///
270+
/// [`Parser`] supports additional options ([`ParserOptions`]) that allow you to
271+
/// mix & match behavior otherwise constrained to certain dialects (e.g. trailing
272+
/// commas).
273+
///
274+
/// Example:
275+
/// ```
276+
/// # use sqlparser::{parser::{Parser, ParserError, ParserOptions}, dialect::GenericDialect};
277+
/// # fn main() -> Result<(), ParserError> {
278+
/// let dialect = GenericDialect{};
279+
/// let result = Parser::new(&dialect)
280+
/// .with_options(ParserOptions { trailing_commas: true })
281+
/// .try_with_sql("SELECT a, b, COUNT(*), FROM foo GROUP BY a, b,")?
282+
/// .parse_statements();
283+
/// assert!(matches!(result, Ok(_)));
284+
/// # Ok(())
285+
/// # }
286+
/// ```
287+
pub fn with_options(mut self, options: ParserOptions) -> Self {
288+
self.options = options;
289+
self
290+
}
291+
258292
/// Reset this parser to parse the specified token stream
259293
pub fn with_tokens_with_locations(mut self, tokens: Vec<TokenWithLocation>) -> Self {
260294
self.tokens = tokens;
@@ -2196,15 +2230,32 @@ impl<'a> Parser<'a> {
21962230

21972231
/// Parse a comma-separated list of 1+ SelectItem
21982232
pub fn parse_projection(&mut self) -> Result<Vec<SelectItem>, ParserError> {
2233+
// BigQuery allows trailing commas, but only in projection lists
2234+
// e.g. `SELECT 1, 2, FROM t`
2235+
// https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#trailing_commas
2236+
//
2237+
// This pattern could be captured better with RAII type semantics, but it's quite a bit of
2238+
// code to add for just one case, so we'll just do it manually here.
2239+
let old_value = self.options.trailing_commas;
2240+
self.options.trailing_commas |= dialect_of!(self is BigQueryDialect);
2241+
2242+
let ret = self.parse_comma_separated(|p| p.parse_select_item());
2243+
self.options.trailing_commas = old_value;
2244+
2245+
ret
2246+
}
2247+
2248+
/// Parse a comma-separated list of 1+ items accepted by `F`
2249+
pub fn parse_comma_separated<T, F>(&mut self, mut f: F) -> Result<Vec<T>, ParserError>
2250+
where
2251+
F: FnMut(&mut Parser<'a>) -> Result<T, ParserError>,
2252+
{
21992253
let mut values = vec![];
22002254
loop {
2201-
values.push(self.parse_select_item()?);
2255+
values.push(f(self)?);
22022256
if !self.consume_token(&Token::Comma) {
22032257
break;
2204-
} else if dialect_of!(self is BigQueryDialect) {
2205-
// BigQuery allows trailing commas.
2206-
// e.g. `SELECT 1, 2, FROM t`
2207-
// https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#trailing_commas
2258+
} else if self.options.trailing_commas {
22082259
match self.peek_token().token {
22092260
Token::Word(kw)
22102261
if keywords::RESERVED_FOR_COLUMN_ALIAS
@@ -2213,29 +2264,18 @@ impl<'a> Parser<'a> {
22132264
{
22142265
break;
22152266
}
2216-
Token::RParen | Token::EOF => break,
2267+
Token::RParen
2268+
| Token::SemiColon
2269+
| Token::EOF
2270+
| Token::RBracket
2271+
| Token::RBrace => break,
22172272
_ => continue,
22182273
}
22192274
}
22202275
}
22212276
Ok(values)
22222277
}
22232278

2224-
/// Parse a comma-separated list of 1+ items accepted by `F`
2225-
pub fn parse_comma_separated<T, F>(&mut self, mut f: F) -> Result<Vec<T>, ParserError>
2226-
where
2227-
F: FnMut(&mut Parser<'a>) -> Result<T, ParserError>,
2228-
{
2229-
let mut values = vec![];
2230-
loop {
2231-
values.push(f(self)?);
2232-
if !self.consume_token(&Token::Comma) {
2233-
break;
2234-
}
2235-
}
2236-
Ok(values)
2237-
}
2238-
22392279
/// Run a parser method `f`, reverting back to the current position
22402280
/// if unsuccessful.
22412281
#[must_use]

tests/sqlparser_bigquery.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,8 @@ fn parse_join_constraint_unnest_alias() {
181181
fn parse_trailing_comma() {
182182
for (sql, canonical) in [
183183
("SELECT a,", "SELECT a"),
184+
("SELECT 1,", "SELECT 1"),
185+
("SELECT 1,2,", "SELECT 1, 2"),
184186
("SELECT a, b,", "SELECT a, b"),
185187
("SELECT a, b AS c,", "SELECT a, b AS c"),
186188
("SELECT a, b AS c, FROM t", "SELECT a, b AS c FROM t"),

0 commit comments

Comments
 (0)