Skip to content

Commit 0d57947

Browse files
author
Paweł Andruszkiewicz
committed
BUG#35894915 Update parser grammar to use new features
Applied changes from shell-plugins up to: f37bcc7cfdc7881829e2ca8d8813d9a5810a24fc
Change-Id: Ib034c693a8a70fd2f7ab3a2a5edaca3d62defdef
(cherry picked from commit d298f3bc288a9619fd3270f6d81ce07d9324fade)
1 parent c81577b commit 0d57947

34 files changed

+23980
-21387
lines changed

mysqlshdk/libs/parser/MySQLBaseLexer.cpp

Lines changed: 63 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2016, 2022, Oracle and/or its affiliates.
2+
* Copyright (c) 2016, 2023, Oracle and/or its affiliates.
33
*
44
* This program is free software; you can redistribute it and/or modify
55
* it under the terms of the GNU General Public License, version 2.0,
@@ -72,15 +72,18 @@ bool MySQLBaseLexer::isIdentifier(size_t type_) const {
7272

7373
// Double quoted text represents identifiers only if the ANSI QUOTES sql mode
7474
// is active.
75-
if (((sqlMode & AnsiQuotes) != 0) &&
76-
(type_ == MySQLLexer::DOUBLE_QUOTED_TEXT))
77-
return true;
75+
if (type_ == MySQLLexer::DOUBLE_QUOTED_TEXT) return (sqlMode & AnsiQuotes);
7876

79-
const std::string symbol{getVocabulary().getSymbolicName(type_)};
80-
if (!symbol.empty() &&
81-
!MySQLSymbolInfo::isReservedKeyword(
82-
symbol, MySQLSymbolInfo::numberToVersion(serverVersion)))
83-
return true;
77+
if (auto symbol = getVocabulary().getSymbolicName(type_); !symbol.empty()) {
78+
if (symbol.ends_with("_SYMBOL")) {
79+
symbol.remove_suffix(7);
80+
}
81+
82+
if (!MySQLSymbolInfo::isReservedKeyword(
83+
symbol, MySQLSymbolInfo::numberToVersion(serverVersion))) {
84+
return true;
85+
}
86+
}
8487

8588
return false;
8689
}
@@ -92,22 +95,30 @@ size_t MySQLBaseLexer::keywordFromText(std::string const &name) {
9295
// here for comparison.
9396
std::string transformed;
9497
std::transform(name.begin(), name.end(), std::back_inserter(transformed),
95-
::tolower);
98+
::toupper);
9699

97100
if (!MySQLSymbolInfo::isKeyword(
98101
transformed, MySQLSymbolInfo::numberToVersion(serverVersion)))
99102
return INVALID_INDEX - 1; // INVALID_INDEX alone can be interpreted as EOF.
100103

101-
// Generate string -> enum value map, if not yet done.
104+
// Generate string_view -> enum value map, if not yet done.
102105
if (_symbols.empty()) {
103-
auto &vocabulary = getVocabulary();
104-
size_t max = vocabulary.getMaxTokenType();
105-
for (size_t i = 0; i <= max; ++i)
106-
_symbols[std::string{vocabulary.getSymbolicName(i)}] = i;
106+
const auto &vocabulary = getVocabulary();
107+
const auto max = vocabulary.getMaxTokenType();
108+
109+
for (size_t i = 0; i <= max; ++i) {
110+
if (auto symbol = vocabulary.getSymbolicName(i); !symbol.empty()) {
111+
if (symbol.ends_with("_SYMBOL")) {
112+
symbol.remove_suffix(7);
113+
}
114+
115+
_symbols[symbol] = i;
116+
}
117+
}
107118
}
108119

109120
// Here we know for sure we got a keyword.
110-
auto symbol = _symbols.find(transformed);
121+
const auto symbol = _symbols.find(transformed);
111122
if (symbol == _symbols.end()) return INVALID_INDEX - 1;
112123
return symbol->second;
113124
}
@@ -238,6 +249,16 @@ MySQLQueryType MySQLBaseLexer::determineQueryType() {
238249

239250
case MySQLLexer::CREATE_SYMBOL: {
240251
tok = nextDefaultChannelToken();
252+
253+
// Skip OR REPLACE
254+
if (tok->getType() == MySQLLexer::OR_SYMBOL) {
255+
tok = nextDefaultChannelToken();
256+
257+
if (tok->getType() == MySQLLexer::REPLACE_SYMBOL) {
258+
tok = nextDefaultChannelToken();
259+
}
260+
}
261+
241262
if (tok->getType() == Token::EOF) return QtAmbiguous;
242263

243264
switch (tok->getType()) {
@@ -299,7 +320,6 @@ MySQLQueryType MySQLBaseLexer::determineQueryType() {
299320
}
300321

301322
case MySQLLexer::VIEW_SYMBOL:
302-
case MySQLLexer::OR_SYMBOL: // CREATE OR REPLACE ... VIEW
303323
case MySQLLexer::ALGORITHM_SYMBOL: // CREATE ALGORITHM ... VIEW
304324
return QtCreateView;
305325

@@ -441,21 +461,16 @@ MySQLQueryType MySQLBaseLexer::determineQueryType() {
441461
case MySQLLexer::UPDATE_SYMBOL:
442462
return QtUpdate;
443463

444-
case MySQLLexer::OPEN_PAR_SYMBOL: // Either (((select ..))) or
445-
// (partition...)
464+
case MySQLLexer::OPEN_PAR_SYMBOL: // (((select ..)))
446465
{
447466
while (tok->getType() == MySQLLexer::OPEN_PAR_SYMBOL) {
448467
tok = nextDefaultChannelToken();
449468
if (tok->getType() == Token::EOF) return QtAmbiguous;
450469
}
451470
if (tok->getType() == MySQLLexer::SELECT_SYMBOL) return QtSelect;
452-
return QtPartition;
471+
return QtUnknown;
453472
}
454473

455-
case MySQLLexer::PARTITION_SYMBOL:
456-
case MySQLLexer::PARTITIONS_SYMBOL:
457-
return QtPartition;
458-
459474
case MySQLLexer::START_SYMBOL: {
460475
tok = nextDefaultChannelToken();
461476
if (tok->getType() == Token::EOF) return QtAmbiguous;
@@ -511,6 +526,7 @@ MySQLQueryType MySQLBaseLexer::determineQueryType() {
511526

512527
if (tok->getType() == MySQLLexer::TRANSACTION_SYMBOL)
513528
return QtSetTransaction;
529+
514530
return QtSet;
515531
}
516532

@@ -541,9 +557,10 @@ MySQLQueryType MySQLBaseLexer::determineQueryType() {
541557
if (tok->getType() == Token::EOF) return QtReset;
542558

543559
switch (tok->getType()) {
544-
case MySQLLexer::SERVER_SYMBOL:
560+
case MySQLLexer::MASTER_SYMBOL:
545561
return QtResetMaster;
546562
case MySQLLexer::SLAVE_SYMBOL:
563+
case MySQLLexer::REPLICA_SYMBOL:
547564
return QtResetSlave;
548565
case MySQLLexer::PERSIST_SYMBOL:
549566
return QtResetPersist;
@@ -637,9 +654,6 @@ MySQLQueryType MySQLBaseLexer::determineQueryType() {
637654
return QtShowVariables;
638655
}
639656

640-
case MySQLLexer::AUTHORS_SYMBOL:
641-
return QtShowAuthors;
642-
643657
case MySQLLexer::BINARY_SYMBOL:
644658
return QtShowBinaryLogs;
645659

@@ -650,6 +664,7 @@ MySQLQueryType MySQLBaseLexer::determineQueryType() {
650664
return QtShowRelaylogEvents;
651665

652666
case MySQLLexer::CHAR_SYMBOL:
667+
case MySQLLexer::CHARSET_SYMBOL:
653668
return QtShowCharset;
654669

655670
case MySQLLexer::COLLATION_SYMBOL:
@@ -658,9 +673,6 @@ MySQLQueryType MySQLBaseLexer::determineQueryType() {
658673
case MySQLLexer::COLUMNS_SYMBOL:
659674
return QtShowColumns;
660675

661-
case MySQLLexer::CONTRIBUTORS_SYMBOL:
662-
return QtShowContributors;
663-
664676
case MySQLLexer::COUNT_SYMBOL: {
665677
tok = nextDefaultChannelToken();
666678
if (tok->getType() != MySQLLexer::OPEN_PAR_SYMBOL) return QtShow;
@@ -768,6 +780,7 @@ MySQLQueryType MySQLBaseLexer::determineQueryType() {
768780
case MySQLLexer::PRIVILEGES_SYMBOL:
769781
return QtShowPrivileges;
770782

783+
case MySQLLexer::FULL_SYMBOL:
771784
case MySQLLexer::PROCESSLIST_SYMBOL:
772785
return QtShowProcessList;
773786

@@ -817,8 +830,8 @@ MySQLQueryType MySQLBaseLexer::determineQueryType() {
817830
case MySQLLexer::KILL_SYMBOL:
818831
return QtKill;
819832

820-
case MySQLLexer::DESCRIBE_SYMBOL: // EXPLAIN is converted to DESCRIBE in
821-
// the lexer.
833+
case MySQLLexer::EXPLAIN_SYMBOL:
834+
case MySQLLexer::DESCRIBE_SYMBOL:
822835
case MySQLLexer::DESC_SYMBOL: {
823836
tok = nextDefaultChannelToken();
824837
if (tok->getType() == Token::EOF) return QtAmbiguous;
@@ -921,6 +934,21 @@ bool MySQLBaseLexer::isNumber(size_t type) {
921934

922935
//----------------------------------------------------------------------------------------------------------------------
923936

937+
bool MySQLBaseLexer::isDelimiter(size_t type) {
938+
switch (type) {
939+
case MySQLLexer::DOT_SYMBOL:
940+
case MySQLLexer::COMMA_SYMBOL:
941+
case MySQLLexer::SEMICOLON_SYMBOL:
942+
case MySQLLexer::COLON_SYMBOL:
943+
return true;
944+
945+
default:
946+
return false;
947+
}
948+
}
949+
950+
//----------------------------------------------------------------------------------------------------------------------
951+
924952
bool MySQLBaseLexer::isOperator(size_t type) {
925953
switch (type) {
926954
case MySQLLexer::EQUAL_OPERATOR:
@@ -990,7 +1018,7 @@ std::unique_ptr<antlr4::Token> MySQLBaseLexer::nextToken() {
9901018

9911019
//----------------------------------------------------------------------------------------------------------------------
9921020

993-
bool MySQLBaseLexer::checkVersion(const std::string &text) {
1021+
bool MySQLBaseLexer::checkMySQLVersion(const std::string &text) {
9941022
if (text.size() < 8) // Minimum is: /*!12345
9951023
return false;
9961024

@@ -1125,6 +1153,7 @@ void MySQLBaseLexer::emitSymbol(size_t symbol) {
11251153
{this, _input}, symbol, _text, channel, tokenStartCharIndex,
11261154
tokenStartCharIndex, tokenStartLine, tokenStartCharPositionInLine));
11271155
++tokenStartCharIndex;
1156+
++tokenStartCharPositionInLine;
11281157
}
11291158

11301159
//----------------------------------------------------------------------------------------------------------------------

mysqlshdk/libs/parser/MySQLBaseLexer.h

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2016, 2022, Oracle and/or its affiliates.
2+
* Copyright (c) 2016, 2023, Oracle and/or its affiliates.
33
*
44
* This program is free software; you can redistribute it and/or modify
55
* it under the terms of the GNU General Public License, version 2.0,
@@ -28,6 +28,7 @@
2828
#include <memory>
2929
#include <set>
3030
#include <string>
31+
#include <string_view>
3132

3233
#include <Lexer.h>
3334

@@ -61,14 +62,15 @@ class PARSERS_PUBLIC_TYPE MySQLBaseLexer : public antlr4::Lexer,
6162

6263
static bool isRelation(size_t type);
6364
static bool isNumber(size_t type);
65+
static bool isDelimiter(size_t type);
6466
static bool isOperator(size_t type);
6567

6668
std::unique_ptr<antlr4::Token> nextToken() override;
6769

6870
protected:
6971
// Checks if the version number, given by the token, is less than or equal to
7072
// the current server version. Returns true if so, otherwise false.
71-
bool checkVersion(const std::string &text);
73+
bool checkMySQLVersion(const std::string &text);
7274

7375
// Called when a keyword was consumed that represents an internal MySQL
7476
// function and checks if that keyword is followed by an open parenthesis. If
@@ -93,7 +95,7 @@ class PARSERS_PUBLIC_TYPE MySQLBaseLexer : public antlr4::Lexer,
9395
void emitSymbol(size_t symbol);
9496

9597
std::list<std::unique_ptr<antlr4::Token>> _pendingTokens;
96-
std::map<std::string, size_t>
98+
std::map<std::string_view, size_t>
9799
_symbols; // A list of all defined symbols for lookup.
98100

99101
std::unique_ptr<antlr4::Token> nextDefaultChannelToken();

mysqlshdk/libs/parser/MySQLRecognizerCommon.cpp

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2016, 2022, Oracle and/or its affiliates.
2+
* Copyright (c) 2016, 2023, Oracle and/or its affiliates.
33
*
44
* This program is free software; you can redistribute it and/or modify
55
* it under the terms of the GNU General Public License, version 2.0,
@@ -129,15 +129,13 @@ static std::string dumpTree(RuleContext *context, const Vocabulary &vocabulary,
129129
ruleContext, vocabulary,
130130
indentation.size() < 100 ? indentation + " " : indentation);
131131
}
132-
} else {
132+
} else if (antlrcpp::is<TerminalNode *>(child)) {
133133
// A terminal node.
134134
stream << indentation;
135135

136-
TerminalNode *node = dynamic_cast<TerminalNode *>(child);
137136
if (antlrcpp::is<ErrorNode *>(child)) stream << "Syntax Error: ";
138137

139-
antlr4::Token *token = node->getSymbol();
140-
138+
antlr4::Token *token = dynamic_cast<TerminalNode *>(child)->getSymbol();
141139
std::size_t type = token->getType();
142140
std::string_view tokenName =
143141
type == Token::EOF ? "<EOF>"

mysqlshdk/libs/parser/MySQLRecognizerCommon.h

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2016, 2022, Oracle and/or its affiliates.
2+
* Copyright (c) 2016, 2023, Oracle and/or its affiliates.
33
*
44
* This program is free software; you can redistribute it and/or modify
55
* it under the terms of the GNU General Public License, version 2.0,
@@ -66,6 +66,9 @@ class PARSERS_PUBLIC_TYPE MySQLRecognizerCommon {
6666
SqlMode sqlMode; // A collection of flags indicating which of relevant SQL
6767
// modes are active.
6868

69+
/** Enable Multi Language Extension support. */
70+
bool supportMle = true;
71+
6972
// Returns true if the given mode (one of the enums above) is set.
7073
bool isSqlModeActive(size_t mode) const;
7174
void sqlModeFromString(std::string modes);

0 commit comments

Comments
 (0)