Skip to content

Commit 4d0b2b3

Browse files
committed
1 parent b7da359 commit 4d0b2b3

File tree

3 files changed

+58
-23
lines changed

3 files changed

+58
-23
lines changed

README.md

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ You can also try the online version, PEG Playground at https://yhirose.github.io
1010

1111
The PEG syntax is well described on page 2 in the [document](http://www.brynosaurus.com/pub/lang/peg.pdf). *cpp-peglib* also supports the following additional syntax for now:
1212

13+
* `'...'i` (Case-insensitive literal operator)
1314
* `<` ... `>` (Token boundary operator)
1415
* `~` (Ignore operator)
1516
* `\x20` (Hex number char)
@@ -169,11 +170,11 @@ auto ret = pg.parse(" token1, token2 ");
169170
We can ignore unnecessary semantic values from the list by using the `~` operator.
170171

171172
```cpp
172-
peg::pegparser parser(
173-
" ROOT <- _ ITEM (',' _ ITEM _)* "
174-
" ITEM <- ([a-z])+ "
175-
" ~_ <- [ \t]* "
176-
);
173+
peg::parser parser(R"(
174+
ROOT <- _ ITEM (',' _ ITEM _)*
175+
ITEM <- ([a-z])+
176+
~_ <- [ \t]*
177+
)");
177178

178179
parser["ROOT"] = [&](const SemanticValues& sv) {
179180
assert(sv.size() == 2); // should be 2 instead of 5.
@@ -185,11 +186,11 @@ auto ret = parser.parse(" item1, item2 ");
185186
The following grammar is the same as the above.
186187

187188
```cpp
188-
peg::parser parser(
189-
" ROOT <- ~_ ITEM (',' ~_ ITEM ~_)* "
190-
" ITEM <- ([a-z])+ "
191-
" _ <- [ \t]* "
192-
);
189+
peg::parser parser(R"(
190+
ROOT <- ~_ ITEM (',' ~_ ITEM ~_)*
191+
ITEM <- ([a-z])+
192+
_ <- [ \t]*
193+
)");
193194
```
194195

195196
*Semantic predicate* support is available. We can do it by throwing a `peg::parse_error` exception in a semantic action.
@@ -244,9 +245,10 @@ As you can see in the first example, we can ignore whitespaces between tokens au
244245
These are valid tokens:
245246

246247
```
247-
KEYWORD <- 'keyword'
248-
WORD <- < [a-zA-Z0-9] [a-zA-Z0-9-_]* > # token boundary operator is used.
249-
IDNET <- < IDENT_START_CHAR IDENT_CHAR* > # token boundary operator is used.
248+
KEYWORD <- 'keyword'
249+
KEYWORDI <- 'case_insensitive_keyword'i
250+
WORD <- < [a-zA-Z0-9] [a-zA-Z0-9-_]* > # token boundary operator is used.
251+
IDENT <- < IDENT_START_CHAR IDENT_CHAR* > # token boundary operator is used.
250252
```
251253

252254
The following grammar accepts ` one, "two three", four `.
@@ -372,6 +374,7 @@ The following are available operators:
372374
| apd | And predicate |
373375
| npd | Not predicate |
374376
| lit | Literal string |
377+
| liti | Case-insensitive Literal string |
375378
| cls | Character class |
376379
| chr | Character |
377380
| dot | Any character |

peglib.h

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1233,8 +1233,9 @@ class LiteralString : public Ope
12331233
, public std::enable_shared_from_this<LiteralString>
12341234
{
12351235
public:
1236-
LiteralString(const std::string& s)
1236+
LiteralString(const std::string& s, bool ignore_case)
12371237
: lit_(s)
1238+
, ignore_case_(ignore_case)
12381239
, init_is_word_(false)
12391240
, is_word_(false)
12401241
{}
@@ -1244,6 +1245,7 @@ class LiteralString : public Ope
12441245
void accept(Visitor& v) override;
12451246

12461247
std::string lit_;
1248+
bool ignore_case_;
12471249
mutable bool init_is_word_;
12481250
mutable bool is_word_;
12491251
};
@@ -1564,8 +1566,12 @@ inline std::shared_ptr<Ope> npd(const std::shared_ptr<Ope>& ope) {
15641566
return std::make_shared<NotPredicate>(ope);
15651567
}
15661568

1567-
inline std::shared_ptr<Ope> lit(const std::string& lit) {
1568-
return std::make_shared<LiteralString>(lit);
1569+
inline std::shared_ptr<Ope> lit(const std::string& s) {
1570+
return std::make_shared<LiteralString>(s, false);
1571+
}
1572+
1573+
inline std::shared_ptr<Ope> liti(const std::string& s) {
1574+
return std::make_shared<LiteralString>(s, true);
15691575
}
15701576

15711577
inline std::shared_ptr<Ope> cls(const std::string& s) {
@@ -2136,11 +2142,11 @@ class Definition
21362142
*/
21372143

21382144
inline size_t parse_literal(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt,
2139-
const std::string& lit, bool& init_is_word, bool& is_word)
2145+
const std::string& lit, bool& init_is_word, bool& is_word, bool ignore_case)
21402146
{
21412147
size_t i = 0;
21422148
for (; i < lit.size(); i++) {
2143-
if (i >= n || s[i] != lit[i]) {
2149+
if (i >= n || (ignore_case ? (std::tolower(s[i]) != std::tolower(lit[i])) : (s[i] != lit[i]))) {
21442150
c.set_error_pos(s);
21452151
return static_cast<size_t>(-1);
21462152
}
@@ -2184,7 +2190,7 @@ inline size_t parse_literal(const char* s, size_t n, SemanticValues& sv, Context
21842190

21852191
inline size_t LiteralString::parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const {
21862192
c.trace("LiteralString", s, n, sv, dt);
2187-
return parse_literal(s, n, sv, c, dt, lit_, init_is_word_, is_word_);
2193+
return parse_literal(s, n, sv, c, dt, lit_, init_is_word_, is_word_, ignore_case_);
21882194
}
21892195

21902196
inline size_t TokenBoundary::parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const {
@@ -2338,7 +2344,7 @@ inline size_t BackReference::parse(const char* s, size_t n, SemanticValues& sv,
23382344
const auto& lit = captures.at(name_);
23392345
auto init_is_word = false;
23402346
auto is_word = false;
2341-
return parse_literal(s, n, sv, c, dt, lit, init_is_word, is_word);
2347+
return parse_literal(s, n, sv, c, dt, lit, init_is_word, is_word, false);
23422348
}
23432349
++it;
23442350
}
@@ -2535,7 +2541,7 @@ class ParserGenerator
25352541
seq(g["BeginTok"], g["Expression"], g["EndTok"]),
25362542
seq(g["BeginCapScope"], g["Expression"], g["EndCapScope"]),
25372543
seq(g["BeginCap"], g["Expression"], g["EndCap"]),
2538-
g["BackRef"], g["Literal"], g["Class"], g["DOT"]);
2544+
g["BackRef"], g["LiteralI"], g["Literal"], g["Class"], g["DOT"]);
25392545

25402546
g["Identifier"] <= seq(g["IdentCont"], g["Spacing"]);
25412547
g["IdentCont"] <= seq(g["IdentStart"], zom(g["IdentRest"]));
@@ -2545,6 +2551,9 @@ class ParserGenerator
25452551

25462552
g["IdentRest"] <= cho(g["IdentStart"], cls("0-9"));
25472553

2554+
g["LiteralI"] <= cho(seq(cls("'"), tok(zom(seq(npd(cls("'")), g["Char"]))), lit("'i"), g["Spacing"]),
2555+
seq(cls("\""), tok(zom(seq(npd(cls("\"")), g["Char"]))), lit("\"i"), g["Spacing"]));
2556+
25482557
g["Literal"] <= cho(seq(cls("'"), tok(zom(seq(npd(cls("'")), g["Char"]))), cls("'"), g["Spacing"]),
25492558
seq(cls("\""), tok(zom(seq(npd(cls("\"")), g["Char"]))), cls("\""), g["Spacing"]));
25502559

@@ -2744,19 +2753,22 @@ class ParserGenerator
27442753
g["IdentCont"] = [](const SemanticValues& sv) {
27452754
return std::string(sv.c_str(), sv.length());
27462755
};
2747-
27482756
g["IdentStart"] = [](const SemanticValues& /*sv*/) {
27492757
return std::string();
27502758
};
2751-
27522759
g["IdentRest"] = [](const SemanticValues& /*sv*/) {
27532760
return std::string();
27542761
};
27552762

2763+
g["LiteralI"] = [](const SemanticValues& sv) {
2764+
const auto& tok = sv.tokens.front();
2765+
return liti(resolve_escape_sequence(tok.first, tok.second));
2766+
};
27562767
g["Literal"] = [](const SemanticValues& sv) {
27572768
const auto& tok = sv.tokens.front();
27582769
return lit(resolve_escape_sequence(tok.first, tok.second));
27592770
};
2771+
27602772
g["Class"] = [](const SemanticValues& sv) {
27612773
auto ranges = sv.transform<std::pair<char32_t, char32_t>>();
27622774
return cls(ranges);

test/test.cc

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,26 @@ TEST_CASE("Octal/Hex/Unicode value test", "[general]")
416416
REQUIRE(ret == true);
417417
}
418418

419+
TEST_CASE("Ignore case test", "[general]") {
420+
peg::parser parser(R"(
421+
ROOT <- HELLO WORLD
422+
HELLO <- 'hello'i
423+
WORLD <- 'world'i
424+
%whitespace <- [ \t\r\n]*
425+
)");
426+
427+
parser["HELLO"] = [](const SemanticValues& sv) {
428+
REQUIRE(sv.token() == "Hello");
429+
};
430+
431+
parser["WORLD"] = [](const SemanticValues& sv) {
432+
REQUIRE(sv.token() == "World");
433+
};
434+
435+
auto ret = parser.parse(" Hello World ");
436+
REQUIRE(ret == true);
437+
}
438+
419439
TEST_CASE("mutable lambda test", "[general]")
420440
{
421441
vector<string> vec;

0 commit comments

Comments
 (0)