diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000..37e8a2b6 --- /dev/null +++ b/.clang-format @@ -0,0 +1,5 @@ +BasedOnStyle: LLVM +AllowShortBlocksOnASingleLine: true +AllowShortCaseLabelsOnASingleLine: true +AllowShortIfStatementsOnASingleLine: true +Cpp11BracedListStyle: true diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml new file mode 100644 index 00000000..fd6c7695 --- /dev/null +++ b/.github/workflows/cmake.yml @@ -0,0 +1,36 @@ +name: CMake + +on: [push, pull_request] + +env: + BUILD_TYPE: Release + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, ubuntu-24.04, macos-latest, windows-latest, windows-2019] + + steps: + - uses: actions/checkout@v4 + + - name: Configure CMake + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} + + - name: Build + run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} + + - name: Test + working-directory: ${{github.workspace}}/build + run: ctest -C ${{env.BUILD_TYPE}} + + - name: Configure CMake with C++20 + run: cmake -B ${{github.workspace}}/build_20 -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_CXX_STANDARD=20 + + - name: Build with C++20 + run: cmake --build ${{github.workspace}}/build_20 --config ${{env.BUILD_TYPE}} + + - name: Test with C++20 + working-directory: ${{github.workspace}}/build_20 + run: ctest -C ${{env.BUILD_TYPE}} diff --git a/.gitignore b/.gitignore index 5bb01815..db3dc131 100644 --- a/.gitignore +++ b/.gitignore @@ -38,10 +38,13 @@ Release xcuserdata *.xcworkspace temp* -build/ +build*/ Makefile CMakeFiles CMakeCache.txt *.cmake *.vcxproj.filters *.opensdf +.idea/ +grammar/test/* +.DS_Store diff --git a/CMakeLists.txt b/CMakeLists.txt index 463aae9a..a37046a6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,8 +1,45 @@ -cmake_minimum_required(VERSION 3.0) -add_definitions("-std=c++1y") - -add_subdirectory(lint) -add_subdirectory(test) -add_subdirectory(example) 
-add_subdirectory(language/pl0) -add_subdirectory(language/culebra) +cmake_minimum_required(VERSION 3.14) +project(peglib) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_EXTENSIONS OFF) + +if(MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zc:__cplusplus /utf-8 /D_CRT_SECURE_NO_DEPRECATE") +else() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra") +endif() + +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Threads) + +if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + set(add_link_deps Threads::Threads) +endif() + +add_library(peglib INTERFACE) +target_include_directories(peglib INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) + +option(BUILD_TESTS "Build cpp-peglib tests" ON) +option(PEGLIB_BUILD_LINT "Build cpp-peglib lint utility" OFF) +option(PEGLIB_BUILD_EXAMPLES "Build cpp-peglib examples" OFF) +option(PEGLIB_BUILD_PL0 "Build pl0 interpreter" OFF) + +if (${BUILD_TESTS}) + add_subdirectory(test) + enable_testing() +endif() + +if (${PEGLIB_BUILD_LINT}) + add_subdirectory(lint) +endif() + +if (${PEGLIB_BUILD_EXAMPLES}) + add_subdirectory(example) +endif() + +if (${PEGLIB_BUILD_PL0}) + add_subdirectory(pl0) +endif() + +install(FILES peglib.h DESTINATION include) diff --git a/LICENSE b/LICENSE index 00e56ee1..7efa8fd0 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2015 yhirose +Copyright (c) 2022 yhirose Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 5e9a0828..e710401f 100644 --- a/README.md +++ b/README.md @@ -1,173 +1,240 @@ cpp-peglib ========== -C++11 header-only [PEG](http://en.wikipedia.org/wiki/Parsing_expression_grammar) (Parsing Expression Grammars) library. +[![](https://github.com/yhirose/cpp-peglib/workflows/CMake/badge.svg)](https://github.com/yhirose/cpp-peglib/actions) -*cpp-peglib* tries to provide more expressive parsing experience in a simple way. 
This library depends on only one header file. So, you can start using it right away just by including `peglib.h` in your project. +C++17 header-only [PEG](http://en.wikipedia.org/wiki/Parsing_expression_grammar) (Parsing Expression Grammars) library. You can start using it right away just by including `peglib.h` in your project. -The PEG syntax is well described on page 2 in the [document](http://www.brynosaurus.com/pub/lang/peg.pdf). *cpp-peglib* also supports the following additional syntax for now: +Since this library only supports C++17 compilers, please make sure that the compiler option `-std=c++17` is enabled. +(`/std:c++17 /Zc:__cplusplus` for MSVC) - * `<` ... `>` (Token boundary operator) - * `~` (Ignore operator) - * `\x20` (Hex number char) - * `$<` ... `>` (Capture operator) - * `$name<` ... `>` (Named capture operator) +You can also try the online version, PEG Playground at https://yhirose.github.io/cpp-peglib. -This library also supports the linear-time parsing known as the [*Packrat*](http://pdos.csail.mit.edu/~baford/packrat/thesis/thesis.pdf) parsing. +The PEG syntax is well described on page 2 in the [document](http://www.brynosaurus.com/pub/lang/peg.pdf) by Bryan Ford. *cpp-peglib* also supports the following additional syntax for now: + +* `'...'i` (Case-insensitive literal operator) +* `[...]i` (Case-insensitive character class operator) +* `[^...]` (Negated character class operator) +* `[^...]i` (Case-insensitive negated character class operator) +* `{2,5}` (Regex-like repetition operator) +* `<` ... `>` (Token boundary operator) +* `~` (Ignore operator) +* `\x20` (Hex number char) +* `\u10FFFF` (Unicode char) +* `%whitespace` (Automatic whitespace skipping) +* `%word` (Word expression) +* `$name(` ... `)` (Capture scope operator) +* `$name<` ... `>` (Named capture operator) +* `$name` (Backreference operator) +* `|` (Dictionary operator) +* `↑` (Cut operator) +* `MACRO_NAME(` ... 
`)` (Parameterized rule or Macro) +* `{ precedence L - + L / * }` (Parsing infix expression) +* `%recover(` ... `)` (Error recovery operator) +* `exp⇑label` or `exp^label` (Syntax sugar for `(exp / %recover(label))`) +* `label { error_message "..." }` (Error message instruction) +* `{ no_ast_opt }` (No AST node optimization instruction) + +'End of Input' check will be done as default. To disable the check, please call `disable_eoi_check`. + +This library supports the linear-time parsing known as the [*Packrat*](http://pdos.csail.mit.edu/~baford/packrat/thesis/thesis.pdf) parsing. + +IMPORTANT NOTE for some Linux distributions such as Ubuntu and CentOS: Need `-pthread` option when linking. See [#23](https://github.com/yhirose/cpp-peglib/issues/23#issuecomment-261126127), [#46](https://github.com/yhirose/cpp-peglib/issues/46#issuecomment-417870473) and [#62](https://github.com/yhirose/cpp-peglib/issues/62#issuecomment-492032680). + +I am sure that you will enjoy this excellent ["Practical parsing with PEG and cpp-peglib"](https://berthub.eu/articles/posts/practical-peg-parsing/) article by [bert hubert](https://berthub.eu/)! How to use ---------- -This is a simple calculator sample. It shows how to define grammar, associate samantic actions to the grammar and handle semantic values. +This is a simple calculator sample. It shows how to define grammar, associate semantic actions to the grammar, and handle semantic values. ```cpp // (1) Include the header file #include #include +#include using namespace peg; using namespace std; int main(void) { - // (2) Make a parser - auto syntax = R"( - # Grammar for Calculator... 
- Additive <- Multitive '+' Additive / Multitive - Multitive <- Primary '*' Multitive / Primary - Primary <- '(' Additive ')' / Number - Number <- < [0-9]+ > - %whitespace <- [ \t]* - )"; - - parser parser(syntax); - - // (3) Setup an action - parser["Additive"] = [](const SemanticValues& sv) { - switch (sv.choice()) { - case 0: // "Multitive '+' Additive" - return sv[0].get() + sv[1].get(); - default: // "Multitive" - return sv[0].get(); - } - }; - - parser["Multitive"] = [](const SemanticValues& sv) { - switch (sv.choice()) { - case 0: // "Primary '*' Multitive" - return sv[0].get() * sv[1].get(); - default: // "Primary" - return sv[0].get(); - } - }; + // (2) Make a parser + parser parser(R"( + # Grammar for Calculator... + Additive <- Multiplicative '+' Additive / Multiplicative + Multiplicative <- Primary '*' Multiplicative / Primary + Primary <- '(' Additive ')' / Number + Number <- < [0-9]+ > + %whitespace <- [ \t]* + )"); + + assert(static_cast(parser) == true); + + // (3) Setup actions + parser["Additive"] = [](const SemanticValues &vs) { + switch (vs.choice()) { + case 0: // "Multiplicative '+' Additive" + return any_cast(vs[0]) + any_cast(vs[1]); + default: // "Multiplicative" + return any_cast(vs[0]); + } + }; + + parser["Multiplicative"] = [](const SemanticValues &vs) { + switch (vs.choice()) { + case 0: // "Primary '*' Multiplicative" + return any_cast(vs[0]) * any_cast(vs[1]); + default: // "Primary" + return any_cast(vs[0]); + } + }; - parser["Number"] = [](const SemanticValues& sv) { - return stoi(sv.token(), nullptr, 10); - }; + parser["Number"] = [](const SemanticValues &vs) { + return vs.token_to_number(); + }; - // (4) Parse - parser.enable_packrat_parsing(); // Enable packrat parsing. + // (4) Parse + parser.enable_packrat_parsing(); // Enable packrat parsing. 
- int val; - parser.parse(" (1 + 2) * 3 ", val); + int val; + parser.parse(" (1 + 2) * 3 ", val); - assert(val == 9); + assert(val == 9); } ``` -Here are available actions: +To show syntax errors in grammar text: ```cpp -[](const SemanticValues& sv, any& dt) -[](const SemanticValues& sv) +auto grammar = R"( + # Grammar for Calculator... + Additive <- Multiplicative '+' Additive / Multiplicative + Multiplicative <- Primary '*' Multiplicative / Primary + Primary <- '(' Additive ')' / Number + Number <- < [0-9]+ > + %whitespace <- [ \t]* +)"; + +parser parser; + +parser.set_logger([](size_t line, size_t col, const string& msg, const string &rule) { + cerr << line << ":" << col << ": " << msg << "\n"; +}); + +auto ok = parser.load_grammar(grammar); +assert(ok); ``` -`const SemanticValues& sv` contains semantic values. `SemanticValues` structure is defined as follows. +There are four semantic actions available: + +```cpp +[](const SemanticValues& vs, any& dt) +[](const SemanticValues& vs) +[](SemanticValues& vs, any& dt) +[](SemanticValues& vs) +``` + +`SemanticValues` value contains the following information: + +* Semantic values +* Matched string information +* Token information if the rule is literal or uses a token boundary operator +* Choice number when the rule is 'prioritized choice' + +`any& dt` is a 'read-write' context data which can be used for whatever purposes. The initial context data is set in `peg::parser::parse` method. + +A semantic action can return a value of arbitrary data type, which will be wrapped by `peg::any`. If a user returns nothing in a semantic action, the first semantic value in the `const SemanticValues& vs` argument will be returned. (Yacc parser has the same behavior.) 
+ +Here shows the `SemanticValues` structure: ```cpp struct SemanticValues : protected std::vector { - // Matched string - std::string str() const; // Matched string - const char* c_str() const; // Matched string start - size_t length() const; // Matched string length + // Input text + const char* path; + const char* ss; - // Tokens - std::vector< - std::pair< - const char*, // Token start - size_t>> // Token length - tokens; + // Matched string + std::string_view sv() const { return sv_; } - std::string token(size_t id = 0) const; + // Line number and column at which the matched string is + std::pair line_info() const; - // Choice number (0 based index) - size_t choice() const; + // Tokens + std::vector tokens; + std::string_view token(size_t id = 0) const; - // Transform the semantic value vector to another vector - template vector transform(size_t beg = 0, size_t end = -1) const; -} -``` + // Token conversion + std::string token_to_string(size_t id = 0) const; + template T token_to_number() const; -`peg::any` class is very similar to [boost::any](http://www.boost.org/doc/libs/1_57_0/doc/html/any.html). You can obtain a value by castning it to the actual type. In order to determine the actual type, you have to check the return value type of the child action for the semantic value. + // Choice number (0 based index) + size_t choice() const; -`any& dt` is a data object which can be used by the user for whatever purposes. + // Transform the semantic value vector to another vector + template vector transform(size_t beg = 0, size_t end = -1) const; +} +``` -The following example uses `<` ... ` >` operators. They are the *token boundary* operators. +The following example uses `<` ... `>` operator, which is *token boundary* operator. 
```cpp -auto syntax = R"( - ROOT <- _ TOKEN (',' _ TOKEN)* - TOKEN <- < [a-z0-9]+ > _ - _ <- [ \t\r\n]* -)"; - -peg pg(syntax); - -pg["TOKEN"] = [](const SemanticValues& sv) { - // 'token' doesn't include trailing whitespaces - auto token = sv.token(); +peg::parser parser(R"( + ROOT <- _ TOKEN (',' _ TOKEN)* + TOKEN <- < [a-z0-9]+ > _ + _ <- [ \t\r\n]* +)"); + +parser["TOKEN"] = [](const SemanticValues& vs) { + // 'token' doesn't include trailing whitespaces + auto token = vs.token(); }; -auto ret = pg.parse(" token1, token2 "); +auto ret = parser.parse(" token1, token2 "); ``` We can ignore unnecessary semantic values from the list by using `~` operator. ```cpp -peg::pegparser parser( - " ROOT <- _ ITEM (',' _ ITEM _)* " - " ITEM <- ([a-z])+ " - " ~_ <- [ \t]* " -); - -parser["ROOT"] = [&](const SemanticValues& sv) { - assert(sv.size() == 2); // should be 2 instead of 5. +peg::parser parser(R"( + ROOT <- _ ITEM (',' _ ITEM _)* + ITEM <- ([a-z0-9])+ + ~_ <- [ \t]* +)"); + +parser["ROOT"] = [&](const SemanticValues& vs) { + assert(vs.size() == 2); // should be 2 instead of 5. }; auto ret = parser.parse(" item1, item2 "); ``` -The following grammar is same as the above. +The following grammar is the same as the above. ```cpp -peg::parser parser( - " ROOT <- ~_ ITEM (',' ~_ ITEM ~_)* " - " ITEM <- ([a-z])+ " - " _ <- [ \t]* " -); +peg::parser parser(R"( + ROOT <- ~_ ITEM (',' ~_ ITEM ~_)* + ITEM <- ([a-z0-9])+ + _ <- [ \t]* +)"); ``` -*Semantic predicate* support is available. We can do it by throwing a `peg::parse_error` exception in a semantic action. +*Semantic predicate* support is available with a *predicate* action. 
```cpp peg::parser parser("NUMBER <- [0-9]+"); -parser["NUMBER"] = [](const SemanticValues& sv) { - auto val = stol(sv.str(), nullptr, 10); - if (val != 100) { - throw peg::parse_error("value error!!"); - } - return val; +parser["NUMBER"] = [](const SemanticValues &vs) { + return vs.token_to_number(); +}; + +parser["NUMBER"].predicate = [](const SemanticValues &vs, + const std::any & /*dt*/, std::string &msg) { + if (vs.token_to_number() != 100) { + msg = "value error!!"; + return false; + } + return true; }; long val; @@ -179,22 +246,34 @@ ret = parser.parse("200", val); assert(ret == false); ``` -*enter* and *leave* actions are also avalable. +*enter* and *leave* actions are also available. ```cpp -parser["RULE"].enter = [](any& dt) { - std::cout << "enter" << std::endl; +parser["RULE"].enter = [](const Context &c, const char* s, size_t n, any& dt) { + std::cout << "enter" << std::endl; }; -parser["RULE"] = [](const SemanticValues& sv, any& dt) { - std::cout << "action!" << std::endl; +parser["RULE"] = [](const SemanticValues& vs, any& dt) { + std::cout << "action!" << std::endl; }; -parser["RULE"].leave = [](any& dt) { - std::cout << "leave" << std::endl; +parser["RULE"].leave = [](const Context &c, const char* s, size_t n, size_t matchlen, any& value, any& dt) { + std::cout << "leave" << std::endl; }; ``` +You can receive error information via a logger: + +```cpp +parser.set_logger([](size_t line, size_t col, const string& msg) { + ... +}); + +parser.set_logger([](size_t line, size_t col, const string& msg, const string &rule) { + ... 
+}); +``` + Ignoring Whitespaces -------------------- @@ -202,16 +281,17 @@ As you can see in the first example, we can ignore whitespaces between tokens au `%whitespace` rule can be applied to the following three conditions: - * trailing spaces on tokens - * leading spaces on text - * trailing spaces on literal strings in rules +* trailing spaces on tokens +* leading spaces on text +* trailing spaces on literal strings in rules These are valid tokens: ``` -KEYWORD <- 'keyword' -WORD <- < [a-zA-Z0-9] [a-zA-Z0-9-_]* > # token boundary operator is used. -IDNET <- < IDENT_START_CHAR IDENT_CHAR* > # token boundary operator is used. +KEYWORD <- 'keyword' +KEYWORDI <- 'case_insensitive_keyword'i +WORD <- < [a-zA-Z0-9] [a-zA-Z0-9-_]* > # token boundary operator is used. +IDENT <- < IDENT_START_CHAR IDENT_CHAR* > # token boundary operator is used. ``` The following grammar accepts ` one, "two three", four `. @@ -225,95 +305,191 @@ PHRASE <- < '"' (!'"' .)* '"' > %whitespace <- [ \t\r\n]* ``` -Simple interface ----------------- +Word expression +--------------- -*cpp-peglib* provides std::regex-like simple interface for trivial tasks. +```cpp +peg::parser parser(R"( + ROOT <- 'hello' 'world' + %whitespace <- [ \t\r\n]* + %word <- [a-z]+ +)"); + +parser.parse("hello world"); // OK +parser.parse("helloworld"); // NG +``` -`peg::peg_match` tries to capture strings in the `$< ... >` operator and store them into `peg::match` object. +Capture/Backreference +--------------------- ```cpp -peg::match m; +peg::parser parser(R"( + ROOT <- CONTENT + CONTENT <- (ELEMENT / TEXT)* + ELEMENT <- $(STAG CONTENT ETAG) + STAG <- '<' $tag< TAG_NAME > '>' + ETAG <- '</' $tag '>' + TAG_NAME <- 'b' / 'u' + TEXT <- TEXT_DATA + TEXT_DATA <- ![<] . 
+)"); + +parser.parse("This is a test text."); // OK +parser.parse("This is a test text."); // NG +parser.parse("This is a test text."); // NG +``` + +Dictionary +---------- -auto ret = peg::peg_match( - R"( - ROOT <- _ ('[' $< TAG_NAME > ']' _)* - TAG_NAME <- (!']' .)+ - _ <- [ \t]* - )", - " [tag1] [tag:2] [tag-3] ", - m); +`|` operator allows us to make a word dictionary for fast lookup by using Trie structure internally. We don't have to worry about the order of words. -assert(ret == true); -assert(m.size() == 4); -assert(m.str(1) == "tag1"); -assert(m.str(2) == "tag:2"); -assert(m.str(3) == "tag-3"); +```peg +START <- 'This month is ' MONTH '.' +MONTH <- 'Jan' | 'January' | 'Feb' | 'February' | '...' ``` -It also supports named capture with the `$name<` ... `>` operator. +We are able to find which item is matched with `choice()`. ```cpp -peg::match m; +parser["MONTH"] = [](const SemanticValues &vs) { + auto id = vs.choice(); +}; +``` + +It supports the case-insensitive mode. + +```peg +START <- 'This month is ' MONTH '.' +MONTH <- 'Jan'i | 'January'i | 'Feb'i | 'February'i | '...'i +``` + +Cut operator +------------ -auto ret = peg::peg_match( - R"( - ROOT <- _ ('[' $test< TAG_NAME > ']' _)* - TAG_NAME <- (!']' .)+ - _ <- [ \t]* - )", - " [tag1] [tag:2] [tag-3] ", - m); +`↑` operator could mitigate the backtrack performance problem, but has a risk to change the meaning of grammar. -auto cap = m.named_capture("test"); +```peg +S <- '(' ↑ P ')' / '"' ↑ P '"' / P +P <- 'a' / 'b' / 'c' +``` + +When we parse `(z` with the above grammar, we don't have to backtrack in `S` after `(` is matched, because a cut operator is inserted there. 
+ +Parameterized Rule or Macro +--------------------------- + +```peg +# Syntax +Start ← _ Expr +Expr ← Sum +Sum ← List(Product, SumOpe) +Product ← List(Value, ProOpe) +Value ← Number / T('(') Expr T(')') + +# Token +SumOpe ← T('+' / '-') +ProOpe ← T('*' / '/') +Number ← T([0-9]+) +~_ ← [ \t\r\n]* -REQUIRE(ret == true); -REQUIRE(m.size() == 4); -REQUIRE(cap.size() == 3); -REQUIRE(m.str(cap[2]) == "tag-3"); +# Macro +List(I, D) ← I (D I)* +T(x) ← < x > _ ``` -There are some ways to *search* a peg pattern in a document. +Parsing infix expression by Precedence climbing +----------------------------------------------- + +Regarding the *precedence climbing algorithm*, please see [this article](https://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing). ```cpp -using namespace peg; +parser parser(R"( + EXPRESSION <- INFIX_EXPRESSION(ATOM, OPERATOR) + ATOM <- NUMBER / '(' EXPRESSION ')' + OPERATOR <- < [-+/*] > + NUMBER <- < '-'? [0-9]+ > + %whitespace <- [ \t]* + + # Declare order of precedence + INFIX_EXPRESSION(A, O) <- A (O A)* { + precedence + L + - + L * / + } +)"); + +parser["INFIX_EXPRESSION"] = [](const SemanticValues& vs) -> long { + auto result = any_cast(vs[0]); + if (vs.size() > 1) { + auto ope = any_cast(vs[1]); + auto num = any_cast(vs[2]); + switch (ope) { + case '+': result += num; break; + case '-': result -= num; break; + case '*': result *= num; break; + case '/': result /= num; break; + } + } + return result; +}; +parser["OPERATOR"] = [](const SemanticValues& vs) { return *vs.sv(); }; +parser["NUMBER"] = [](const SemanticValues& vs) { return vs.token_to_number(); }; -auto syntax = R"( - ROOT <- '[' $< [a-z0-9]+ > ']' -)"; +long val; +parser.parse(" -1 + (1 + 2) * 3 - -1", val); +assert(val == 9); +``` -auto s = " [tag1] [tag2] [tag3] "; - -// peg::peg_search -parser pg(syntax); -size_t pos = 0; -auto n = strlen(s); -match m; -while (peg_search(pg, s + pos, n - pos, m)) { - cout << m.str() << endl; // entire match - cout << 
m.str(1) << endl; // submatch #1 - pos += m.length(); -} +*precedence* instruction can be applied only to the following 'list' style rule. -// peg::peg_token_iterator -peg_token_iterator it(syntax, s); -while (it != peg_token_iterator()) { - cout << it->str() << endl; // entire match - cout << it->str(1) << endl; // submatch #1 - ++it; +``` +Rule <- Atom (Operator Atom)* { + precedence + L - + + L / * + R ^ } +``` + +*precedence* instruction contains precedence info entries. Each entry starts with *associativity* which is 'L' (left) or 'R' (right), then operator *literal* tokens follow. The first entry has the highest order level. + +AST generation +-------------- -// peg::peg_token_range -for (auto& m: peg_token_range(syntax, s)) { - cout << m.str() << endl; // entire match - cout << m.str(1) << endl; // submatch #1 +*cpp-peglib* is able to generate an AST (Abstract Syntax Tree) when parsing. `enable_ast` method on `peg::parser` class enables the feature. + +NOTE: An AST node holds a corresponding token as `std::string_view` for performance and less memory usage. It is users' responsibility to keep the original source text along with the generated AST tree. + +``` +peg::parser parser(R"( + ... + definition1 <- ... { no_ast_opt } + definition2 <- ... { no_ast_opt } + ... +)"); + +parser.enable_ast(); + +shared_ptr ast; +if (parser.parse("...", ast)) { + cout << peg::ast_to_s(ast); + + ast = parser.optimize_ast(ast); + cout << peg::ast_to_s(ast); } ``` +`optimize_ast` removes redundant nodes to make an AST simpler. If you want to disable this behavior from particular rules, `no_ast_opt` instruction can be used. + +It internally calls `peg::AstOptimizer` to do the job. You can make your own AST optimizers to fit your needs. + +See actual usages in the [AST calculator example](https://github.com/yhirose/cpp-peglib/blob/master/example/calc3.cc) and [PL/0 language example](https://github.com/yhirose/cpp-peglib/blob/master/pl0/pl0.cc). 
+ Make a parser with parser combinators ------------------------------------- -Instead of makeing a parser by parsing PEG syntax text, we can also construct a parser by hand with *parser combinatorss*. Here is an example: +Instead of making a parser by parsing PEG syntax text, we can also construct a parser by hand with *parser combinators*. Here is an example: ```cpp using namespace peg; @@ -323,8 +499,8 @@ vector tags; Definition ROOT, TAG_NAME, _; ROOT <= seq(_, zom(seq(chr('['), TAG_NAME, chr(']'), _))); -TAG_NAME <= oom(seq(npd(chr(']')), dot())), [&](const SemanticValues& sv) { - tags.push_back(sv.str()); +TAG_NAME <= oom(seq(npd(chr(']')), dot())), [&](const SemanticValues& vs) { + tags.push_back(vs.token_to_string()); }; _ <= zom(cls(" \t")); @@ -333,23 +509,20 @@ auto ret = ROOT.parse(" [tag1] [tag:2] [tag-3] "); The following are available operators: -| Operator | Description | -| :------- | :-------------------- | -| seq | Sequence | -| cho | Prioritized Choice | -| zom | Zero or More | -| oom | One or More | -| opt | Optional | -| apd | And predicate | -| npd | Not predicate | -| lit | Literal string | -| cls | Character class | -| chr | Character | -| dot | Any character | -| tok | Token boundary | -| ign | Ignore semantic value | -| cap | Capture character | -| usr | User defined parser | +| Operator | Description | Operator | Description | +|:---------|:--------------------------------|:---------|:--------------------| +| seq | Sequence | cho | Prioritized Choice | +| zom | Zero or More | oom | One or More | +| opt | Optional | apd | And predicate | +| npd | Not predicate | lit | Literal string | +| liti | Case-insensitive Literal string | cls | Character class | +| ncls | Negated Character class | chr | Character | +| dot | Any character | tok | Token boundary | +| ign | Ignore semantic value | csc | Capture scope | +| cap | Capture | bkr | Back reference | +| dic | Dictionary | pre | Infix expression | +| rec | Infix expression | usr | User defined 
parser | +| rep | Repetition | | | Adjust definitions ------------------ @@ -358,24 +531,24 @@ It's possible to add/override definitions. ```cpp auto syntax = R"( - ROOT <- _ 'Hello' _ NAME '!' _ + ROOT <- _ 'Hello' _ NAME '!' _ )"; Rules additional_rules = { - { - "NAME", usr([](const char* s, size_t n, SemanticValues& sv, any& dt) -> size_t { - static vector names = { "PEG", "BNF" }; - for (const auto& name: names) { - if (name.size() <= n && !name.compare(0, name.size(), s, name.size())) { - return name.size(); // processed length - } - } - return -1; // parse error - }) - }, - { - "~_", zom(cls(" \t\r\n")) - } + { + "NAME", usr([](const char* s, size_t n, SemanticValues& vs, any& dt) -> size_t { + static vector names = { "PEG", "BNF" }; + for (const auto& name: names) { + if (name.size() <= n && !name.compare(0, name.size(), s, name.size())) { + return name.size(); // processed length + } + } + return -1; // parse error + }) + }, + { + "~_", zom(cls(" \t\r\n")) + } }; auto g = parser(syntax, additional_rules); @@ -386,30 +559,243 @@ assert(g.parse(" Hello BNF! ")); Unicode support --------------- -Since cpp-peglib only accepts 8 bits characters, it probably accepts UTF-8 text. But `.` matches only a byte, not a Unicode character. Also, it dosn't support `\u????`. +cpp-peglib accepts UTF8 text. `.` matches a Unicode codepoint. Also, it supports `\u????`. -Sample codes ------------- +Error report and recovery +------------------------- + +cpp-peglib supports the furthest failure error position report as described in the Bryan Ford original document. + +For better error report and recovery, cpp-peglib supports 'recovery' operator with label which can be associated with a recovery expression and a custom error message. This idea comes from the fantastic ["Syntax Error Recovery in Parsing Expression Grammars"](https://arxiv.org/pdf/1806.11150.pdf) paper by Sergio Medeiros and Fabio Mascarenhas. 
+The custom message supports `%t` which is a placeholder for the unexpected token, and `%c` for the unexpected Unicode char. + +Here is an example of Java-like grammar: + +```peg +# java.peg +Prog ← 'public' 'class' NAME '{' 'public' 'static' 'void' 'main' '(' 'String' '[' ']' NAME ')' BlockStmt '}' +BlockStmt ← '{' (!'}' Stmt^stmtb)* '}' # Annotated with `stmtb` +Stmt ← IfStmt / WhileStmt / PrintStmt / DecStmt / AssignStmt / BlockStmt +IfStmt ← 'if' '(' Exp ')' Stmt ('else' Stmt)? +WhileStmt ← 'while' '(' Exp^condw ')' Stmt # Annotated with `condw` +DecStmt ← 'int' NAME ('=' Exp)? ';' +AssignStmt ← NAME '=' Exp ';'^semia # Annotated with `semia` +PrintStmt ← 'System.out.println' '(' Exp ')' ';' +Exp ← RelExp ('==' RelExp)* +RelExp ← AddExp ('<' AddExp)* +AddExp ← MulExp (('+' / '-') MulExp)* +MulExp ← AtomExp (('*' / '/') AtomExp)* +AtomExp ← '(' Exp ')' / NUMBER / NAME + +NUMBER ← < [0-9]+ > +NAME ← < [a-zA-Z_][a-zA-Z_0-9]* > - * [Calculator](https://github.com/yhirose/cpp-peglib/blob/master/example/calc.cc) - * [Calculator (with parser operators)](https://github.com/yhirose/cpp-peglib/blob/master/example/calc2.cc) - * [Calculator (AST version)](https://github.com/yhirose/cpp-peglib/blob/master/example/calc3.cc) - * [PEG syntax Lint utility](https://github.com/yhirose/cpp-peglib/blob/master/lint/cmdline/peglint.cc) - * [PL/0 Interpreter](https://github.com/yhirose/cpp-peglib/blob/master/language/pl0/pl0.cc) +%whitespace ← [ \t\n]* +%word ← NAME + +# Recovery operator labels +semia ← '' { error_message "missing semicolon in assignment." } +stmtb ← (!(Stmt / 'else' / '}') .)* { error_message "invalid statement" } +condw ← &'==' ('==' RelExp)* / &'<' ('<' AddExp)* / (!')' .)* +``` + +For instance, `';'^semia` is syntactic sugar for `(';' / %recover(semia))`. The `%recover` operator tries to recover from the error at ';' by skipping input text with the recovery expression `semia`. Also, `semia` is associated with the custom message "missing semicolon in assignment." 
+Here is the result: + +```java +> cat sample.java +public class Example { + public static void main(String[] args) { + int n = 5; + int f = 1; + while( < n) { + f = f * n; + n = n - 1 + }; + System.out.println(f); + } +} + +> peglint java.peg sample.java +sample.java:5:12: syntax error, unexpected '<', expecting '(', <NUMBER>, <NAME>. +sample.java:8:5: missing semicolon in assignment. +sample.java:8:6: invalid statement +``` + +As you can see, it can now show more than one error, and provides more meaningful error messages than the default messages. + +### Custom error message for definitions + +We can associate custom error messages to definitions. + +```peg +# custom_message.peg +START <- CODE (',' CODE)* +CODE <- < '0x' [a-fA-F0-9]+ > { error_message 'code format error...' } +%whitespace <- [ \t]* +``` + +``` +> cat custom_message.txt +0x1234,0x@@@@,0xABCD + +> peglint custom_message.peg custom_message.txt +custom_message.txt:1:8: code format error... +``` -Tested compilers ----------------- +NOTE: If there is more than one element with an error message instruction in a prioritized choice, this feature may not work as you expect. - * Visual Studio 2015 - * Visual Studio 2013 with update 5 - * Clang 3.5 +Change the Start Definition Rule +-------------------------------- -TODO ----- +We can change the start definition rule as below. + +```cpp +auto grammar = R"( + Start <- A + A <- B (',' B)* + B <- '[one]' / '[two]' + %whitespace <- [ \t\n]* +)"; + +peg::parser parser(grammar, "A"); // Start Rule is "A" + + or + +peg::parser parser; +parser.load_grammar(grammar, "A"); // Start Rule is "A" + +parser.parse(" [one] , [two] "); // OK +``` + +peglint - PEG syntax lint utility +--------------------------------- + +### Build peglint + +``` +> cd lint +> mkdir build +> cd build +> cmake .. 
+> make +> ./peglint +usage: grammar_file_path [source_file_path] + + options: + --source: source text + --packrat: enable packrat memoise + --ast: show AST tree + --opt, --opt-all: optimize all AST nodes except nodes selected with `no_ast_opt` instruction + --opt-only: optimize only AST nodes selected with `no_ast_opt` instruction + --trace: show concise trace messages + --profile: show profile report + --verbose: verbose output for trace and profile +``` + +### Grammar check + +``` +> cat a.peg +Additive <- Multiplicative '+' Additive / Multiplicative +Multiplicative <- Primary '*' Multiplicative / Primary +Primary <- '(' Additive ')' / Number +%whitespace <- [ \t\r\n]* + +> peglint a.peg +[commandline]:3:35: 'Number' is not defined. +``` + +### Source check + +``` +> cat a.peg +Additive <- Multiplicative '+' Additive / Multiplicative +Multiplicative <- Primary '*' Multiplicative / Primary +Primary <- '(' Additive ')' / Number +Number <- < [0-9]+ > +%whitespace <- [ \t\r\n]* + +> peglint --source "1 + a * 3" a.peg +[commandline]:1:3: syntax error +``` + +### AST + +``` +> cat a.txt +1 + 2 * 3 + +> peglint --ast a.peg a.txt ++ Additive + + Multiplicative + + Primary + - Number (1) + + Additive + + Multiplicative + + Primary + - Number (2) + + Multiplicative + + Primary + - Number (3) +``` + +### AST optimization + +``` +> peglint --ast --opt --source "1 + 2 * 3" a.peg ++ Additive + - Multiplicative[Number] (1) + + Additive[Multiplicative] + - Primary[Number] (2) + - Multiplicative[Number] (3) +``` + +### Adjust AST optimization with `no_ast_opt` instruction + +``` +> cat a.peg +Additive <- Multiplicative '+' Additive / Multiplicative +Multiplicative <- Primary '*' Multiplicative / Primary +Primary <- '(' Additive ')' / Number { no_ast_opt } +Number <- < [0-9]+ > +%whitespace <- [ \t\r\n]* + +> peglint --ast --opt --source "1 + 2 * 3" a.peg ++ Additive/0 + + Multiplicative/1[Primary] + - Number (1) + + Additive/1[Multiplicative] + + Primary/1 + - Number (2) + + 
Multiplicative/1[Primary] + - Number (3) + +> peglint --ast --opt-only --source "1 + 2 * 3" a.peg ++ Additive/0 + + Multiplicative/1 + - Primary/1[Number] (1) + + Additive/1 + + Multiplicative/0 + - Primary/1[Number] (2) + + Multiplicative/1 + - Primary/1[Number] (3) +``` + +Sample code +----------- - * Unicode support (`.` matches a Unicode char. `\u????`, `\p{L}`) +* [Calculator](https://github.com/yhirose/cpp-peglib/blob/master/example/calc.cc) +* [Calculator (with parser operators)](https://github.com/yhirose/cpp-peglib/blob/master/example/calc2.cc) +* [Calculator (AST version)](https://github.com/yhirose/cpp-peglib/blob/master/example/calc3.cc) +* [Calculator (parsing expressions by precedence climbing)](https://github.com/yhirose/cpp-peglib/blob/master/example/calc4.cc) +* [Calculator (AST version and parsing expressions by precedence climbing)](https://github.com/yhirose/cpp-peglib/blob/master/example/calc5.cc) +* [A tiny PL/0 JIT compiler in less than 900 LOC with LLVM and PEG parser](https://github.com/yhirose/pl0-jit-compiler) +* [A Programming Language just for writing a Fizz Buzz program. :)](https://github.com/yhirose/fizzbuzzlang) License ------- -MIT license (© 2015 Yuji Hirose) +MIT license (© 2022 Yuji Hirose) diff --git a/docs/build.sh b/docs/build.sh new file mode 100755 index 00000000..2bb5d4e6 --- /dev/null +++ b/docs/build.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +source ~/Projects/emsdk/emsdk_env.sh +emcc -std=c++17 -O3 --bind -o native.js -s ALLOW_MEMORY_GROWTH native.cpp diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 00000000..edd068c2 --- /dev/null +++ b/docs/index.html @@ -0,0 +1,49 @@ + + + +PEG Playground + + + +
+
+
    +
  • +
  • Grammar
  • +
+
{{syntax}}
+
+
+
+
    +
  • +
  • Source Code
  • +
  • +
      +
    • +
    • +
    • +
    • +
    +
  • +
+
{{source}}
+
AST
+

+    
Optimized AST     + mode:  +
+

+    
Profile
+
+ +
+
+
+
+ + + + + + diff --git a/docs/index.js b/docs/index.js new file mode 100644 index 00000000..abe23d3c --- /dev/null +++ b/docs/index.js @@ -0,0 +1,222 @@ +// Setup editors +function setupInfoArea(id) { + const e = ace.edit(id); + e.setShowPrintMargin(false); + e.setOptions({ + readOnly: true, + highlightActiveLine: false, + highlightGutterLine: false + }) + e.renderer.$cursorLayer.element.style.opacity=0; + return e; +} + +function setupEditorArea(id, lsKey) { + const e = ace.edit(id); + e.setShowPrintMargin(false); + e.setValue(localStorage.getItem(lsKey) || ''); + e.moveCursorTo(0, 0); + return e; +} + +const grammar = setupEditorArea("grammar-editor", "grammarText"); +const code = setupEditorArea("code-editor", "codeText"); + +const codeAst = setupInfoArea("code-ast"); +const codeAstOptimized = setupInfoArea("code-ast-optimized"); +const codeProfile = setupInfoArea("code-profile"); + +$('#opt-mode').val(localStorage.getItem('optimizationMode') || 'all'); +$('#start-rule').val(localStorage.getItem('startRule') || ''); +$('#packrat').prop('checked', localStorage.getItem('packrat') === 'true'); +$('#auto-refresh').prop('checked', localStorage.getItem('autoRefresh') === 'true'); +$('#parse').prop('disabled', $('#auto-refresh').prop('checked')); + +// Parse +function escapeHtml(unsafe) { + return unsafe + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """) + .replace(/'/g, "'"); +} + +function generateErrorListHTML(errors) { + let html = '
    '; + + html += $.map(errors, function (x) { + if (x.gln && x.gcol) { + return `
  • ${x.ln}:${x.col} ${escapeHtml(x.msg)}
  • `; + } else { + return `
  • ${x.ln}:${x.col} ${escapeHtml(x.msg)}
  • `; + } + }).join(''); + + html += '
      '; + + return html; +} + +function updateLocalStorage() { + localStorage.setItem('grammarText', grammar.getValue()); + localStorage.setItem('codeText', code.getValue()); + localStorage.setItem('optimizationMode', $('#opt-mode').val()); + localStorage.setItem('startRule', $('#start-rule').val()); + localStorage.setItem('packrat', $('#packrat').prop('checked')); + localStorage.setItem('autoRefresh', $('#auto-refresh').prop('checked')); +} + +function parse() { + const $grammarValidation = $('#grammar-validation'); + const $grammarInfo = $('#grammar-info'); + const grammarText = grammar.getValue(); + + const $codeValidation = $('#code-validation'); + const $codeInfo = $('#code-info'); + const codeText = code.getValue(); + + const optimizationMode = $('#opt-mode').val(); + const startRule = $('#start-rule').val(); + const packrat = $('#packrat').prop('checked'); + + $grammarInfo.html(''); + $grammarValidation.hide(); + $codeInfo.html(''); + $codeValidation.hide(); + codeAst.setValue(''); + codeAstOptimized.setValue(''); + codeProfile.setValue(''); + + if (grammarText.length === 0) { + return; + } + + const mode = optimizationMode == 'all'; + + $('#overlay').css({ + 'z-index': '1', + 'display': 'block', + 'background-color': 'rgba(0, 0, 0, 0.1)' + }); + window.setTimeout(() => { + const data = JSON.parse(Module.lint(grammarText, codeText, mode, packrat, startRule)); + $('#overlay').css({ + 'z-index': '-1', + 'display': 'none', + 'background-color': 'rgba(1, 1, 1, 1.0)' + }); + + if (data.grammar_valid) { + $grammarValidation.removeClass('validation-invalid').show(); + + codeAst.insert(data.ast); + codeAstOptimized.insert(data.astOptimized); + codeProfile.insert(data.profile); + + if (data.source_valid) { + $codeValidation.removeClass('validation-invalid').show(); + } else { + $codeValidation.addClass('validation-invalid').show(); + } + + if (data.code.length > 0) { + const html = generateErrorListHTML(data.code); + $codeInfo.html(html); + } + } else { + 
$grammarValidation.addClass('validation-invalid').show(); + } + + if (data.grammar.length > 0) { + const html = generateErrorListHTML(data.grammar); + $grammarInfo.html(html); + } + }, 0); +} + +// Event handling for text editing +let timer; +function setupTimer() { + clearTimeout(timer); + timer = setTimeout(() => { + updateLocalStorage(); + if ($('#auto-refresh').prop('checked')) { + parse(); + } + }, 750); +}; +grammar.getSession().on('change', setupTimer); +code.getSession().on('change', setupTimer); + +// Event handling in the info area +function makeOnClickInInfo(editor) { + return function () { + const el = $(this); + editor.navigateTo(el.data('ln') - 1, el.data('col') - 1); + editor.scrollToLine(el.data('ln') - 1, true, false, null); + editor.focus(); + + if(el.data('gln') && el.data('gcol')) { + grammar.navigateTo(el.data('gln') - 1, el.data('gcol') - 1); + grammar.scrollToLine(el.data('gln') - 1, true, false, null); + } + } +}; +$('#grammar-info').on('click', 'li', makeOnClickInInfo(grammar)); +$('#code-info').on('click', 'li', makeOnClickInInfo(code)); + +// Event handling in the AST optimization +$('#opt-mode').on('change', setupTimer); +$('#start-rule').on('keydown', setupTimer); +$('#packrat').on('change', setupTimer); +$('#auto-refresh').on('change', () => { + updateLocalStorage(); + $('#parse').prop('disabled', $('#auto-refresh').prop('checked')); + setupTimer(); +}); +$('#parse').on('click', parse); + +// Resize editors to fit their parents +function resizeEditorsToParent() { + code.resize(); + code.renderer.updateFull(); + codeAst.resize(); + codeAst.renderer.updateFull(); + codeAstOptimized.resize(); + codeAstOptimized.renderer.updateFull(); + codeProfile.resize(); + codeProfile.renderer.updateFull(); +} + +// Show windows +function setupToolWindow(lsKeyName, buttonSel, codeSel) { + let show = localStorage.getItem(lsKeyName) === 'true'; + $(buttonSel).prop('checked', show); + $(codeSel).css({ 'display': show ? 
'block' : 'none' }); + + $(buttonSel).on('change', () => { + show = !show; + localStorage.setItem(lsKeyName, show); + $(codeSel).css({ 'display': show ? 'block' : 'none' }); + resizeEditorsToParent(); + }); +} +setupToolWindow('show-ast', '#show-ast', '#code-ast'); +setupToolWindow('show-ast-optimized', '#show-ast-optimized', '#code-ast-optimized'); +setupToolWindow('show-profile', '#show-profile', '#code-profile'); + +// Show page +$('#main').css({ + 'display': 'flex', +}); + +// WebAssembly +var Module = { + onRuntimeInitialized: function() { + // Initial parse + if ($('#auto-refresh').prop('checked')) { + parse(); + } + } +}; diff --git a/docs/native.cpp b/docs/native.cpp new file mode 100644 index 00000000..497940ea --- /dev/null +++ b/docs/native.cpp @@ -0,0 +1,116 @@ +#include "../peglib.h" +#include +#include +#include +#include +#include + +// https://stackoverflow.com/questions/7724448/simple-json-string-escape-for-c/33799784#33799784 +std::string escape_json(const std::string &s) { + std::ostringstream o; + for (auto c : s) { + if (c == '"' || c == '\\' || ('\x00' <= c && c <= '\x1f')) { + o << "\\u" << std::hex << std::setw(4) << std::setfill('0') << (int)c; + } else { + o << c; + } + } + return o.str(); +} + +std::function +makeJSONFormatter(peg::parser &peg, std::string &json, bool &init) { + init = true; + return [&](size_t ln, size_t col, const std::string &msg, + const std::string &rule) mutable { + if (!init) { json += ","; } + json += "{"; + json += R"("ln":)" + std::to_string(ln) + ","; + json += R"("col":)" + std::to_string(col) + ","; + json += R"("msg":")" + escape_json(msg) + R"(")"; + if (!rule.empty()) { + auto it = peg.get_grammar().find(rule); + if (it != peg.get_grammar().end()) { + auto [gln, gcol] = it->second.line_; + json += ","; + json += R"("gln":)" + std::to_string(gln) + ","; + json += R"("gcol":)" + std::to_string(gcol); + } + } + json += "}"; + + init = false; + }; +} + +bool parse_grammar(const std::string &text, peg::parser 
&peg, + const std::string &startRule, std::string &json) { + bool init; + peg.set_logger(makeJSONFormatter(peg, json, init)); + json += "["; + auto ret = peg.load_grammar(text.data(), text.size(), startRule); + json += "]"; + return ret; +} + +bool parse_code(const std::string &text, peg::parser &peg, std::string &json, + std::shared_ptr &ast) { + peg.enable_ast(); + bool init; + peg.set_logger(makeJSONFormatter(peg, json, init)); + json += "["; + auto ret = peg.parse_n(text.data(), text.size(), ast); + json += "]"; + return ret; +} + +std::string lint(const std::string &grammarText, const std::string &codeText, + bool opt_mode, bool packrat, const std::string &startRule) { + std::string grammarResult; + std::string codeResult; + std::string astResult; + std::string astResultOptimized; + std::string profileResult; + + peg::parser peg; + auto is_grammar_valid = + parse_grammar(grammarText, peg, startRule, grammarResult); + auto is_source_valid = false; + + if (is_grammar_valid && peg) { + std::stringstream ss; + peg::enable_profiling(peg, ss); + + if (packrat) { peg.enable_packrat_parsing(); } + + std::shared_ptr ast; + is_source_valid = parse_code(codeText, peg, codeResult, ast); + + profileResult = escape_json(ss.str()); + + if (ast) { + astResult = escape_json(peg::ast_to_s(ast)); + astResultOptimized = + escape_json(peg::ast_to_s(peg.optimize_ast(ast, opt_mode))); + } + } + + std::string json; + json += "{"; + json += + std::string("\"grammar_valid\":") + (is_grammar_valid ? "true" : "false"); + json += ",\"grammar\":" + grammarResult; + json += + std::string(",\"source_valid\":") + (is_source_valid ? 
"true" : "false"); + if (!codeResult.empty()) { + json += ",\"code\":" + codeResult; + json += ",\"ast\":\"" + astResult + "\""; + json += ",\"astOptimized\":\"" + astResultOptimized + "\""; + json += ",\"profile\":\"" + profileResult + "\""; + } + json += "}"; + + return json; +} + +EMSCRIPTEN_BINDINGS(native) { emscripten::function("lint", &lint); } diff --git a/docs/native.js b/docs/native.js new file mode 100644 index 00000000..a8eefad4 --- /dev/null +++ b/docs/native.js @@ -0,0 +1 @@ +var Module=typeof Module!="undefined"?Module:{};var moduleOverrides=Object.assign({},Module);var arguments_=[];var thisProgram="./this.program";var quit_=(status,toThrow)=>{throw toThrow};var ENVIRONMENT_IS_WEB=typeof window=="object";var ENVIRONMENT_IS_WORKER=typeof importScripts=="function";var ENVIRONMENT_IS_NODE=typeof process=="object"&&typeof process.versions=="object"&&typeof process.versions.node=="string";var scriptDirectory="";function locateFile(path){if(Module["locateFile"]){return Module["locateFile"](path,scriptDirectory)}return scriptDirectory+path}var read_,readAsync,readBinary,setWindowTitle;function logExceptionOnExit(e){if(e instanceof ExitStatus)return;let toLog=e;err("exiting due to exception: "+toLog)}var fs;var nodePath;var requireNodeFS;if(ENVIRONMENT_IS_NODE){if(ENVIRONMENT_IS_WORKER){scriptDirectory=require("path").dirname(scriptDirectory)+"/"}else{scriptDirectory=__dirname+"/"}requireNodeFS=(()=>{if(!nodePath){fs=require("fs");nodePath=require("path")}});read_=function shell_read(filename,binary){requireNodeFS();filename=nodePath["normalize"](filename);return fs.readFileSync(filename,binary?undefined:"utf8")};readBinary=(filename=>{var ret=read_(filename,true);if(!ret.buffer){ret=new Uint8Array(ret)}return ret});readAsync=((filename,onload,onerror)=>{requireNodeFS();filename=nodePath["normalize"](filename);fs.readFile(filename,function(err,data){if(err)onerror(err);else 
onload(data.buffer)})});if(process["argv"].length>1){thisProgram=process["argv"][1].replace(/\\/g,"/")}arguments_=process["argv"].slice(2);if(typeof module!="undefined"){module["exports"]=Module}process["on"]("uncaughtException",function(ex){if(!(ex instanceof ExitStatus)){throw ex}});process["on"]("unhandledRejection",function(reason){throw reason});quit_=((status,toThrow)=>{if(keepRuntimeAlive()){process["exitCode"]=status;throw toThrow}logExceptionOnExit(toThrow);process["exit"](status)});Module["inspect"]=function(){return"[Emscripten Module object]"}}else if(ENVIRONMENT_IS_WEB||ENVIRONMENT_IS_WORKER){if(ENVIRONMENT_IS_WORKER){scriptDirectory=self.location.href}else if(typeof document!="undefined"&&document.currentScript){scriptDirectory=document.currentScript.src}if(scriptDirectory.indexOf("blob:")!==0){scriptDirectory=scriptDirectory.substr(0,scriptDirectory.replace(/[?#].*/,"").lastIndexOf("/")+1)}else{scriptDirectory=""}{read_=(url=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,false);xhr.send(null);return xhr.responseText});if(ENVIRONMENT_IS_WORKER){readBinary=(url=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,false);xhr.responseType="arraybuffer";xhr.send(null);return new Uint8Array(xhr.response)})}readAsync=((url,onload,onerror)=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,true);xhr.responseType="arraybuffer";xhr.onload=(()=>{if(xhr.status==200||xhr.status==0&&xhr.response){onload(xhr.response);return}onerror()});xhr.onerror=onerror;xhr.send(null)})}setWindowTitle=(title=>document.title=title)}else{}var out=Module["print"]||console.log.bind(console);var err=Module["printErr"]||console.warn.bind(console);Object.assign(Module,moduleOverrides);moduleOverrides=null;if(Module["arguments"])arguments_=Module["arguments"];if(Module["thisProgram"])thisProgram=Module["thisProgram"];if(Module["quit"])quit_=Module["quit"];var wasmBinary;if(Module["wasmBinary"])wasmBinary=Module["wasmBinary"];var noExitRuntime=Module["noExitRuntime"]||true;if(typeof 
WebAssembly!="object"){abort("no native wasm support detected")}var wasmMemory;var ABORT=false;var EXITSTATUS;var UTF8Decoder=typeof TextDecoder!="undefined"?new TextDecoder("utf8"):undefined;function UTF8ArrayToString(heapOrArray,idx,maxBytesToRead){var endIdx=idx+maxBytesToRead;var endPtr=idx;while(heapOrArray[endPtr]&&!(endPtr>=endIdx))++endPtr;if(endPtr-idx>16&&heapOrArray.buffer&&UTF8Decoder){return UTF8Decoder.decode(heapOrArray.subarray(idx,endPtr))}else{var str="";while(idx>10,56320|ch&1023)}}}return str}function UTF8ToString(ptr,maxBytesToRead){return ptr?UTF8ArrayToString(HEAPU8,ptr,maxBytesToRead):""}function stringToUTF8Array(str,heap,outIdx,maxBytesToWrite){if(!(maxBytesToWrite>0))return 0;var startIdx=outIdx;var endIdx=outIdx+maxBytesToWrite-1;for(var i=0;i=55296&&u<=57343){var u1=str.charCodeAt(++i);u=65536+((u&1023)<<10)|u1&1023}if(u<=127){if(outIdx>=endIdx)break;heap[outIdx++]=u}else if(u<=2047){if(outIdx+1>=endIdx)break;heap[outIdx++]=192|u>>6;heap[outIdx++]=128|u&63}else if(u<=65535){if(outIdx+2>=endIdx)break;heap[outIdx++]=224|u>>12;heap[outIdx++]=128|u>>6&63;heap[outIdx++]=128|u&63}else{if(outIdx+3>=endIdx)break;heap[outIdx++]=240|u>>18;heap[outIdx++]=128|u>>12&63;heap[outIdx++]=128|u>>6&63;heap[outIdx++]=128|u&63}}heap[outIdx]=0;return outIdx-startIdx}function stringToUTF8(str,outPtr,maxBytesToWrite){return stringToUTF8Array(str,HEAPU8,outPtr,maxBytesToWrite)}function lengthBytesUTF8(str){var len=0;for(var i=0;i=55296&&u<=57343)u=65536+((u&1023)<<10)|str.charCodeAt(++i)&1023;if(u<=127)++len;else if(u<=2047)len+=2;else if(u<=65535)len+=3;else len+=4}return len}var UTF16Decoder=typeof TextDecoder!="undefined"?new TextDecoder("utf-16le"):undefined;function UTF16ToString(ptr,maxBytesToRead){var endPtr=ptr;var idx=endPtr>>1;var maxIdx=idx+maxBytesToRead/2;while(!(idx>=maxIdx)&&HEAPU16[idx])++idx;endPtr=idx<<1;if(endPtr-ptr>32&&UTF16Decoder){return UTF16Decoder.decode(HEAPU8.subarray(ptr,endPtr))}else{var str="";for(var 
i=0;!(i>=maxBytesToRead/2);++i){var codeUnit=HEAP16[ptr+i*2>>1];if(codeUnit==0)break;str+=String.fromCharCode(codeUnit)}return str}}function stringToUTF16(str,outPtr,maxBytesToWrite){if(maxBytesToWrite===undefined){maxBytesToWrite=2147483647}if(maxBytesToWrite<2)return 0;maxBytesToWrite-=2;var startPtr=outPtr;var numCharsToWrite=maxBytesToWrite>1]=codeUnit;outPtr+=2}HEAP16[outPtr>>1]=0;return outPtr-startPtr}function lengthBytesUTF16(str){return str.length*2}function UTF32ToString(ptr,maxBytesToRead){var i=0;var str="";while(!(i>=maxBytesToRead/4)){var utf32=HEAP32[ptr+i*4>>2];if(utf32==0)break;++i;if(utf32>=65536){var ch=utf32-65536;str+=String.fromCharCode(55296|ch>>10,56320|ch&1023)}else{str+=String.fromCharCode(utf32)}}return str}function stringToUTF32(str,outPtr,maxBytesToWrite){if(maxBytesToWrite===undefined){maxBytesToWrite=2147483647}if(maxBytesToWrite<4)return 0;var startPtr=outPtr;var endPtr=startPtr+maxBytesToWrite-4;for(var i=0;i=55296&&codeUnit<=57343){var trailSurrogate=str.charCodeAt(++i);codeUnit=65536+((codeUnit&1023)<<10)|trailSurrogate&1023}HEAP32[outPtr>>2]=codeUnit;outPtr+=4;if(outPtr+4>endPtr)break}HEAP32[outPtr>>2]=0;return outPtr-startPtr}function lengthBytesUTF32(str){var len=0;for(var i=0;i=55296&&codeUnit<=57343)++i;len+=4}return len}function writeArrayToMemory(array,buffer){HEAP8.set(array,buffer)}function writeAsciiToMemory(str,buffer,dontAddNull){for(var i=0;i>0]=str.charCodeAt(i)}if(!dontAddNull)HEAP8[buffer>>0]=0}var buffer,HEAP8,HEAPU8,HEAP16,HEAPU16,HEAP32,HEAPU32,HEAPF32,HEAPF64;function updateGlobalBufferAndViews(buf){buffer=buf;Module["HEAP8"]=HEAP8=new Int8Array(buf);Module["HEAP16"]=HEAP16=new Int16Array(buf);Module["HEAP32"]=HEAP32=new Int32Array(buf);Module["HEAPU8"]=HEAPU8=new Uint8Array(buf);Module["HEAPU16"]=HEAPU16=new Uint16Array(buf);Module["HEAPU32"]=HEAPU32=new Uint32Array(buf);Module["HEAPF32"]=HEAPF32=new Float32Array(buf);Module["HEAPF64"]=HEAPF64=new Float64Array(buf)}var 
INITIAL_MEMORY=Module["INITIAL_MEMORY"]||16777216;var wasmTable;var __ATPRERUN__=[];var __ATINIT__=[];var __ATPOSTRUN__=[];var runtimeInitialized=false;function keepRuntimeAlive(){return noExitRuntime}function preRun(){if(Module["preRun"]){if(typeof Module["preRun"]=="function")Module["preRun"]=[Module["preRun"]];while(Module["preRun"].length){addOnPreRun(Module["preRun"].shift())}}callRuntimeCallbacks(__ATPRERUN__)}function initRuntime(){runtimeInitialized=true;callRuntimeCallbacks(__ATINIT__)}function postRun(){if(Module["postRun"]){if(typeof Module["postRun"]=="function")Module["postRun"]=[Module["postRun"]];while(Module["postRun"].length){addOnPostRun(Module["postRun"].shift())}}callRuntimeCallbacks(__ATPOSTRUN__)}function addOnPreRun(cb){__ATPRERUN__.unshift(cb)}function addOnInit(cb){__ATINIT__.unshift(cb)}function addOnPostRun(cb){__ATPOSTRUN__.unshift(cb)}var runDependencies=0;var runDependencyWatcher=null;var dependenciesFulfilled=null;function addRunDependency(id){runDependencies++;if(Module["monitorRunDependencies"]){Module["monitorRunDependencies"](runDependencies)}}function removeRunDependency(id){runDependencies--;if(Module["monitorRunDependencies"]){Module["monitorRunDependencies"](runDependencies)}if(runDependencies==0){if(runDependencyWatcher!==null){clearInterval(runDependencyWatcher);runDependencyWatcher=null}if(dependenciesFulfilled){var callback=dependenciesFulfilled;dependenciesFulfilled=null;callback()}}}function abort(what){{if(Module["onAbort"]){Module["onAbort"](what)}}what="Aborted("+what+")";err(what);ABORT=true;EXITSTATUS=1;what+=". 
Build with -sASSERTIONS for more info.";var e=new WebAssembly.RuntimeError(what);throw e}var dataURIPrefix="data:application/octet-stream;base64,";function isDataURI(filename){return filename.startsWith(dataURIPrefix)}function isFileURI(filename){return filename.startsWith("file://")}var wasmBinaryFile;wasmBinaryFile="native.wasm";if(!isDataURI(wasmBinaryFile)){wasmBinaryFile=locateFile(wasmBinaryFile)}function getBinary(file){try{if(file==wasmBinaryFile&&wasmBinary){return new Uint8Array(wasmBinary)}if(readBinary){return readBinary(file)}else{throw"both async and sync fetching of the wasm failed"}}catch(err){abort(err)}}function getBinaryPromise(){if(!wasmBinary&&(ENVIRONMENT_IS_WEB||ENVIRONMENT_IS_WORKER)){if(typeof fetch=="function"&&!isFileURI(wasmBinaryFile)){return fetch(wasmBinaryFile,{credentials:"same-origin"}).then(function(response){if(!response["ok"]){throw"failed to load wasm binary file at '"+wasmBinaryFile+"'"}return response["arrayBuffer"]()}).catch(function(){return getBinary(wasmBinaryFile)})}else{if(readAsync){return new Promise(function(resolve,reject){readAsync(wasmBinaryFile,function(response){resolve(new Uint8Array(response))},reject)})}}}return Promise.resolve().then(function(){return getBinary(wasmBinaryFile)})}function createWasm(){var info={"a":asmLibraryArg};function receiveInstance(instance,module){var exports=instance.exports;Module["asm"]=exports;wasmMemory=Module["asm"]["v"];updateGlobalBufferAndViews(wasmMemory.buffer);wasmTable=Module["asm"]["y"];addOnInit(Module["asm"]["w"]);removeRunDependency("wasm-instantiate")}addRunDependency("wasm-instantiate");function receiveInstantiationResult(result){receiveInstance(result["instance"])}function instantiateArrayBuffer(receiver){return getBinaryPromise().then(function(binary){return WebAssembly.instantiate(binary,info)}).then(function(instance){return instance}).then(receiver,function(reason){err("failed to asynchronously prepare wasm: "+reason);abort(reason)})}function 
instantiateAsync(){if(!wasmBinary&&typeof WebAssembly.instantiateStreaming=="function"&&!isDataURI(wasmBinaryFile)&&!isFileURI(wasmBinaryFile)&&typeof fetch=="function"){return fetch(wasmBinaryFile,{credentials:"same-origin"}).then(function(response){var result=WebAssembly.instantiateStreaming(response,info);return result.then(receiveInstantiationResult,function(reason){err("wasm streaming compile failed: "+reason);err("falling back to ArrayBuffer instantiation");return instantiateArrayBuffer(receiveInstantiationResult)})})}else{return instantiateArrayBuffer(receiveInstantiationResult)}}if(Module["instantiateWasm"]){try{var exports=Module["instantiateWasm"](info,receiveInstance);return exports}catch(e){err("Module.instantiateWasm callback failed with error: "+e);return false}}instantiateAsync();return{}}function callRuntimeCallbacks(callbacks){while(callbacks.length>0){var callback=callbacks.shift();if(typeof callback=="function"){callback(Module);continue}var func=callback.func;if(typeof func=="number"){if(callback.arg===undefined){getWasmTableEntry(func)()}else{getWasmTableEntry(func)(callback.arg)}}else{func(callback.arg===undefined?null:callback.arg)}}}var wasmTableMirror=[];function getWasmTableEntry(funcPtr){var func=wasmTableMirror[funcPtr];if(!func){if(funcPtr>=wasmTableMirror.length)wasmTableMirror.length=funcPtr+1;wasmTableMirror[funcPtr]=func=wasmTable.get(funcPtr)}return func}function ___assert_fail(condition,filename,line,func){abort("Assertion failed: "+UTF8ToString(condition)+", at: "+[filename?UTF8ToString(filename):"unknown filename",line,func?UTF8ToString(func):"unknown function"])}function ___cxa_allocate_exception(size){return _malloc(size+24)+24}function ExceptionInfo(excPtr){this.excPtr=excPtr;this.ptr=excPtr-24;this.set_type=function(type){HEAPU32[this.ptr+4>>2]=type};this.get_type=function(){return HEAPU32[this.ptr+4>>2]};this.set_destructor=function(destructor){HEAPU32[this.ptr+8>>2]=destructor};this.get_destructor=function(){return 
HEAPU32[this.ptr+8>>2]};this.set_refcount=function(refcount){HEAP32[this.ptr>>2]=refcount};this.set_caught=function(caught){caught=caught?1:0;HEAP8[this.ptr+12>>0]=caught};this.get_caught=function(){return HEAP8[this.ptr+12>>0]!=0};this.set_rethrown=function(rethrown){rethrown=rethrown?1:0;HEAP8[this.ptr+13>>0]=rethrown};this.get_rethrown=function(){return HEAP8[this.ptr+13>>0]!=0};this.init=function(type,destructor){this.set_adjusted_ptr(0);this.set_type(type);this.set_destructor(destructor);this.set_refcount(0);this.set_caught(false);this.set_rethrown(false)};this.add_ref=function(){var value=HEAP32[this.ptr>>2];HEAP32[this.ptr>>2]=value+1};this.release_ref=function(){var prev=HEAP32[this.ptr>>2];HEAP32[this.ptr>>2]=prev-1;return prev===1};this.set_adjusted_ptr=function(adjustedPtr){HEAPU32[this.ptr+16>>2]=adjustedPtr};this.get_adjusted_ptr=function(){return HEAPU32[this.ptr+16>>2]};this.get_exception_ptr=function(){var isPointer=___cxa_is_pointer_type(this.get_type());if(isPointer){return HEAPU32[this.excPtr>>2]}var adjusted=this.get_adjusted_ptr();if(adjusted!==0)return adjusted;return this.excPtr}}var exceptionLast=0;var uncaughtExceptionCount=0;function ___cxa_throw(ptr,type,destructor){var info=new ExceptionInfo(ptr);info.init(type,destructor);exceptionLast=ptr;uncaughtExceptionCount++;throw ptr}function __embind_register_bigint(primitiveType,name,size,minRange,maxRange){}function getShiftFromSize(size){switch(size){case 1:return 0;case 2:return 1;case 4:return 2;case 8:return 3;default:throw new TypeError("Unknown type size: "+size)}}function embind_init_charCodes(){var codes=new Array(256);for(var i=0;i<256;++i){codes[i]=String.fromCharCode(i)}embind_charCodes=codes}var embind_charCodes=undefined;function readLatin1String(ptr){var ret="";var c=ptr;while(HEAPU8[c]){ret+=embind_charCodes[HEAPU8[c++]]}return ret}var awaitingDependencies={};var registeredTypes={};var typeDependencies={};var char_0=48;var char_9=57;function 
makeLegalFunctionName(name){if(undefined===name){return"_unknown"}name=name.replace(/[^a-zA-Z0-9_]/g,"$");var f=name.charCodeAt(0);if(f>=char_0&&f<=char_9){return"_"+name}return name}function createNamedFunction(name,body){name=makeLegalFunctionName(name);return new Function("body","return function "+name+"() {\n"+' "use strict";'+" return body.apply(this, arguments);\n"+"};\n")(body)}function extendError(baseErrorType,errorName){var errorClass=createNamedFunction(errorName,function(message){this.name=errorName;this.message=message;var stack=new Error(message).stack;if(stack!==undefined){this.stack=this.toString()+"\n"+stack.replace(/^Error(:[^\n]*)?\n/,"")}});errorClass.prototype=Object.create(baseErrorType.prototype);errorClass.prototype.constructor=errorClass;errorClass.prototype.toString=function(){if(this.message===undefined){return this.name}else{return this.name+": "+this.message}};return errorClass}var BindingError=undefined;function throwBindingError(message){throw new BindingError(message)}var InternalError=undefined;function throwInternalError(message){throw new InternalError(message)}function whenDependentTypesAreResolved(myTypes,dependentTypes,getTypeConverters){myTypes.forEach(function(type){typeDependencies[type]=dependentTypes});function onComplete(typeConverters){var myTypeConverters=getTypeConverters(typeConverters);if(myTypeConverters.length!==myTypes.length){throwInternalError("Mismatched type converter count")}for(var i=0;i{if(registeredTypes.hasOwnProperty(dt)){typeConverters[i]=registeredTypes[dt]}else{unregisteredTypes.push(dt);if(!awaitingDependencies.hasOwnProperty(dt)){awaitingDependencies[dt]=[]}awaitingDependencies[dt].push(()=>{typeConverters[i]=registeredTypes[dt];++registered;if(registered===unregisteredTypes.length){onComplete(typeConverters)}})}});if(0===unregisteredTypes.length){onComplete(typeConverters)}}function registerType(rawType,registeredInstance,options={}){if(!("argPackAdvance"in registeredInstance)){throw new 
TypeError("registerType registeredInstance requires argPackAdvance")}var name=registeredInstance.name;if(!rawType){throwBindingError('type "'+name+'" must have a positive integer typeid pointer')}if(registeredTypes.hasOwnProperty(rawType)){if(options.ignoreDuplicateRegistrations){return}else{throwBindingError("Cannot register type '"+name+"' twice")}}registeredTypes[rawType]=registeredInstance;delete typeDependencies[rawType];if(awaitingDependencies.hasOwnProperty(rawType)){var callbacks=awaitingDependencies[rawType];delete awaitingDependencies[rawType];callbacks.forEach(cb=>cb())}}function __embind_register_bool(rawType,name,size,trueValue,falseValue){var shift=getShiftFromSize(size);name=readLatin1String(name);registerType(rawType,{name:name,"fromWireType":function(wt){return!!wt},"toWireType":function(destructors,o){return o?trueValue:falseValue},"argPackAdvance":8,"readValueFromPointer":function(pointer){var heap;if(size===1){heap=HEAP8}else if(size===2){heap=HEAP16}else if(size===4){heap=HEAP32}else{throw new TypeError("Unknown boolean type size: "+name)}return this["fromWireType"](heap[pointer>>shift])},destructorFunction:null})}var emval_free_list=[];var emval_handle_array=[{},{value:undefined},{value:null},{value:true},{value:false}];function __emval_decref(handle){if(handle>4&&0===--emval_handle_array[handle].refcount){emval_handle_array[handle]=undefined;emval_free_list.push(handle)}}function count_emval_handles(){var count=0;for(var i=5;i{if(!handle){throwBindingError("Cannot use deleted val. 
handle = "+handle)}return emval_handle_array[handle].value},toHandle:value=>{switch(value){case undefined:return 1;case null:return 2;case true:return 3;case false:return 4;default:{var handle=emval_free_list.length?emval_free_list.pop():emval_handle_array.length;emval_handle_array[handle]={refcount:1,value:value};return handle}}}};function simpleReadValueFromPointer(pointer){return this["fromWireType"](HEAPU32[pointer>>2])}function __embind_register_emval(rawType,name){name=readLatin1String(name);registerType(rawType,{name:name,"fromWireType":function(handle){var rv=Emval.toValue(handle);__emval_decref(handle);return rv},"toWireType":function(destructors,value){return Emval.toHandle(value)},"argPackAdvance":8,"readValueFromPointer":simpleReadValueFromPointer,destructorFunction:null})}function floatReadValueFromPointer(name,shift){switch(shift){case 2:return function(pointer){return this["fromWireType"](HEAPF32[pointer>>2])};case 3:return function(pointer){return this["fromWireType"](HEAPF64[pointer>>3])};default:throw new TypeError("Unknown float type: "+name)}}function __embind_register_float(rawType,name,size){var shift=getShiftFromSize(size);name=readLatin1String(name);registerType(rawType,{name:name,"fromWireType":function(value){return value},"toWireType":function(destructors,value){return value},"argPackAdvance":8,"readValueFromPointer":floatReadValueFromPointer(name,shift),destructorFunction:null})}function new_(constructor,argumentList){if(!(constructor instanceof Function)){throw new TypeError("new_ called with constructor type "+typeof constructor+" which is not a function")}var dummy=createNamedFunction(constructor.name||"unknownFunctionName",function(){});dummy.prototype=constructor.prototype;var obj=new dummy;var r=constructor.apply(obj,argumentList);return r instanceof Object?r:obj}function runDestructors(destructors){while(destructors.length){var ptr=destructors.pop();var del=destructors.pop();del(ptr)}}function 
craftInvokerFunction(humanName,argTypes,classType,cppInvokerFunc,cppTargetFunc){var argCount=argTypes.length;if(argCount<2){throwBindingError("argTypes array size mismatch! Must at least get return value and 'this' types!")}var isClassMethodFunc=argTypes[1]!==null&&classType!==null;var needsDestructorStack=false;for(var i=1;i0?", ":"")+argsListWired}invokerFnBody+=(returns?"var rv = ":"")+"invoker(fn"+(argsListWired.length>0?", ":"")+argsListWired+");\n";if(needsDestructorStack){invokerFnBody+="runDestructors(destructors);\n"}else{for(var i=isClassMethodFunc?1:2;i>2)+i])}return array}function replacePublicSymbol(name,value,numArguments){if(!Module.hasOwnProperty(name)){throwInternalError("Replacing nonexistant public symbol")}if(undefined!==Module[name].overloadTable&&undefined!==numArguments){Module[name].overloadTable[numArguments]=value}else{Module[name]=value;Module[name].argCount=numArguments}}function dynCallLegacy(sig,ptr,args){var f=Module["dynCall_"+sig];return args&&args.length?f.apply(null,[ptr].concat(args)):f.call(null,ptr)}function dynCall(sig,ptr,args){if(sig.includes("j")){return dynCallLegacy(sig,ptr,args)}return getWasmTableEntry(ptr).apply(null,args)}function getDynCaller(sig,ptr){var argCache=[];return function(){argCache.length=0;Object.assign(argCache,arguments);return dynCall(sig,ptr,argCache)}}function embind__requireFunction(signature,rawFunction){signature=readLatin1String(signature);function makeDynCaller(){if(signature.includes("j")){return getDynCaller(signature,rawFunction)}return getWasmTableEntry(rawFunction)}var fp=makeDynCaller();if(typeof fp!="function"){throwBindingError("unknown function pointer with signature "+signature+": "+rawFunction)}return fp}var UnboundTypeError=undefined;function getTypeName(type){var ptr=___getTypeName(type);var rv=readLatin1String(ptr);_free(ptr);return rv}function throwUnboundTypeError(message,types){var unboundTypes=[];var seen={};function 
visit(type){if(seen[type]){return}if(registeredTypes[type]){return}if(typeDependencies[type]){typeDependencies[type].forEach(visit);return}unboundTypes.push(type);seen[type]=true}types.forEach(visit);throw new UnboundTypeError(message+": "+unboundTypes.map(getTypeName).join([", "]))}function __embind_register_function(name,argCount,rawArgTypesAddr,signature,rawInvoker,fn){var argTypes=heap32VectorToArray(argCount,rawArgTypesAddr);name=readLatin1String(name);rawInvoker=embind__requireFunction(signature,rawInvoker);exposePublicSymbol(name,function(){throwUnboundTypeError("Cannot call "+name+" due to unbound types",argTypes)},argCount-1);whenDependentTypesAreResolved([],argTypes,function(argTypes){var invokerArgsArray=[argTypes[0],null].concat(argTypes.slice(1));replacePublicSymbol(name,craftInvokerFunction(name,invokerArgsArray,null,rawInvoker,fn),argCount-1);return[]})}function integerReadValueFromPointer(name,shift,signed){switch(shift){case 0:return signed?function readS8FromPointer(pointer){return HEAP8[pointer]}:function readU8FromPointer(pointer){return HEAPU8[pointer]};case 1:return signed?function readS16FromPointer(pointer){return HEAP16[pointer>>1]}:function readU16FromPointer(pointer){return HEAPU16[pointer>>1]};case 2:return signed?function readS32FromPointer(pointer){return HEAP32[pointer>>2]}:function readU32FromPointer(pointer){return HEAPU32[pointer>>2]};default:throw new TypeError("Unknown integer type: "+name)}}function __embind_register_integer(primitiveType,name,size,minRange,maxRange){name=readLatin1String(name);if(maxRange===-1){maxRange=4294967295}var shift=getShiftFromSize(size);var fromWireType=value=>value;if(minRange===0){var bitshift=32-8*size;fromWireType=(value=>value<>>bitshift)}var isUnsignedType=name.includes("unsigned");var checkAssertions=(value,toTypeName)=>{};var toWireType;if(isUnsignedType){toWireType=function(destructors,value){checkAssertions(value,this.name);return 
value>>>0}}else{toWireType=function(destructors,value){checkAssertions(value,this.name);return value}}registerType(primitiveType,{name:name,"fromWireType":fromWireType,"toWireType":toWireType,"argPackAdvance":8,"readValueFromPointer":integerReadValueFromPointer(name,shift,minRange!==0),destructorFunction:null})}function __embind_register_memory_view(rawType,dataTypeIndex,name){var typeMapping=[Int8Array,Uint8Array,Int16Array,Uint16Array,Int32Array,Uint32Array,Float32Array,Float64Array];var TA=typeMapping[dataTypeIndex];function decodeMemoryView(handle){handle=handle>>2;var heap=HEAPU32;var size=heap[handle];var data=heap[handle+1];return new TA(buffer,data,size)}name=readLatin1String(name);registerType(rawType,{name:name,"fromWireType":decodeMemoryView,"argPackAdvance":8,"readValueFromPointer":decodeMemoryView},{ignoreDuplicateRegistrations:true})}function __embind_register_std_string(rawType,name){name=readLatin1String(name);var stdStringIsUTF8=name==="std::string";registerType(rawType,{name:name,"fromWireType":function(value){var length=HEAPU32[value>>2];var str;if(stdStringIsUTF8){var decodeStartPtr=value+4;for(var i=0;i<=length;++i){var currentBytePtr=value+4+i;if(i==length||HEAPU8[currentBytePtr]==0){var maxRead=currentBytePtr-decodeStartPtr;var stringSegment=UTF8ToString(decodeStartPtr,maxRead);if(str===undefined){str=stringSegment}else{str+=String.fromCharCode(0);str+=stringSegment}decodeStartPtr=currentBytePtr+1}}}else{var a=new Array(length);for(var i=0;ilengthBytesUTF8(value))}else{getLength=(()=>value.length)}var length=getLength();var ptr=_malloc(4+length+1);HEAPU32[ptr>>2]=length;if(stdStringIsUTF8&&valueIsOfTypeString){stringToUTF8(value,ptr+4,length+1)}else{if(valueIsOfTypeString){for(var i=0;i255){_free(ptr);throwBindingError("String has UTF-16 code units that do not fit in 8 bits")}HEAPU8[ptr+4+i]=charCode}}else{for(var i=0;iHEAPU16);shift=1}else 
if(charSize===4){decodeString=UTF32ToString;encodeString=stringToUTF32;lengthBytesUTF=lengthBytesUTF32;getHeap=(()=>HEAPU32);shift=2}registerType(rawType,{name:name,"fromWireType":function(value){var length=HEAPU32[value>>2];var HEAP=getHeap();var str;var decodeStartPtr=value+4;for(var i=0;i<=length;++i){var currentBytePtr=value+4+i*charSize;if(i==length||HEAP[currentBytePtr>>shift]==0){var maxReadBytes=currentBytePtr-decodeStartPtr;var stringSegment=decodeString(decodeStartPtr,maxReadBytes);if(str===undefined){str=stringSegment}else{str+=String.fromCharCode(0);str+=stringSegment}decodeStartPtr=currentBytePtr+charSize}}_free(value);return str},"toWireType":function(destructors,value){if(!(typeof value=="string")){throwBindingError("Cannot pass non-string to C++ string type "+name)}var length=lengthBytesUTF(value);var ptr=_malloc(4+length+charSize);HEAPU32[ptr>>2]=length>>shift;encodeString(value,ptr+4,length+charSize);if(destructors!==null){destructors.push(_free,ptr)}return ptr},"argPackAdvance":8,"readValueFromPointer":simpleReadValueFromPointer,destructorFunction:function(ptr){_free(ptr)}})}function __embind_register_void(rawType,name){name=readLatin1String(name);registerType(rawType,{isVoid:true,name:name,"argPackAdvance":0,"fromWireType":function(){return undefined},"toWireType":function(destructors,o){return undefined}})}var nowIsMonotonic=true;function __emscripten_get_now_is_monotonic(){return nowIsMonotonic}function _abort(){abort("")}var _emscripten_get_now;if(ENVIRONMENT_IS_NODE){_emscripten_get_now=(()=>{var t=process["hrtime"]();return t[0]*1e3+t[1]/1e6})}else _emscripten_get_now=(()=>performance.now());function _emscripten_memcpy_big(dest,src,num){HEAPU8.copyWithin(dest,src,src+num)}function _emscripten_get_heap_max(){return 2147483648}function emscripten_realloc_buffer(size){try{wasmMemory.grow(size-buffer.byteLength+65535>>>16);updateGlobalBufferAndViews(wasmMemory.buffer);return 1}catch(e){}}function _emscripten_resize_heap(requestedSize){var 
oldSize=HEAPU8.length;requestedSize=requestedSize>>>0;var maxHeapSize=_emscripten_get_heap_max();if(requestedSize>maxHeapSize){return false}let alignUp=(x,multiple)=>x+(multiple-x%multiple)%multiple;for(var cutDown=1;cutDown<=4;cutDown*=2){var overGrownHeapSize=oldSize*(1+.2/cutDown);overGrownHeapSize=Math.min(overGrownHeapSize,requestedSize+100663296);var newSize=Math.min(maxHeapSize,alignUp(Math.max(requestedSize,overGrownHeapSize),65536));var replacement=emscripten_realloc_buffer(newSize);if(replacement){return true}}return false}var ENV={};function getExecutableName(){return thisProgram||"./this.program"}function getEnvStrings(){if(!getEnvStrings.strings){var lang=(typeof navigator=="object"&&navigator.languages&&navigator.languages[0]||"C").replace("-","_")+".UTF-8";var env={"USER":"web_user","LOGNAME":"web_user","PATH":"/","PWD":"/","HOME":"/home/web_user","LANG":lang,"_":getExecutableName()};for(var x in ENV){if(ENV[x]===undefined)delete env[x];else env[x]=ENV[x]}var strings=[];for(var x in env){strings.push(x+"="+env[x])}getEnvStrings.strings=strings}return getEnvStrings.strings}var SYSCALLS={varargs:undefined,get:function(){SYSCALLS.varargs+=4;var ret=HEAP32[SYSCALLS.varargs-4>>2];return ret},getStr:function(ptr){var ret=UTF8ToString(ptr);return ret}};function _environ_get(__environ,environ_buf){var bufSize=0;getEnvStrings().forEach(function(string,i){var ptr=environ_buf+bufSize;HEAP32[__environ+i*4>>2]=ptr;writeAsciiToMemory(string,ptr);bufSize+=string.length+1});return 0}function _environ_sizes_get(penviron_count,penviron_buf_size){var strings=getEnvStrings();HEAP32[penviron_count>>2]=strings.length;var bufSize=0;strings.forEach(function(string){bufSize+=string.length+1});HEAP32[penviron_buf_size>>2]=bufSize;return 0}function __isLeapYear(year){return year%4===0&&(year%100!==0||year%400===0)}function __arraySum(array,index){var sum=0;for(var i=0;i<=index;sum+=array[i++]){}return sum}var __MONTH_DAYS_LEAP=[31,29,31,30,31,30,31,31,30,31,30,31];var 
__MONTH_DAYS_REGULAR=[31,28,31,30,31,30,31,31,30,31,30,31];function __addDays(date,days){var newDate=new Date(date.getTime());while(days>0){var leap=__isLeapYear(newDate.getFullYear());var currentMonth=newDate.getMonth();var daysInCurrentMonth=(leap?__MONTH_DAYS_LEAP:__MONTH_DAYS_REGULAR)[currentMonth];if(days>daysInCurrentMonth-newDate.getDate()){days-=daysInCurrentMonth-newDate.getDate()+1;newDate.setDate(1);if(currentMonth<11){newDate.setMonth(currentMonth+1)}else{newDate.setMonth(0);newDate.setFullYear(newDate.getFullYear()+1)}}else{newDate.setDate(newDate.getDate()+days);return newDate}}return newDate}function _strftime(s,maxsize,format,tm){var tm_zone=HEAP32[tm+40>>2];var date={tm_sec:HEAP32[tm>>2],tm_min:HEAP32[tm+4>>2],tm_hour:HEAP32[tm+8>>2],tm_mday:HEAP32[tm+12>>2],tm_mon:HEAP32[tm+16>>2],tm_year:HEAP32[tm+20>>2],tm_wday:HEAP32[tm+24>>2],tm_yday:HEAP32[tm+28>>2],tm_isdst:HEAP32[tm+32>>2],tm_gmtoff:HEAP32[tm+36>>2],tm_zone:tm_zone?UTF8ToString(tm_zone):""};var pattern=UTF8ToString(format);var EXPANSION_RULES_1={"%c":"%a %b %d %H:%M:%S %Y","%D":"%m/%d/%y","%F":"%Y-%m-%d","%h":"%b","%r":"%I:%M:%S %p","%R":"%H:%M","%T":"%H:%M:%S","%x":"%m/%d/%y","%X":"%H:%M:%S","%Ec":"%c","%EC":"%C","%Ex":"%m/%d/%y","%EX":"%H:%M:%S","%Ey":"%y","%EY":"%Y","%Od":"%d","%Oe":"%e","%OH":"%H","%OI":"%I","%Om":"%m","%OM":"%M","%OS":"%S","%Ou":"%u","%OU":"%U","%OV":"%V","%Ow":"%w","%OW":"%W","%Oy":"%y"};for(var rule in EXPANSION_RULES_1){pattern=pattern.replace(new RegExp(rule,"g"),EXPANSION_RULES_1[rule])}var WEEKDAYS=["Sunday","Monday","Tuesday","Wednesday","Thursday","Friday","Saturday"];var MONTHS=["January","February","March","April","May","June","July","August","September","October","November","December"];function leadingSomething(value,digits,character){var str=typeof value=="number"?value.toString():value||"";while(str.length0?1:0}var 
compare;if((compare=sgn(date1.getFullYear()-date2.getFullYear()))===0){if((compare=sgn(date1.getMonth()-date2.getMonth()))===0){compare=sgn(date1.getDate()-date2.getDate())}}return compare}function getFirstWeekStartDate(janFourth){switch(janFourth.getDay()){case 0:return new Date(janFourth.getFullYear()-1,11,29);case 1:return janFourth;case 2:return new Date(janFourth.getFullYear(),0,3);case 3:return new Date(janFourth.getFullYear(),0,2);case 4:return new Date(janFourth.getFullYear(),0,1);case 5:return new Date(janFourth.getFullYear()-1,11,31);case 6:return new Date(janFourth.getFullYear()-1,11,30)}}function getWeekBasedYear(date){var thisDate=__addDays(new Date(date.tm_year+1900,0,1),date.tm_yday);var janFourthThisYear=new Date(thisDate.getFullYear(),0,4);var janFourthNextYear=new Date(thisDate.getFullYear()+1,0,4);var firstWeekStartThisYear=getFirstWeekStartDate(janFourthThisYear);var firstWeekStartNextYear=getFirstWeekStartDate(janFourthNextYear);if(compareByDay(firstWeekStartThisYear,thisDate)<=0){if(compareByDay(firstWeekStartNextYear,thisDate)<=0){return thisDate.getFullYear()+1}else{return thisDate.getFullYear()}}else{return thisDate.getFullYear()-1}}var EXPANSION_RULES_2={"%a":function(date){return WEEKDAYS[date.tm_wday].substring(0,3)},"%A":function(date){return WEEKDAYS[date.tm_wday]},"%b":function(date){return MONTHS[date.tm_mon].substring(0,3)},"%B":function(date){return MONTHS[date.tm_mon]},"%C":function(date){var year=date.tm_year+1900;return leadingNulls(year/100|0,2)},"%d":function(date){return leadingNulls(date.tm_mday,2)},"%e":function(date){return leadingSomething(date.tm_mday,2," ")},"%g":function(date){return getWeekBasedYear(date).toString().substring(2)},"%G":function(date){return getWeekBasedYear(date)},"%H":function(date){return leadingNulls(date.tm_hour,2)},"%I":function(date){var twelveHour=date.tm_hour;if(twelveHour==0)twelveHour=12;else if(twelveHour>12)twelveHour-=12;return leadingNulls(twelveHour,2)},"%j":function(date){return 
leadingNulls(date.tm_mday+__arraySum(__isLeapYear(date.tm_year+1900)?__MONTH_DAYS_LEAP:__MONTH_DAYS_REGULAR,date.tm_mon-1),3)},"%m":function(date){return leadingNulls(date.tm_mon+1,2)},"%M":function(date){return leadingNulls(date.tm_min,2)},"%n":function(){return"\n"},"%p":function(date){if(date.tm_hour>=0&&date.tm_hour<12){return"AM"}else{return"PM"}},"%S":function(date){return leadingNulls(date.tm_sec,2)},"%t":function(){return"\t"},"%u":function(date){return date.tm_wday||7},"%U":function(date){var days=date.tm_yday+7-date.tm_wday;return leadingNulls(Math.floor(days/7),2)},"%V":function(date){var val=Math.floor((date.tm_yday+7-(date.tm_wday+6)%7)/7);if((date.tm_wday+371-date.tm_yday-2)%7<=2){val++}if(!val){val=52;var dec31=(date.tm_wday+7-date.tm_yday-1)%7;if(dec31==4||dec31==5&&__isLeapYear(date.tm_year%400-1)){val++}}else if(val==53){var jan1=(date.tm_wday+371-date.tm_yday)%7;if(jan1!=4&&(jan1!=3||!__isLeapYear(date.tm_year)))val=1}return leadingNulls(val,2)},"%w":function(date){return date.tm_wday},"%W":function(date){var days=date.tm_yday+7-(date.tm_wday+6)%7;return leadingNulls(Math.floor(days/7),2)},"%y":function(date){return(date.tm_year+1900).toString().substring(2)},"%Y":function(date){return date.tm_year+1900},"%z":function(date){var off=date.tm_gmtoff;var ahead=off>=0;off=Math.abs(off)/60;off=off/60*100+off%60;return(ahead?"+":"-")+String("0000"+off).slice(-4)},"%Z":function(date){return date.tm_zone},"%%":function(){return"%"}};pattern=pattern.replace(/%%/g,"\0\0");for(var rule in EXPANSION_RULES_2){if(pattern.includes(rule)){pattern=pattern.replace(new RegExp(rule,"g"),EXPANSION_RULES_2[rule](date))}}pattern=pattern.replace(/\0\0/g,"%");var bytes=intArrayFromString(pattern,false);if(bytes.length>maxsize){return 0}writeArrayToMemory(bytes,s);return bytes.length-1}function _strftime_l(s,maxsize,format,tm){return 
_strftime(s,maxsize,format,tm)}embind_init_charCodes();BindingError=Module["BindingError"]=extendError(Error,"BindingError");InternalError=Module["InternalError"]=extendError(Error,"InternalError");init_emval();UnboundTypeError=Module["UnboundTypeError"]=extendError(Error,"UnboundTypeError");function intArrayFromString(stringy,dontAddNull,length){var len=length>0?length:lengthBytesUTF8(stringy)+1;var u8array=new Array(len);var numBytesWritten=stringToUTF8Array(stringy,u8array,0,u8array.length);if(dontAddNull)u8array.length=numBytesWritten;return u8array}var asmLibraryArg={"b":___assert_fail,"e":___cxa_allocate_exception,"d":___cxa_throw,"l":__embind_register_bigint,"j":__embind_register_bool,"t":__embind_register_emval,"i":__embind_register_float,"u":__embind_register_function,"c":__embind_register_integer,"a":__embind_register_memory_view,"h":__embind_register_std_string,"f":__embind_register_std_wstring,"k":__embind_register_void,"p":__emscripten_get_now_is_monotonic,"g":_abort,"r":_emscripten_get_now,"s":_emscripten_memcpy_big,"q":_emscripten_resize_heap,"n":_environ_get,"o":_environ_sizes_get,"m":_strftime_l};var asm=createWasm();var ___wasm_call_ctors=Module["___wasm_call_ctors"]=function(){return(___wasm_call_ctors=Module["___wasm_call_ctors"]=Module["asm"]["w"]).apply(null,arguments)};var _malloc=Module["_malloc"]=function(){return(_malloc=Module["_malloc"]=Module["asm"]["x"]).apply(null,arguments)};var ___getTypeName=Module["___getTypeName"]=function(){return(___getTypeName=Module["___getTypeName"]=Module["asm"]["z"]).apply(null,arguments)};var ___embind_register_native_and_builtin_types=Module["___embind_register_native_and_builtin_types"]=function(){return(___embind_register_native_and_builtin_types=Module["___embind_register_native_and_builtin_types"]=Module["asm"]["A"]).apply(null,arguments)};var _free=Module["_free"]=function(){return(_free=Module["_free"]=Module["asm"]["B"]).apply(null,arguments)};var 
___cxa_is_pointer_type=Module["___cxa_is_pointer_type"]=function(){return(___cxa_is_pointer_type=Module["___cxa_is_pointer_type"]=Module["asm"]["C"]).apply(null,arguments)};var dynCall_viijii=Module["dynCall_viijii"]=function(){return(dynCall_viijii=Module["dynCall_viijii"]=Module["asm"]["D"]).apply(null,arguments)};var dynCall_iiiiij=Module["dynCall_iiiiij"]=function(){return(dynCall_iiiiij=Module["dynCall_iiiiij"]=Module["asm"]["E"]).apply(null,arguments)};var dynCall_iiiiijj=Module["dynCall_iiiiijj"]=function(){return(dynCall_iiiiijj=Module["dynCall_iiiiijj"]=Module["asm"]["F"]).apply(null,arguments)};var dynCall_iiiiiijj=Module["dynCall_iiiiiijj"]=function(){return(dynCall_iiiiiijj=Module["dynCall_iiiiiijj"]=Module["asm"]["G"]).apply(null,arguments)};var calledRun;function ExitStatus(status){this.name="ExitStatus";this.message="Program terminated with exit("+status+")";this.status=status}dependenciesFulfilled=function runCaller(){if(!calledRun)run();if(!calledRun)dependenciesFulfilled=runCaller};function run(args){args=args||arguments_;if(runDependencies>0){return}preRun();if(runDependencies>0){return}function doRun(){if(calledRun)return;calledRun=true;Module["calledRun"]=true;if(ABORT)return;initRuntime();if(Module["onRuntimeInitialized"])Module["onRuntimeInitialized"]();postRun()}if(Module["setStatus"]){Module["setStatus"]("Running...");setTimeout(function(){setTimeout(function(){Module["setStatus"]("")},1);doRun()},1)}else{doRun()}}Module["run"]=run;if(Module["preInit"]){if(typeof Module["preInit"]=="function")Module["preInit"]=[Module["preInit"]];while(Module["preInit"].length>0){Module["preInit"].pop()()}}run(); diff --git a/docs/native.wasm b/docs/native.wasm new file mode 100755 index 00000000..dcdb1ead Binary files /dev/null and b/docs/native.wasm differ diff --git a/docs/style.css b/docs/style.css new file mode 100644 index 00000000..b91a49b2 --- /dev/null +++ b/docs/style.css @@ -0,0 +1,101 @@ +* { + box-sizing: border-box; + margin: 0; + padding: 0; 
+ text-decoration: none; + list-style: none; +} +body { + display: flex; + flex-direction: column; + height: 100vh; +} +#main { + flex: 1; + display: none; + z-index: 0; +} +.editor-container { + flex: 1; + width: 100%; + display: flex; + flex-direction: column; + margin: 6px; +} +.editor-container:first-child { + margin-right: 0; +} +.editor-header { + display: flex; + margin: 0 2px; +} +.editor-header > li { + height: 32px; + line-height: 24px; +} +.editor-header > li > span { + margin-right: 6px; +} +.editor-options { + margin-left: auto; +} +.editor-header-options { + display: flex; +} +.validation { + display: inline-block; + height: 20px; + width: 20px; + margin: 2px 0; + border-radius: 50%; + background-color: lightgreen; +} +.validation-invalid { + background-color: pink; +} +.option { + margin-right: 8px; +} +.option:last-child { + margin-right: 0; +} +.option input[type=checkbox] { + margin-right: 4px; +} +.option .parse { + padding-left: 8px; + padding-right: 8px; + height: 24px; + cursor: pointer; +} +.editor-area { + flex: 1; + border: 1px solid lightgray; +} +.editor-info { + margin-top: 6px; + height: 160px; + border: 1px solid lightgray; + padding: 8px; + overflow-y: auto; +} +.editor-info li { + cursor: pointer; +} +.editor-info li:hover{ + background-color: lightyellow; +} +.editor-sub-header { + padding: 4px; +} +.show-toggle { + margin-right: 6px; +} +#overlay { + position: absolute; + width: 100vw; + height: 100vh; + cursor: wait; + display: none; + z-index: -1; +} diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 42acab1f..70dd782e 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -1,15 +1,42 @@ -cmake_minimum_required(VERSION 3.0) -include_directories(..) -add_definitions("-std=c++1y") +cmake_minimum_required(VERSION 3.14) +project(example) add_executable(calc calc.cc) -target_link_libraries(calc pthread) +target_include_directories(calc PRIVATE ..) 
+target_link_libraries(calc ${add_link_deps}) add_executable(calc2 calc2.cc) -target_link_libraries(calc2 pthread) +target_include_directories(calc2 PRIVATE ..) +target_link_libraries(calc2 ${add_link_deps}) add_executable(calc3 calc3.cc) -target_link_libraries(calc3 pthread) +target_include_directories(calc3 PRIVATE ..) +target_link_libraries(calc3 ${add_link_deps}) -add_executable(calc_readme calc_readme.cc) -target_link_libraries(calc_readme pthread) +add_executable(calc4 calc4.cc) +target_include_directories(calc4 PRIVATE ..) +target_link_libraries(calc4 ${add_link_deps}) + +add_executable(calc5 calc5.cc) +target_include_directories(calc5 PRIVATE ..) +target_link_libraries(calc5 ${add_link_deps}) + +add_executable(indent indent.cc) +target_include_directories(indent PRIVATE ..) +target_link_libraries(indent ${add_link_deps}) + +add_executable(docx docx.cc) +target_include_directories(docx PRIVATE ..) +target_link_libraries(docx ${add_link_deps}) + +add_executable(sequence sequence.cc) +target_include_directories(sequence PRIVATE ..) +target_link_libraries(sequence ${add_link_deps}) + +add_executable(enter_leave enter_leave.cc) +target_include_directories(enter_leave PRIVATE ..) +target_link_libraries(enter_leave ${add_link_deps}) + +add_executable(choice choice.cc) +target_include_directories(choice PRIVATE ..) +target_link_libraries(choice ${add_link_deps}) diff --git a/example/calc.cc b/example/calc.cc index 2f8c01f6..13aaeae0 100644 --- a/example/calc.cc +++ b/example/calc.cc @@ -1,65 +1,53 @@ -// -// calc.cc -// -// Copyright (c) 2015 Yuji Hirose. All rights reserved. 
-// MIT License -// - -#include +#include #include -#include +#include using namespace peg; using namespace std; -int main(int argc, const char** argv) -{ - if (argc < 2 || string("--help") == argv[1]) { - cout << "usage: calc [formula]" << endl; - return 1; - } - - auto reduce = [](const SemanticValues& sv) -> long { - auto result = sv[0].get(); - for (auto i = 1u; i < sv.size(); i += 2) { - auto num = sv[i + 1].get(); - auto ope = sv[i].get(); - switch (ope) { - case '+': result += num; break; - case '-': result -= num; break; - case '*': result *= num; break; - case '/': result /= num; break; - } - } - return result; - }; - - parser parser(R"( - EXPRESSION <- _ TERM (TERM_OPERATOR TERM)* - TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* - FACTOR <- NUMBER / '(' _ EXPRESSION ')' _ - TERM_OPERATOR <- < [-+] > _ - FACTOR_OPERATOR <- < [/*] > _ - NUMBER <- < [0-9]+ > _ - ~_ <- [ \t\r\n]* +int main(void) { + // (2) Make a parser + parser parser(R"( + # Grammar for Calculator... + Additive <- Multiplicative '+' Additive / Multiplicative + Multiplicative <- Primary '*' Multiplicative^cond / Primary + Primary <- '(' Additive ')' / Number + Number <- < [0-9]+ > + %whitespace <- [ \t]* + cond <- '' { error_message "missing multiplicative" } )"); - parser["EXPRESSION"] = reduce; - parser["TERM"] = reduce; - parser["TERM_OPERATOR"] = [](const SemanticValues& sv) { return (char)*sv.c_str(); }; - parser["FACTOR_OPERATOR"] = [](const SemanticValues& sv) { return (char)*sv.c_str(); }; - parser["NUMBER"] = [](const SemanticValues& sv) { return atol(sv.c_str()); }; + assert(static_cast(parser) == true); - auto expr = argv[1]; - long val = 0; - if (parser.parse(expr, val)) { - cout << expr << " = " << val << endl; - return 0; + // (3) Setup actions + parser["Additive"] = [](const SemanticValues &vs) { + switch (vs.choice()) { + case 0: // "Multiplicative '+' Additive" + return any_cast(vs[0]) + any_cast(vs[1]); + default: // "Multiplicative" + return any_cast(vs[0]); + } + }; + + 
parser["Multiplicative"] = [](const SemanticValues &vs) { + switch (vs.choice()) { + case 0: // "Primary '*' Multiplicative" + return any_cast(vs[0]) * any_cast(vs[1]); + default: // "Primary" + return any_cast(vs[0]); } + }; - cout << "syntax error..." << endl; + parser["Number"] = [](const SemanticValues &vs) { + return vs.token_to_number(); + }; - return -1; -} + // (4) Parse + parser.enable_packrat_parsing(); // Enable packrat parsing. -// vim: et ts=4 sw=4 cin cino={1s ff=unix + int val = 0; + parser.parse(" (1 + 2) * ", val); + + // assert(val == 9); + assert(val == 0); +} diff --git a/example/calc.vcxproj b/example/calc.vcxproj deleted file mode 100644 index b8e8ac88..00000000 --- a/example/calc.vcxproj +++ /dev/null @@ -1,92 +0,0 @@ - - - - - Debug - Win32 - - - Release - Win32 - - - - - - - - - - {F85B641A-7538-4809-8175-C528FF632CF6} - Win32Proj - sample - calc - - - - Application - true - Unicode - v140 - - - Application - false - true - Unicode - v140 - - - - - - - - - - - - - true - - - false - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - .. - - - Console - true - Ws2_32.lib;%(AdditionalDependencies) - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - .. 
- - - Console - true - true - true - Ws2_32.lib;%(AdditionalDependencies) - - - - - - \ No newline at end of file diff --git a/example/calc2.cc b/example/calc2.cc index d0e1a0f1..c0121aba 100644 --- a/example/calc2.cc +++ b/example/calc2.cc @@ -5,12 +5,11 @@ // MIT License // -#include -#include #include +#include +#include using namespace peg; -using namespace std; // // PEG syntax: @@ -22,45 +21,47 @@ using namespace std; // FACTOR_OPERATOR <- [/*] // NUMBER <- [0-9]+ // -int main(int argc, const char** argv) -{ - if (argc < 2 || string("--help") == argv[1]) { - cout << "usage: calc [formula]" << endl; - return 1; - } +int main(int argc, const char **argv) { + if (argc < 2 || std::string("--help") == argv[1]) { + std::cout << "usage: calc [formula]" << std::endl; + return 1; + } - auto reduce = [](const SemanticValues& sv) -> long { - auto result = sv[0].get(); - for (auto i = 1u; i < sv.size(); i += 2) { - auto num = sv[i + 1].get(); - auto ope = sv[i].get(); - switch (ope) { - case '+': result += num; break; - case '-': result -= num; break; - case '*': result *= num; break; - case '/': result /= num; break; - } - } - return result; - }; + auto reduce = [](const SemanticValues &vs) { + auto result = std::any_cast(vs[0]); + for (auto i = 1u; i < vs.size(); i += 2) { + auto num = std::any_cast(vs[i + 1]); + auto ope = std::any_cast(vs[i]); + switch (ope) { + case '+': result += num; break; + case '-': result -= num; break; + case '*': result *= num; break; + case '/': result /= num; break; + } + } + return result; + }; - Definition EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER; + Definition EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER; - EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM))), reduce; - TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR))), reduce; - FACTOR <= cho(NUMBER, seq(chr('('), EXPRESSION, chr(')'))); - TERM_OPERATOR <= cls("+-"), [](const SemanticValues& sv) { return (char)*sv.c_str(); }; - 
FACTOR_OPERATOR <= cls("*/"), [](const SemanticValues& sv) { return (char)*sv.c_str(); }; - NUMBER <= oom(cls("0-9")), [](const SemanticValues& sv) { return atol(sv.c_str()); }; + EXPRESSION <= seq(TERM, zom(seq(TERM_OPERATOR, TERM))), reduce; + TERM <= seq(FACTOR, zom(seq(FACTOR_OPERATOR, FACTOR))), reduce; + FACTOR <= cho(NUMBER, seq(chr('('), EXPRESSION, chr(')'))); + TERM_OPERATOR <= cls("+-"), + [](const SemanticValues &vs) { return static_cast(*vs.sv().data()); }; + FACTOR_OPERATOR <= cls("*/"), + [](const SemanticValues &vs) { return static_cast(*vs.sv().data()); }; + NUMBER <= oom(cls("0-9")), + [](const SemanticValues &vs) { return vs.token_to_number(); }; - auto expr = argv[1]; - long val = 0; - if (EXPRESSION.parse_and_get_value(expr, val).ret) { - cout << expr << " = " << val << endl; - return 0; - } + auto expr = argv[1]; + long val = 0; + if (EXPRESSION.parse_and_get_value(expr, val).ret) { + std::cout << expr << " = " << val << std::endl; + return 0; + } - return -1; + return -1; } // vim: et ts=4 sw=4 cin cino={1s ff=unix diff --git a/example/calc2.vcxproj b/example/calc2.vcxproj deleted file mode 100644 index 8e9d3792..00000000 --- a/example/calc2.vcxproj +++ /dev/null @@ -1,92 +0,0 @@ - - - - - Debug - Win32 - - - Release - Win32 - - - - - - - - - - {1D09607B-E1C0-4D62-8AB4-9E2D2C2DC6E4} - Win32Proj - sample - calc2 - - - - Application - true - Unicode - v140 - - - Application - false - true - Unicode - v140 - - - - - - - - - - - - - true - - - false - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - .. - - - Console - true - Ws2_32.lib;%(AdditionalDependencies) - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - .. 
- - - Console - true - true - true - Ws2_32.lib;%(AdditionalDependencies) - - - - - - \ No newline at end of file diff --git a/example/calc3.cc b/example/calc3.cc index 723e7bfe..68c2bcc7 100644 --- a/example/calc3.cc +++ b/example/calc3.cc @@ -5,41 +5,39 @@ // MIT License // -#include -#include #include +#include +#include using namespace peg; -using namespace std; -int main(int argc, const char** argv) -{ - if (argc < 2 || string("--help") == argv[1]) { - cout << "usage: calc3 [formula]" << endl; - return 1; - } +int main(int argc, const char **argv) { + if (argc < 2 || std::string("--help") == argv[1]) { + std::cout << "usage: calc3 [formula]" << std::endl; + return 1; + } - function eval = [&](const Ast& ast) { - if (ast.name == "NUMBER") { - return stol(ast.token); - } else { - const auto& nodes = ast.nodes; - auto result = eval(*nodes[0]); - for (auto i = 1u; i < nodes.size(); i += 2) { - auto num = eval(*nodes[i + 1]); - auto ope = nodes[i]->token[0]; - switch (ope) { - case '+': result += num; break; - case '-': result -= num; break; - case '*': result *= num; break; - case '/': result /= num; break; - } - } - return result; + std::function eval = [&](const Ast &ast) { + if (ast.name == "NUMBER") { + return ast.token_to_number(); + } else { + const auto &nodes = ast.nodes; + auto result = eval(*nodes[0]); + for (auto i = 1u; i < nodes.size(); i += 2) { + auto num = eval(*nodes[i + 1]); + auto ope = nodes[i]->token[0]; + switch (ope) { + case '+': result += num; break; + case '-': result -= num; break; + case '*': result *= num; break; + case '/': result /= num; break; } - }; + } + return result; + } + }; - parser parser(R"( + parser parser(R"( EXPRESSION <- TERM (TERM_OPERATOR TERM)* TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* FACTOR <- NUMBER / '(' EXPRESSION ')' @@ -51,20 +49,20 @@ int main(int argc, const char** argv) %whitespace <- [ \t\r\n]* )"); - parser.enable_ast(); + parser.enable_ast(); - auto expr = argv[1]; - shared_ptr ast; - if 
(parser.parse(expr, ast)) { - ast = AstOptimizer(true).optimize(ast); - cout << ast_to_s(ast); - cout << expr << " = " << eval(*ast) << endl; - return 0; - } + auto expr = argv[1]; + std::shared_ptr ast; + if (parser.parse(expr, ast)) { + ast = parser.optimize_ast(ast); + std::cout << ast_to_s(ast); + std::cout << expr << " = " << eval(*ast) << std::endl; + return 0; + } - cout << "syntax error..." << endl; + std::cout << "syntax error..." << std::endl; - return -1; + return -1; } // vim: et ts=4 sw=4 cin cino={1s ff=unix diff --git a/example/calc3.vcxproj b/example/calc3.vcxproj deleted file mode 100644 index 79f396ed..00000000 --- a/example/calc3.vcxproj +++ /dev/null @@ -1,92 +0,0 @@ - - - - - Debug - Win32 - - - Release - Win32 - - - - - - - - - - {E6146F73-3B4C-4D4C-BC55-148930954434} - Win32Proj - sample - calc3 - - - - Application - true - Unicode - v140 - - - Application - false - true - Unicode - v140 - - - - - - - - - - - - - true - - - false - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - .. - - - Console - true - Ws2_32.lib;%(AdditionalDependencies) - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - .. - - - Console - true - true - true - Ws2_32.lib;%(AdditionalDependencies) - - - - - - \ No newline at end of file diff --git a/example/calc4.cc b/example/calc4.cc new file mode 100644 index 00000000..563f994c --- /dev/null +++ b/example/calc4.cc @@ -0,0 +1,42 @@ +#include +#include +#include + +using namespace peg; +using namespace std; + +int main(void) { + parser parser(R"( + EXPRESSION <- ATOM (OPERATOR ATOM)* { + precedence + L - + + L / * + } + ATOM <- NUMBER / '(' EXPRESSION ')' + OPERATOR <- < [-+/*] > + NUMBER <- < '-'? 
[0-9]+ > + %whitespace <- [ \t\r\n]* + )"); + + parser["EXPRESSION"] = [](const SemanticValues &vs) { + auto result = any_cast(vs[0]); + if (vs.size() > 1) { + auto ope = any_cast(vs[1]); + auto num = any_cast(vs[2]); + switch (ope) { + case '+': result += num; break; + case '-': result -= num; break; + case '*': result *= num; break; + case '/': result /= num; break; + } + } + return result; + }; + parser["OPERATOR"] = [](const SemanticValues &vs) { return *vs.sv().data(); }; + parser["NUMBER"] = [](const SemanticValues &vs) { return atol(vs.sv().data()); }; + + long val; + parser.parse(" -1 + (1 + 2) * 3 - -1", val); + + assert(val == 9); +} diff --git a/example/calc5.cc b/example/calc5.cc new file mode 100644 index 00000000..a76d3a5f --- /dev/null +++ b/example/calc5.cc @@ -0,0 +1,68 @@ +// +// calc5.cc +// +// Copyright (c) 2015 Yuji Hirose. All rights reserved. +// MIT License +// + +#include +#include +#include + +using namespace peg; + +int main(int argc, const char **argv) { + if (argc < 2 || std::string("--help") == argv[1]) { + std::cout << "usage: calc5 [formula]" << std::endl; + return 1; + } + + std::function eval = [&](const Ast &ast) { + if (ast.name == "NUMBER") { + return ast.token_to_number(); + } else { + const auto &nodes = ast.nodes; + auto result = eval(*nodes[0]); + if (nodes.size() > 1) { + auto ope = nodes[1]->token[0]; + auto num = eval(*nodes[2]); + switch (ope) { + case '+': result += num; break; + case '-': result -= num; break; + case '*': result *= num; break; + case '/': result /= num; break; + } + } + return result; + } + }; + + parser parser(R"( + EXPRESSION <- ATOM (OPERATOR ATOM)* { + precedence + L - + + L / * + } + ATOM <- NUMBER / '(' EXPRESSION ')' + OPERATOR <- < [-+/*] > + NUMBER <- < '-'? 
[0-9]+ > + %whitespace <- [ \t\r\n]* + )"); + + parser.enable_ast(); + + auto expr = argv[1]; + std::shared_ptr ast; + if (parser.parse(expr, ast)) { + ast = parser.optimize_ast(ast); + std::cout << ast_to_s(ast); + std::cout << expr << " = " << eval(*ast) << std::endl; + return 0; + } + + std::cout << "syntax error..." << std::endl; + + return -1; +} + +// vim: et ts=4 sw=4 cin cino={1s ff=unix diff --git a/example/choice.cc b/example/choice.cc new file mode 100644 index 00000000..a3445a20 --- /dev/null +++ b/example/choice.cc @@ -0,0 +1,28 @@ +// +// choice.cc +// +// Copyright (c) 2023 Yuji Hirose. All rights reserved. +// MIT License +// + +#include +#include +#include + +using namespace peg; + +int main(void) { + parser parser(R"( +type <- 'string' / 'int' / 'double' +%whitespace <- [ \t\r\n]* + )"); + + parser["type"] = [](const SemanticValues &vs) { + std::cout << vs.choice() << std::endl; + }; + + if (parser.parse("int")) { return 0; } + + std::cout << "syntax error..." << std::endl; + return -1; +} diff --git a/example/docx.cc b/example/docx.cc new file mode 100644 index 00000000..4778c33d --- /dev/null +++ b/example/docx.cc @@ -0,0 +1,64 @@ +// +// docx.cc +// +// Copyright (c) 2022 Yuji Hirose. All rights reserved. +// MIT License +// + +#include +#include +#include + +using namespace peg; + +int main(void) { + parser parser(R"(ROOT <- DECL CONTENT +DECL <- '') .)+ '?>' +CONTENT <- (ELEMENT / TEXT)* +ELEMENT <- $(STAG CONTENT ETAG) / ATAG +STAG <- '<' $tag< TAG_NAME > (' ' ATTRIBUTES)?'>' +ETAG <- '' +ATAG <- '<' TAG_NAME (' ' ATTRIBUTES)? '/>' +TAG_NAME <- < [a-zA-Z:]+ > +ATTRIBUTES <- (!('>' / '/>') .)+ +TEXT <- < TEXT_DATA+ > +TEXT_DATA <- ![<] . 
+ )"); + + std::vector stack; + stack.push_back("[ROOT]"); + + parser["STAG"] = [&](const SemanticValues &vs) { + auto tag = std::any_cast(vs[0]); + if (tag == "w:pPr" || tag == "w:t") { + std::cout << "<" << tag << ">" << std::endl; + stack.push_back(tag); + } + }; + + parser["ETAG"] = [&](const SemanticValues & /*vs*/) { + auto tag = stack.back(); + if (tag == "w:pPr" || tag == "w:t") { + std::cout << "" << std::endl; + stack.pop_back(); + } + }; + + parser["TAG_NAME"] = [&](const SemanticValues &vs) { + return vs.token_to_string(); + }; + + parser["TEXT"] = [&](const SemanticValues &vs) { + auto tag = stack.back(); + if (tag == "w:t") { std::cout << vs.sv(); } + }; + + const auto source = R"( +研究記事36魚ではなく人を集める漁師」になりますか「恐れることはありません。今後,あなたは人を生きたまま捕るのです」。ルカ5:1073番の歌私たちに大胆さを与えてください何を学ぶかイエスは自分の弟子になるよう,謙遜で働き者の漁師たちを招きました。イエスは現在でも,謙遜でよく働く人たちに,魚ではなく人を集める漁師」になるよう勧めています。この記事は,聖書を学んではいても,イエスの勧めの通りに行動するのをためらっている人が何をするとよいかを説明しています。ペテロ,アンデレ,ヤコブ,ヨハネは漁師でした。イエスから,「私に付いてきなさい。魚ではなく人を集める漁師にしてあげましょう」と言われた時,驚いたことでしょう。どうしましたか。「直ちに網を捨てて後に従った」と聖書は述べています。(マタ4:1822)その決定により,彼らの生活は大きく変わりました。魚ではなく「人を生きたまま捕る」ことになったのです。(ルカ5:10)現在イエスは,真理を愛する誠実な人たちに同じように呼び掛けています。(マタ28:19,20)あなたはその呼び掛けに応じて,人を集める漁師になりましたか。1.イエスは4人の漁師に何と言いましたか。4人はどうしましたか。語句の説明:「魚ではなく人を集める漁師」とは,良い知らせを伝え,イエスの弟子になるよう他の人を教える人全てのことです。あなたは聖書を学んでよく進歩し,伝道者になることを考えているかもしれません。なかなかその一歩を踏み出せないとしても,自分は駄目だとは考えないでください。慎重なのは,この決定の大切さを理解している証拠なのかもしれません。聖書には,ペテロと仲間たちが網を「直ちに」捨てた,と書かれています。でもペテロとアンデレはその場の勢いで行動したのではありません。イエスと出会ってメシアと認めたのは,それより6カ月以上前のことでした。(ヨハ1:3542)あなたもエホバとイエスについてすでに多くのことを学び,これからもエホバと親しくなりたいと思っていることでしょう。それでも,伝道者になることを決める前に,よく考える必要があります。ペテロたちはなぜそのような決定ができましたか。2.人を集める漁師になることを,時間をかけて決定する必要があるのはなぜですか。そのステップを踏む上でどんなことが助けになりますか。イエスの最初の弟子になった4人の漁師たちは,意欲を持ち,漁の仕方に詳しく,勇気があり,自分を甘やかさない人でした。こうした要素は,人を集める漁師になる上でも役立ったに違いありません。私たちがイエスの弟子として働く上でも役立ちます。この記事では,どうすればこうした点で成長できるかを考えます。3.何があると,伝道者になりたいという願いが強まりますか。意欲を高めるペテロは家族を養うために魚を集めました。でも漁は単なる生計の手段ではありませんでした。漁師の仕事に愛着を抱いていたようです。(ヨハ21:3,915)人を
集める漁師としての活動にも愛着を持ちました。エホバの助けにより,その活動に熟達しました。(使徒2:14,414.ペテロは漁師の仕事をどう思っていましたか。私たちが伝道するのは,エホバを愛しているからです。それが伝道する何よりの理由です。エホバへの愛があれば,自分には無理だという気持ちを克服できます。イエスは,人を集める漁師になるようペテロに勧めた時,「恐れることはありません」とも言いました。(ルカ5:811を読む。)ペテロは,イエスの弟子になったらどうなるのかと考えて萎縮したのではありません。イエスの奇跡によって魚が大量に捕れたことに圧倒され,イエスと共に働く資格などないと感じたのです。あなたの場合はどうですか。イエスの弟子になることに何が伴うかを知って,気後れしていますか。もしそうであれば,エホバとイエスと隣人への愛を強めましょう。そうすれば,イエスの呼び掛けに応えて人を集める漁師になりたいと思うでしょう。(マタ22:37,39。ヨハ14:155.ルカ11節によれば,ペテロが恐れたのはなぜですか。私たちはどうすれば恐れの気持ちを克服できますか。ほかにもどんな理由で私たちは伝道しますか。「行って,……人々を弟子としなさい」というイエスの命令に従いたいと思っています。(マタ28:19,20)さらに,人々が「痛めつけられ,放り出され」,神の王国についての真理を切実に必要としているからです。(マタ9:36)エホバはあらゆる人が真理の正確な知識を得て救われることを望んでいます。(テモ一2:46.私たちはほかにもどんな理由で伝道しますか。伝道によって何が成し遂げられるかを考えるなら,命を救う活動に参加するよう動かされます。漁師が魚を集めるのは売るか食べるかするためですが,私たちが人々を集めるのはその人たちの命を救うためです。(ローマ10:1315を読む。テモ一4:167.ローマ101315節によれば,伝道が大切なのはなぜですか。伝道の仕方を学ぶイエスの時代,イスラエルの漁師は集めて良いのはどんな魚かを知っていなければなりませんでした。(レビ11:912)魚がどこにいるかも知る必要がありました。魚は,水質が合い,餌がたくさんある所に生息します。いつ漁に出るかも重要です。太平洋のある島で奉仕する宣教者は,そのことを知りました。地元の兄弟から,一緒に漁に行くよう誘われました。それで,「明日の朝9時に行きますね」と言いました。すると地元の兄弟から,「兄弟は漁のことが分かっていないね。自分に都合のいい時間ではなく,魚が捕れる時間に行かないと」と言われました。世紀の,人を集める漁師も,がいる場所に,会えそうな時間に行きました。例えばイエスの弟子たちは,神殿や会堂や広場で,また家から家に行って伝道しました。(使徒5:42;17:17;18:4)私たちも,区域の人々の生活スタイルに通じておきましょう。人に合わせ,人がいそうな場所で,会えそうな時間に伝道する必要があります。(コリ一9:19239.漁師はどんなことを知っていなければなりませんか。なぜですか。10漁師は,どんな道具が漁に必要で,どう使うかを知っていなければなりません。私たちも,伝道に必要な道具を持っていなければなりません。使い方も知らなければなりません。イエスは,人を集めるための明確な指示を弟子たちに与えました。何を持っていき,どこで伝道し,何と言うかを教えました。(マタ10:5。ルカ10:111)現在,エホバの組織は,使いやすい道具を収めた宣教ツールボックスを用意しています。各種の道具の使い方も教えています。使い方の訓練を受けた私たちは,自信を持って上手に使えるようになっています。(テモ二2:1510.エホバの組織はどんな道具を用意していますか。「ものみの塔」2018年10月号1116ページの「真理を教えましょう」という記事を参照。勇気を持つ11漁師には勇気が必要です。天候が急に変わって海が荒れることがあるからです。夜の暗い中で働かなければならないこともあります。同じように,人を集める漁師にも勇気が求められます。伝道に出始めてエホバの証人であることを明らかにすると,家族に反対され,友達に冷やかされ,伝えようとしても断られるかもしれません。の中にいるように感じるでしょう。でも,それは意外なことではありません。イエスは,弟子たちが敵対する人たちの中で伝道することになる,と予告しました。(マタ10:1611.人を集める漁師に勇気が要るのはなぜですか。12どうすれば勇気を持てますか。まず,イエスが今もこの活動を天から導いていることを意識してください。(
ヨハ16:33。啓14:1416)さらに,あなたを必ず助けるというエホバの約束に対する信仰を強めてください。(マタ6:3234)信仰が強まれば,勇気も強まります。ペテロと仲間たちはイエスの弟子になるためにそれまでの仕事を辞めて,強い信仰を示しました。あなたも,聖書レッスンを始めて集会に出席し始めたことを友達や家族に伝えた時,強い信仰を示したのです。エホバの基準に合わせるために,行動や生活を大きく変えてきたことでしょう。それにも信仰や勇気が要ります。これからも勇気を示していくなら,「あなたがどこに行っても,あなたの神エホバが共にいる」ということを実感できるでしょう。(ヨシュア1:7を読む。)12.ヨシュア節によれば,どうすれば勇気を持てますか。13勇気を持つため,ほかにも何ができますか。勇気と大胆さを与えてください,と祈りましょう。(使徒4:29,31)エホバは祈りに答え,必ずあなたのそばにいて,支え続けてくださいます。エホバが過去にご自分に仕える人をどのように助けたかを思い巡らしましょう。さらに,あなたが問題を乗り越え,生活を改めるための力をエホバが与えてくださったということも考えてください。エホバはご自分の民が紅海を渡れるよう助けたのですから,あなたがイエスの弟子になるようにも助けてくださいます。(出14:13)次のように述べた詩編作者と同じ確信を持ちましょう。「エホバは私の側にいてくださる。私は恐れない。人が私に何を行えるだろう」。118:613.エホバの助けを思い巡らし,祈ると勇気を持てます。なぜそういえますか。14勇気を持つのに,もともと内気だった人が大胆に語れるようエホバがどのように助けたかを知ることも役立ちます。真<まさ>枝<え>姉妹の経験を考えてみてください。姉妹は控えめな人で,信じていることを人に話すなんてとてもできない,と思っていました。知らない人に話すことだけでも,自分では登れない壁のように感じました。それで姉妹は,エホバと隣人に対する愛を深めることを特に意識しました。伝道が急務であることを考え,伝道したいという意欲を抱けるよう助けてください,と祈りました。恐れを克服し,正規開拓奉仕を行えるまでになりました。エホバは伝道を始めたばかりの人が「勇気を出」せるようにもしてくださいます。知<とも>余<よ>姉妹の経験を取り上げましょう。初めて野外奉仕に出た時,戸口に出てきた女性に「エホバの証人とは関わりたくない」と怒鳴られ,追い返されてしまいました。でも,気を落とすどころか,一緒にいた姉妹に,「うれしいわ。何も言わないのに,あの人は私がエホバの証人だと思ってくれた」と言いました。知余は現在,正規開拓奉仕をしています。14.真枝と知余の経験からどんなことを学べますか。自分を甘やかさない15漁師として生計を立てている人は自分を甘やかしません。自分を甘やかさない人は,すべき事をしっかり果たします。漁師は朝早く起き,仕事を最後まで果たし,天候が崩れても漁を続けます。同じように,私たちが伝道をしっかり続け,最後まで果たすには,自分を甘やかさない態度が必要です。(マタ10:2215.自分を甘やかさない態度がクリスチャンに必要なのはなぜですか。16私たちには皆,自分に楽な道を選ぶ傾向があります。自分の力だけでは,その傾向を克服し,自分にとって難しい事柄を最後まで行うことはできません。自分を甘やかさないためには自制が必要です。エホバが聖なる力によって助けてくださいます。(ガラ5:22,2316.どうすれば難しく感じる事柄をきちんと行えますか。17パウロは自分を甘やかすことはしませんでした。正しいことを行うのに「自分の体を打ちたた」なければならないことを認めていました。(コリント第一9:2527を読む。)パウロは他の人に,自分を甘やかさず,「全てのことを適正に,取り決めに沿って行いましょう」と勧めました。(コリ一14:40)私たちも自分を甘やかさず,良い知らせを定期的に伝えることも含め,エホバの崇拝をきちんと続けましょう。(使徒2:4617.コリント第一2527節でパウロは,自分を甘やかさないためにどんな努力をしていると言っていますか。遅らせない18漁の場合,成功の尺度となるのは集めた魚の量です。私たちの場合,エホバの証人になるよう何人の人を助けたかによって成功が計られることはありません。(ルカ8:1115)良い知らせを伝え,人々を教えることを続ける限り,エホバは私たちが成功していると見てくださいます。なぜですか。エホバとイエスに従っていることになるからです。(マル13:10。使徒5:28,2918.エホバはどんな
ことを成功と見てくださいますか。19国によっては,漁が許可されているのは1年のうち数カ月の間だけです。そのような場合,漁を行える時期の終わりが近づくと,漁師はますます漁に集中します。人を集める漁師である私たちにも,今一層伝道に力を入れる理由があります。今の体制の終わりが非常に近いからです。命を救う活動を行う期間は限られています。この重要な活動に参加するのを遅らせたり,状況が整うのを待った方がよいと考えたりしないでください。(伝11:420意欲を高め,聖書について伝える方法を学び,勇気を持ち,自分を甘やかさない態度を持つため,今行動しましょう。人を集める漁師は,今800万人以上います。その人たちに加わってください。そうすれば,エホバからの喜びを経験できるでしょう。(ネヘ8:10)伝道を十分に行い,エホバが良いとご覧になるまで行い続けることを決意しましょう。次の記事では,人を集める漁師として,王国について伝道し続ける決意を強めるための3つの方法を学びます。1920.今伝道に力を入れるどんな理由がありますか。どのようにできますか意欲を高める勇気を持つ自分を甘やかさない 66番の歌 良い知らせを伝えるペテロと仲間たちは人を集める漁師になった。この大切な活動は現在も行われている。(45節を参照。)漁に打ち込む人は・・・1.魚が捕れる時間に,捕れる場所で働く。(89節を参照。)2.漁に必要な道具の使い方を知っている。(10節を参照。)3.海が荒れても勇気を持って働く。(1112節を参照。)image.cnt01:label1.ペテロと3人の弟子たちが,魚でいっぱいになった網を一生懸命に引き上げている。2.2人の姉妹が楽しそうに話しながら伝道している。image.cnt02:label漁に打ち込んでいる人を描いた3つの場面。812節で取り上げられている。image.cnt03:label夜明けに1人の漁師が網を投げている。image.cnt04:label夕暮れに2人の漁師が網を修理している。image.cnt05:label嵐の夜に2人の漁師が網を手繰り寄せている。image.lsr:labelペテロと3人の弟子たちが,魚でいっぱいになった網を一生懸命に引き上げている。image.lss:labelペテロと3人の弟子たちが,魚でいっぱいになった網を一生懸命に引き上げている。image.sqr:labelペテロと3人の弟子たちが,魚でいっぱいになった網を一生懸命に引き上げている。image.sqs:labelペテロと3人の弟子たちが,魚でいっぱいになった網を一生懸命に引き上げている。MEPS:Title「魚ではなく人を集める漁師」になりますかWEB:MetaTitle「魚ではなく人を集める漁師」になりますか 「ものみの塔」研究 +)"; + + if (parser.parse(source)) { return 0; } + + std::cout << "syntax error..." << std::endl; + return -1; +} diff --git a/example/enter_leave.cc b/example/enter_leave.cc new file mode 100644 index 00000000..7cc5ffe9 --- /dev/null +++ b/example/enter_leave.cc @@ -0,0 +1,39 @@ +// +// enter_leave.cc +// +// Copyright (c) 2023 Yuji Hirose. All rights reserved. 
+// MIT License +// + +#include +#include +#include + +using namespace peg; + +int main(void) { + parser parser(R"( + S <- A+ + A <- 'A' + )"); + + parser["A"].enter = [](const Context & /*c*/, const char * /*s*/, + size_t /*n*/, std::any & /*dt*/) { + std::cout << "enter" << std::endl; + }; + + parser["A"] = [](const SemanticValues & /*vs*/, std::any & /*dt*/) { + std::cout << "action!" << std::endl; + }; + + parser["A"].leave = [](const Context & /*c*/, const char * /*s*/, + size_t /*n*/, size_t /*matchlen*/, + std::any & /*value*/, std::any & /*dt*/) { + std::cout << "leave" << std::endl; + }; + + if (parser.parse("A")) { return 0; } + + std::cout << "syntax error..." << std::endl; + return -1; +} diff --git a/example/example.sln b/example/example.sln deleted file mode 100644 index b6352c76..00000000 --- a/example/example.sln +++ /dev/null @@ -1,38 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 14 -VisualStudioVersion = 14.0.23107.0 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "calc", "calc.vcxproj", "{F85B641A-7538-4809-8175-C528FF632CF6}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "calc2", "calc2.vcxproj", "{1D09607B-E1C0-4D62-8AB4-9E2D2C2DC6E4}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "calc3", "calc3.vcxproj", "{E6146F73-3B4C-4D4C-BC55-148930954434}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Win32 = Debug|Win32 - Release|Win32 = Release|Win32 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {F85B641A-7538-4809-8175-C528FF632CF6}.Debug|Win32.ActiveCfg = Debug|Win32 - {F85B641A-7538-4809-8175-C528FF632CF6}.Debug|Win32.Build.0 = Debug|Win32 - {F85B641A-7538-4809-8175-C528FF632CF6}.Release|Win32.ActiveCfg = Release|Win32 - {F85B641A-7538-4809-8175-C528FF632CF6}.Release|Win32.Build.0 = Release|Win32 - 
{1D09607B-E1C0-4D62-8AB4-9E2D2C2DC6E4}.Debug|Win32.ActiveCfg = Debug|Win32 - {1D09607B-E1C0-4D62-8AB4-9E2D2C2DC6E4}.Debug|Win32.Build.0 = Debug|Win32 - {1D09607B-E1C0-4D62-8AB4-9E2D2C2DC6E4}.Release|Win32.ActiveCfg = Release|Win32 - {1D09607B-E1C0-4D62-8AB4-9E2D2C2DC6E4}.Release|Win32.Build.0 = Release|Win32 - {E6146F73-3B4C-4D4C-BC55-148930954434}.Debug|Win32.ActiveCfg = Debug|Win32 - {E6146F73-3B4C-4D4C-BC55-148930954434}.Debug|Win32.Build.0 = Debug|Win32 - {E6146F73-3B4C-4D4C-BC55-148930954434}.Release|Win32.ActiveCfg = Release|Win32 - {E6146F73-3B4C-4D4C-BC55-148930954434}.Release|Win32.Build.0 = Release|Win32 - {6C5633BD-3CAE-498E-B0C6-ED90A1A99C47}.Debug|Win32.ActiveCfg = Debug|Win32 - {6C5633BD-3CAE-498E-B0C6-ED90A1A99C47}.Debug|Win32.Build.0 = Debug|Win32 - {6C5633BD-3CAE-498E-B0C6-ED90A1A99C47}.Release|Win32.ActiveCfg = Release|Win32 - {6C5633BD-3CAE-498E-B0C6-ED90A1A99C47}.Release|Win32.Build.0 = Release|Win32 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection -EndGlobal diff --git a/example/indent.cc b/example/indent.cc new file mode 100644 index 00000000..af3b614c --- /dev/null +++ b/example/indent.cc @@ -0,0 +1,73 @@ +// +// indent.cc +// +// Copyright (c) 2022 Yuji Hirose. All rights reserved. +// MIT License +// + +// Based on https://gist.github.com/dmajda/04002578dd41ae8190fc + +#include +#include +#include + +using namespace peg; + +int main(void) { + parser parser(R"(Start <- Statements {} +Statements <- Statement* +Statement <- Samedent (S / I) + +S <- 'S' EOS { no_ast_opt } +I <- 'I' EOL Block / 'I' EOS { no_ast_opt } + +Block <- Statements {} + +~Samedent <- ' '* {} + +~EOS <- EOL / EOF +~EOL <- '\n' +~EOF <- !. 
+ )"); + + size_t indent = 0; + + parser["Block"].enter = [&](const Context & /*c*/, const char * /*s*/, + size_t /*n*/, std::any & /*dt*/) { indent += 2; }; + + parser["Block"].leave = [&](const Context & /*c*/, const char * /*s*/, + size_t /*n*/, size_t /*matchlen*/, + std::any & /*value*/, + std::any & /*dt*/) { indent -= 2; }; + + parser["Samedent"].predicate = + [&](const SemanticValues &vs, const std::any & /*dt*/, std::string &msg) { + if (indent != vs.sv().size()) { + msg = "different indent..."; + return false; + } + return true; + }; + + parser.enable_ast(); + + const auto source = R"(I + S + I + I + S + S + S + S +)"; + + std::shared_ptr ast; + if (parser.parse(source, ast)) { + ast = parser.optimize_ast(ast); + std::cout << ast_to_s(ast); + return 0; + } + + std::cout << "syntax error..." << std::endl; + return -1; +} diff --git a/example/sequence.cc b/example/sequence.cc new file mode 100644 index 00000000..8753ceda --- /dev/null +++ b/example/sequence.cc @@ -0,0 +1,29 @@ +// +// sequence.cc +// +// Copyright (c) 2023 Yuji Hirose. All rights reserved. +// MIT License +// + +#include +#include +#include + +using namespace peg; + +int main(void) { + parser parser(R"( +START <- SEQUENCE_A +SEQUENCE_A <- SEQUENCE('A') +SEQUENCE(X) <- X (',' X)* + )"); + + parser["SEQUENCE_A"] = [](const SemanticValues & /*vs*/) { + std::cout << "SEQUENCE_A" << std::endl; + }; + + if (parser.parse("A,A")) { return 0; } + + std::cout << "syntax error..." << std::endl; + return -1; +} diff --git a/grammar/cpp-peglib.peg b/grammar/cpp-peglib.peg new file mode 100644 index 00000000..dd7c45a1 --- /dev/null +++ b/grammar/cpp-peglib.peg @@ -0,0 +1,159 @@ +# Setup PEG syntax parser +Grammar <- Spacing Definition+ EndOfFile + +Definition <- + Ignore IdentCont Parameters LEFTARROW Expression Instruction? + / Ignore Identifier LEFTARROW Expression Instruction? + +Expression <- Sequence (SLASH Sequence)* + +Sequence <- (CUT / Prefix)* + +Prefix <- (AND / NOT)? 
SuffixWithLabel + +SuffixWithLabel <- Suffix (LABEL Identifier)? + +Suffix <- Primary Loop? + +Loop <- QUESTION / STAR / PLUS / Repetition + +Primary <- + Ignore IdentCont Arguments !LEFTARROW + / Ignore Identifier !(Parameters? LEFTARROW) + / OPEN Expression CLOSE + / BeginTok Expression EndTok + / BeginCapScope Expression EndCapScope + / BeginCap Expression EndCap + / CapScope + / BackRef + / DictionaryI + / LiteralI + / Dictionary + / Literal + / NegatedClassI + / NegatedClass + / ClassI + / Class + / DOT + +Identifier <- IdentCont Spacing + +IdentCont <- + +IdentStart <- !"↑" !"⇑" ([a-zA-Z_%] / [\u0080-\uFFFF]) + +IdentRest <- IdentStart / [0-9] + +Dictionary <- LiteralD (PIPE LiteralD)+ + +DictionaryI <- LiteralID (PIPE LiteralID)* + +lit_ope <- + ['] <(!['] Char)*> ['] Spacing + / ["] <(!["] Char)*> ["] Spacing + +Literal <- lit_ope + +LiteralD <- lit_ope + +lit_case_ignore_ope <- + ['] <(!['] Char)*> "'i" Spacing + / ["] <(!["] Char)*> '"i' Spacing + +LiteralI <- lit_case_ignore_ope + +LiteralID <- lit_case_ignore_ope + +# NOTE: The original Brian Ford's paper uses 'zom' instead of 'oom'. +Class <- '[' !'^' <(!']' Range)+> ']' Spacing +ClassI <- '[' !'^' <(!']' Range)+> ']i' Spacing +NegatedClass <- "[^" <(!']' Range)+> ']' Spacing +NegatedClassI <- "[^" <(!']' Range)+> ']i' Spacing + +Range <- (Char '-' ! ']' Char) / Char + +Char <- + '\\' [nrt'\"[\]\\^] + / '\\' [0-3] [0-7] [0-7] + / '\\' [0-7] [0-7]? + / "\\x" [0-9a-fA-F] [0-9a-fA-F]? + / "\\u" (((('0' [0-9a-fA-F]) / "10") [0-9a-fA-F]{4,4}) / [0-9a-fA-F]{4,5}) + / !'\\' . + +Repetition <- BeginBracket RepetitionRange EndBracket + +RepetitionRange <- + Number COMMA Number + / Number COMMA + / Number + / COMMA Number + +Number <- [0-9]+ Spacing + +CapScope <- BeginCapScope Expression EndCapScope + +LEFTARROW <- ("<-" / "←") Spacing + +~SLASH <- '/' Spacing +~PIPE <- '|' Spacing +AND <- '&' Spacing +NOT <- '!' Spacing +QUESTION <- '?' 
Spacing +STAR <- '*' Spacing +PLUS <- '+' Spacing +~OPEN <- '(' Spacing +~CLOSE <- ')' Spacing +DOT <- '.' Spacing + +CUT <- "↑" Spacing +~LABEL <- ('^' / "⇑") Spacing + +~Spacing <- (Space / Comment)* +Comment <- '#' (!EndOfLine . )* +Space <- ' ' / '\t' / EndOfLine +EndOfLine <- "\r\n" / '\n' / '\r' +EndOfFile <- ! . + +~BeginTok <- '<' Spacing +~EndTok <- '>' Spacing + +~BeginCapScope <- '$' '(' Spacing +~EndCapScope <- ')' Spacing + +BeginCap <- '$' '<' Spacing +~EndCap <- '>' Spacing + +BackRef <- '$' Spacing + +IGNORE <- '~' + +Ignore <- IGNORE? +Parameters <- OPEN Identifier (COMMA Identifier)* CLOSE +Arguments <- OPEN Expression (COMMA Expression)* CLOSE +~COMMA <- ',' Spacing + +# Instruction grammars +Instruction <- + BeginBracket (InstructionItem (InstructionItemSeparator InstructionItem)*)? EndBracket +InstructionItem <- PrecedenceClimbing / ErrorMessage / NoAstOpt +~InstructionItemSeparator <- ';' Spacing + +~SpacesZom <- Space* +~SpacesOom <- Space+ +~BeginBracket <- '{' Spacing +~EndBracket <- '}' Spacing + +# PrecedenceClimbing instruction +PrecedenceClimbing <- "precedence" SpacesOom PrecedenceInfo (SpacesOom PrecedenceInfo)* SpacesZom +PrecedenceInfo <- PrecedenceAssoc (~SpacesOom PrecedenceOpe)+ +PrecedenceOpe <- + ['] <(!(Space / [']) Char)*> ['] + / ["] <(!(Space / ["]) Char)*> ["] + / <(!(PrecedenceAssoc / Space / '}') . )+> +PrecedenceAssoc <- [LR] + +# Error message instruction +ErrorMessage <- "message" SpacesOom LiteralD SpacesZom + +# No Ast node optimization instruction +NoAstOpt <- "no_ast_opt" SpacesZom diff --git a/grammar/csv.peg b/grammar/csv.peg index 1f7c58c3..8ace87db 100644 --- a/grammar/csv.peg +++ b/grammar/csv.peg @@ -1,6 +1,4 @@ -# # CSV grammar based on RFC 4180 (http://www.ietf.org/rfc/rfc4180.txt) -# file <- (header NL)? record (NL record)* NL? header <- name (COMMA name)* @@ -14,5 +12,5 @@ CR <- '\r' DQUOTE <- '"' LF <- '\n' NL <- CR LF / CR / LF -TEXTDATA <- ![",] . +TEXTDATA <- !([",] / NL) . 
D_DQUOTE <- '"' '"' diff --git a/grammar/culebra.peg b/grammar/culebra.peg deleted file mode 100644 index c7a77193..00000000 --- a/grammar/culebra.peg +++ /dev/null @@ -1,89 +0,0 @@ - -PROGRAM <- _ STATEMENTS _ -STATEMENTS <- (STATEMENT (_sp_ (';' / _nl_) (_ STATEMENT)?)*)? -STATEMENT <- DEBUGGER / RETURN / LEXICAL_SCOPE / EXPRESSION - -DEBUGGER <- debugger -RETURN <- return (_sp_ !_nl_ EXPRESSION)? -LEXICAL_SCOPE <- BLOCK - -EXPRESSION <- ASSIGNMENT / LOGICAL_OR - -ASSIGNMENT <- LET _ MUTABLE _ PRIMARY (_ (ARGUMENTS / INDEX / DOT))* _ '=' _ EXPRESSION - -LOGICAL_OR <- LOGICAL_AND (_ '||' _ LOGICAL_AND)* -LOGICAL_AND <- CONDITION (_ '&&' _ CONDITION)* -CONDITION <- ADDITIVE (_ CONDITION_OPERATOR _ ADDITIVE)* -ADDITIVE <- UNARY_PLUS (_ ADDITIVE_OPERATOR _ UNARY_PLUS)* -UNARY_PLUS <- UNARY_PLUS_OPERATOR? UNARY_MINUS -UNARY_MINUS <- UNARY_MINUS_OPERATOR? UNARY_NOT -UNARY_NOT <- UNARY_NOT_OPERATOR? MULTIPLICATIVE -MULTIPLICATIVE <- CALL (_ MULTIPLICATIVE_OPERATOR _ CALL)* - -CALL <- PRIMARY (_ (ARGUMENTS / INDEX / DOT))* -ARGUMENTS <- '(' _ SEQUENCE _ ')' -INDEX <- '[' _ EXPRESSION _ ']' -DOT <- '.' _ IDENTIFIER -SEQUENCE <- (EXPRESSION (_ ',' _ EXPRESSION)*)? - -WHILE <- while _ EXPRESSION _ BLOCK -IF <- if _ EXPRESSION _ BLOCK (_ else _ if _ EXPRESSION _ BLOCK)* (_ else _ BLOCK)? - -PRIMARY <- WHILE / IF / FUNCTION / OBJECT / ARRAY / NIL / BOOLEAN / NUMBER / IDENTIFIER / STRING / INTERPOLATED_STRING / '(' _ EXPRESSION _ ')' - -FUNCTION <- fn _ PARAMETERS _ BLOCK -PARAMETERS <- '(' _ (PARAMETER (_ ',' _ PARAMETER)*)? _ ')' -PARAMETER <- MUTABLE _ IDENTIFIER - -BLOCK <- '{' _ STATEMENTS _ '}' - -CONDITION_OPERATOR <- '==' / '!=' / '<=' / '<' / '>=' / '>' -ADDITIVE_OPERATOR <- [-+] -UNARY_PLUS_OPERATOR <- '+' -UNARY_MINUS_OPERATOR <- '-' -UNARY_NOT_OPERATOR <- '!' -MULTIPLICATIVE_OPERATOR <- [*/%] - -LET <- < ('let' _wd_)? > -MUTABLE <- < ('mut' _wd_)? > - -IDENTIFIER <- < IdentInitChar IdentChar* > - -OBJECT <- '{' _ (OBJECT_PROPERTY (_ ',' _ OBJECT_PROPERTY)*)? 
_ '}' -OBJECT_PROPERTY <- MUTABLE _ IDENTIFIER _ ':' _ EXPRESSION - -ARRAY <- '[' _ SEQUENCE _ ']' (_ '(' _ EXPRESSION (_ ',' _ EXPRESSION)? _ ')')? - -NIL <- < 'nil' _wd_ > -BOOLEAN <- < ('true' / 'false') _wd_ > - -NUMBER <- < [0-9]+ > -STRING <- ['] < (!['] .)* > ['] - -INTERPOLATED_STRING <- '"' ('{' _ EXPRESSION _ '}' / INTERPOLATED_CONTENT)* '"' -INTERPOLATED_CONTENT <- (!["{] .) (!["{] .)* - -~debugger <- 'debugger' _wd_ -~while <- 'while' _wd_ -~if <- 'if' _wd_ -~else <- 'else' _wd_ -~fn <- 'fn' _wd_ -~return <- 'return' _wd_ - -~_ <- (WhiteSpace / End)* -~_sp_ <- SpaceChar* -~_nl_ <- LineComment? End -~_wd_ <- !IdentInitChar - -WhiteSpace <- SpaceChar / Comment -End <- EndOfLine / EndOfFile -Comment <- BlockComment / LineComment - -SpaceChar <- ' ' / '\t' -EndOfLine <- '\r\n' / '\n' / '\r' -EndOfFile <- !. -IdentInitChar <- [a-zA-Z_] -IdentChar <- [a-zA-Z0-9_] -BlockComment <- '/*' (!'*/' .)* '*/' -LineComment <- ('#' / '//') (!End .)* &End - diff --git a/grammar/json.peg b/grammar/json.peg new file mode 100644 index 00000000..75bf4d2e --- /dev/null +++ b/grammar/json.peg @@ -0,0 +1,26 @@ +# JSON grammar based on RFC 4627 (http://www.ietf.org/rfc/rfc4627.txt) + +json <- object / array + +object <- '{' (member (',' member)*)? '}' { no_ast_opt } +member <- string ':' value + +array <- '[' (value (',' value)*)? ']' + +value <- boolean / null / number / string / object / array + +boolean <- 'false' / 'true' +null <- 'null' + +number <- < minus int frac exp > +minus <- '-'? +int <- '0' / [1-9][0-9]* +frac <- ('.' [0-9]+)? +exp <- ([eE] [-+]? [0-9]+)? 
+ +string <- '"' < char* > '"' +char <- unescaped / escaped +escaped <- '\\' (["\\/bfnrt] / 'u' [a-fA-F0-9]{4}) +unescaped <- [\u0020-\u0021\u0023-\u005b\u005d-\u10ffff] + +%whitespace <- [ \t\r\n]* diff --git a/language/culebra/CMakeLists.txt b/language/culebra/CMakeLists.txt deleted file mode 100644 index 8e327211..00000000 --- a/language/culebra/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -cmake_minimum_required(VERSION 3.0) -include_directories(../..) -add_definitions("-std=c++1y") - -add_executable(culebra main.cc) diff --git a/language/culebra/cul.vim b/language/culebra/cul.vim deleted file mode 100644 index 3fa37ee2..00000000 --- a/language/culebra/cul.vim +++ /dev/null @@ -1,41 +0,0 @@ - -syn match culOperator "\%(+\|-\|/\|*\|=\|\^\|&\||\|!\|>\|<\|%\)=\?" -syn match culDecNumber "\<[0-9][0-9_]*" -syn match culFuncCall "\w\(\w\)*("he=e-1,me=e-1 -syn match culError ";" -syn match culError "\s*$" -syn match culLineComment "\(\/\/\|#\).*" contains=@Spell,javaScriptCommentTodo - -syn keyword culFunction fn -syn keyword culSelf self -syn keyword culConditional if else -syn keyword culRepeat while -syn keyword culReturn return -syn keyword culDebugger debugger -syn keyword culBoolean true false -syn keyword culCommentTodo TODO FIXME XXX TBD contained -syn keyword culStorage mut - -syn region culStringS start=+'+ skip=+\\\\\|\\'+ end=+'\|$+ -syn region culStringD start=+"+ skip=+\\\\\|\\"+ end=+"\|$+ -syn region culComment start="/\*" end="\*/" contains=@Spell,javaScriptCommentTodo - -hi def link culBoolean Boolean -hi def link culComment Comment -hi def link culCommentTodo Todo -hi def link culConditional Conditional -hi def link culDecNumber Number -hi def link culFuncCall Function -hi def link culFunction Type -hi def link culLineComment Comment -hi def link culOperator Operator -hi def link culRepeat Repeat -hi def link culReturn Statement -hi def link culDebugger Debug -hi def link culSelf Constant -hi def link culStorage StorageClass -hi def link culStringD 
String -hi def link culStringS String -hi def link culError Error - -let b:current_syntax = "cul" diff --git a/language/culebra/culebra.h b/language/culebra/culebra.h deleted file mode 100644 index 213167fb..00000000 --- a/language/culebra/culebra.h +++ /dev/null @@ -1,1005 +0,0 @@ -#include -#include -#include -#include - -namespace culebra { - -const auto grammar_ = R"( - - PROGRAM <- _ STATEMENTS _ - STATEMENTS <- (STATEMENT (_sp_ (';' / _nl_) (_ STATEMENT)?)*)? - STATEMENT <- DEBUGGER / RETURN / LEXICAL_SCOPE / EXPRESSION - - DEBUGGER <- debugger - RETURN <- return (_sp_ !_nl_ EXPRESSION)? - LEXICAL_SCOPE <- BLOCK - - EXPRESSION <- ASSIGNMENT / LOGICAL_OR - - ASSIGNMENT <- LET _ MUTABLE _ PRIMARY (_ (ARGUMENTS / INDEX / DOT))* _ '=' _ EXPRESSION - - LOGICAL_OR <- LOGICAL_AND (_ '||' _ LOGICAL_AND)* - LOGICAL_AND <- CONDITION (_ '&&' _ CONDITION)* - CONDITION <- ADDITIVE (_ CONDITION_OPERATOR _ ADDITIVE)* - ADDITIVE <- UNARY_PLUS (_ ADDITIVE_OPERATOR _ UNARY_PLUS)* - UNARY_PLUS <- UNARY_PLUS_OPERATOR? UNARY_MINUS - UNARY_MINUS <- UNARY_MINUS_OPERATOR? UNARY_NOT - UNARY_NOT <- UNARY_NOT_OPERATOR? MULTIPLICATIVE - MULTIPLICATIVE <- CALL (_ MULTIPLICATIVE_OPERATOR _ CALL)* - - CALL <- PRIMARY (_ (ARGUMENTS / INDEX / DOT))* - ARGUMENTS <- '(' _ SEQUENCE _ ')' - INDEX <- '[' _ EXPRESSION _ ']' - DOT <- '.' _ IDENTIFIER - - SEQUENCE <- (EXPRESSION (_ ',' _ EXPRESSION)*)? - - WHILE <- while _ EXPRESSION _ BLOCK - IF <- if _ EXPRESSION _ BLOCK (_ else _ if _ EXPRESSION _ BLOCK)* (_ else _ BLOCK)? - - PRIMARY <- WHILE / IF / FUNCTION / OBJECT / ARRAY / NIL / BOOLEAN / NUMBER / IDENTIFIER / STRING / INTERPOLATED_STRING / '(' _ EXPRESSION _ ')' - - FUNCTION <- fn _ PARAMETERS _ BLOCK - PARAMETERS <- '(' _ (PARAMETER (_ ',' _ PARAMETER)*)? 
_ ')' - PARAMETER <- MUTABLE _ IDENTIFIER - - BLOCK <- '{' _ STATEMENTS _ '}' - - CONDITION_OPERATOR <- '==' / '!=' / '<=' / '<' / '>=' / '>' - ADDITIVE_OPERATOR <- [-+] - UNARY_PLUS_OPERATOR <- '+' - UNARY_MINUS_OPERATOR <- '-' - UNARY_NOT_OPERATOR <- '!' - MULTIPLICATIVE_OPERATOR <- [*/%] - - LET <- < ('let' _wd_)? > - MUTABLE <- < ('mut' _wd_)? > - - IDENTIFIER <- < IdentInitChar IdentChar* > - - OBJECT <- '{' _ (OBJECT_PROPERTY (_ ',' _ OBJECT_PROPERTY)*)? _ '}' - OBJECT_PROPERTY <- MUTABLE _ IDENTIFIER _ ':' _ EXPRESSION - - ARRAY <- '[' _ SEQUENCE _ ']' (_ '(' _ EXPRESSION (_ ',' _ EXPRESSION)? _ ')')? - - NIL <- < 'nil' _wd_ > - BOOLEAN <- < ('true' / 'false') _wd_ > - - NUMBER <- < [0-9]+ > - STRING <- ['] < (!['] .)* > ['] - - INTERPOLATED_STRING <- '"' ('{' _ EXPRESSION _ '}' / INTERPOLATED_CONTENT)* '"' - INTERPOLATED_CONTENT <- (!["{] .) (!["{] .)* - - ~debugger <- 'debugger' _wd_ - ~while <- 'while' _wd_ - ~if <- 'if' _wd_ - ~else <- 'else' _wd_ - ~fn <- 'fn' _wd_ - ~return <- 'return' _wd_ - - ~_ <- (WhiteSpace / End)* - ~_sp_ <- SpaceChar* - ~_nl_ <- LineComment? End - ~_wd_ <- !IdentInitChar - - WhiteSpace <- SpaceChar / Comment - End <- EndOfLine / EndOfFile - Comment <- BlockComment / LineComment - - SpaceChar <- ' ' / '\t' - EndOfLine <- '\r\n' / '\n' / '\r' - EndOfFile <- !. 
- IdentInitChar <- [a-zA-Z_] - IdentChar <- [a-zA-Z0-9_] - BlockComment <- '/*' (!'*/' .)* '*/' - LineComment <- ('#' / '//') (!End .)* &End - -)"; - -inline peg::parser& get_parser() -{ - static peg::parser parser; - static bool initialized = false; - - if (!initialized) { - initialized = true; - - parser.log = [&](size_t ln, size_t col, const std::string& msg) { - std::cerr << ln << ":" << col << ": " << msg << std::endl; - }; - - if (!parser.load_grammar(grammar_)) { - throw std::logic_error("invalid peg grammar"); - } - - parser.enable_ast(); - } - - return parser; -} - -struct Value; -struct Symbol; -struct Environment; - -struct FunctionValue { - struct Parameter { - std::string name; - bool mut; - }; - - FunctionValue( - const std::vector& params, - const std::function env)>& eval) - : params(std::make_shared>(params)) - , eval(eval) {} - - std::shared_ptr> params; - std::function env)> eval; -}; - -struct ObjectValue { - ObjectValue() : properties(std::make_shared>()) {} - bool has(const std::string& name) const; - const Value& get(const std::string& name) const; - void assign(const std::string& name, const Value& val); - void initialize(const std::string& name, const Value& val, bool mut); - virtual std::map& builtins(); - - std::shared_ptr> properties; -}; - -struct ArrayValue : public ObjectValue { - ArrayValue() : values(std::make_shared>()) {} - std::map& builtins() override; - - std::shared_ptr> values; -}; - -struct Value -{ - enum Type { Nil, Bool, Long, String, Object, Array, Function }; - - Value() : type(Nil) {} - Value(const Value& rhs) : type(rhs.type), v(rhs.v) {} - Value(Value&& rhs) : type(rhs.type), v(rhs.v) {} - - Value& operator=(const Value& rhs) { - if (this != &rhs) { - type = rhs.type; - v = rhs.v; - } - return *this; - } - - Value& operator=(Value&& rhs) { - type = rhs.type; - v = rhs.v; - return *this; - } - - explicit Value(bool b) : type(Bool), v(b) {} - explicit Value(long l) : type(Long), v(l) {} - explicit Value(std::string&& 
s) : type(String), v(s) {} - explicit Value(ObjectValue&& o) : type(Object), v(o) {} - explicit Value(ArrayValue&& a) : type(Array), v(a) {} - explicit Value(FunctionValue&& f) : type(Function), v(f) {} - - bool to_bool() const { - switch (type) { - case Bool: return v.get(); - case Long: return v.get() != 0; - default: throw std::runtime_error("type error."); - } - } - - long to_long() const { - switch (type) { - //case Bool: return v.get(); - case Long: return v.get(); - default: throw std::runtime_error("type error."); - } - } - - std::string to_string() const { - switch (type) { - case String: return v.get(); - default: throw std::runtime_error("type error."); - } - } - - FunctionValue to_function() const { - switch (type) { - case Function: return v.get(); - default: throw std::runtime_error("type error."); - } - } - - const ObjectValue& to_object() const { - switch (type) { - case Object: return v.get(); - case Array: return v.get(); - default: throw std::runtime_error("type error."); - } - } - - ObjectValue& to_object() { - switch (type) { - case Object: return v.get(); - default: throw std::runtime_error("type error."); - } - } - - const ArrayValue& to_array() const { - switch (type) { - case Array: return v.get(); - default: throw std::runtime_error("type error."); - } - } - - std::string str_object() const; - - std::string str_array() const { - const auto& values = *to_array().values; - std::string s = "["; - for (auto i = 0u; i < values.size(); i++) { - if (i != 0) { - s += ", "; - } - s += values[i].str(); - } - s += "]"; - return s; - } - - std::string str() const { - switch (type) { - case Nil: return "nil"; - case Bool: return to_bool() ? 
"true" : "false"; - case Long: return std::to_string(to_long()); - case String: return "'" + to_string() + "'"; - case Object: return str_object(); - case Array: return str_array(); - case Function: return "[function]"; - default: throw std::logic_error("invalid internal condition."); - } - // NOTREACHED - } - - std::ostream& out(std::ostream& os) const { - os << str(); - return os; - } - - bool operator==(const Value& rhs) const { - switch (type) { - case Nil: return rhs.type == Nil; - case Bool: return to_bool() == rhs.to_bool(); - case Long: return to_long() == rhs.to_long(); - case String: return to_string() == rhs.to_string(); - // TODO: Object and Array support - default: throw std::logic_error("invalid internal condition."); - } - // NOTREACHED - } - - bool operator!=(const Value& rhs) const { - return !operator==(rhs); - } - - bool operator<=(const Value& rhs) const { - switch (type) { - case Nil: return false; - case Bool: return to_bool() <= rhs.to_bool(); - case Long: return to_long() <= rhs.to_long(); - case String: return to_string() <= rhs.to_string(); - // TODO: Object and Array support - default: throw std::logic_error("invalid internal condition."); - } - // NOTREACHED - } - - bool operator<(const Value& rhs) const { - switch (type) { - case Nil: return false; - case Bool: return to_bool() < rhs.to_bool(); - case Long: return to_long() < rhs.to_long(); - case String: return to_string() < rhs.to_string(); - // TODO: Object and Array support - default: throw std::logic_error("invalid internal condition."); - } - // NOTREACHED - } - - bool operator>=(const Value& rhs) const { - switch (type) { - case Nil: return false; - case Bool: return to_bool() >= rhs.to_bool(); - case Long: return to_long() >= rhs.to_long(); - case String: return to_string() >= rhs.to_string(); - // TODO: Object and Array support - default: throw std::logic_error("invalid internal condition."); - } - // NOTREACHED - } - - bool operator>(const Value& rhs) const { - switch (type) { 
- case Nil: return false; - case Bool: return to_bool() > rhs.to_bool(); - case Long: return to_long() > rhs.to_long(); - case String: return to_string() > rhs.to_string(); - // TODO: Object and Array support - default: throw std::logic_error("invalid internal condition."); - } - // NOTREACHED - } - - Type type; - peg::any v; -}; - -struct Symbol { - Value val; - bool mut; -}; - -inline std::ostream& operator<<(std::ostream& os, const Value& val) -{ - return val.out(os); -} - -struct Environment -{ - Environment(std::shared_ptr parent = nullptr) - : level(parent ? parent->level + 1 : 0) { - } - - void append_outer(std::shared_ptr outer) { - if (this->outer) { - this->outer->append_outer(outer); - } else { - this->outer = outer; - } - } - - bool has(const std::string& s) const { - if (dictionary.find(s) != dictionary.end()) { - return true; - } - return outer && outer->has(s); - } - - const Value& get(const std::string& s) const { - if (dictionary.find(s) != dictionary.end()) { - return dictionary.at(s).val; - } else if (outer) { - return outer->get(s); - } - std::string msg = "undefined variable '" + s + "'..."; - throw std::runtime_error(msg); - } - - void assign(const std::string& s, const Value& val) { - assert(has(s)); - if (dictionary.find(s) != dictionary.end()) { - auto& sym = dictionary[s]; - if (!sym.mut) { - std::string msg = "immutable variable '" + s + "'..."; - throw std::runtime_error(msg); - } - sym.val = val; - return; - } - outer->assign(s, val); - return; - } - - void initialize(const std::string& s, const Value& val, bool mut) { - dictionary[s] = Symbol{ val, mut }; - } - - void initialize(const std::string& s, Value&& val, bool mut) { - dictionary[s] = Symbol{ std::move(val), mut }; - } - - size_t level; - std::shared_ptr outer; - std::map dictionary; -}; - -typedef std::function Debugger; - -inline bool ObjectValue::has(const std::string& name) const { - if (properties->find(name) == properties->end()) { - const auto& props = 
const_cast(this)->builtins(); - return props.find(name) != props.end(); - } - return true; -} - -inline const Value& ObjectValue::get(const std::string& name) const { - if (properties->find(name) == properties->end()) { - const auto& props = const_cast(this)->builtins(); - return props.at(name); - } - return properties->at(name).val; -} - -inline void ObjectValue::assign(const std::string& name, const Value& val) { - assert(has(name)); - auto& sym = properties->at(name); - if (!sym.mut) { - std::string msg = "immutable property '" + name + "'..."; - throw std::runtime_error(msg); - } - sym.val = val; - return; -} - -inline void ObjectValue::initialize(const std::string& name, const Value& val, bool mut) { - (*properties)[name] = Symbol{ val, mut }; -} - -inline std::map& ObjectValue::builtins() { - static std::map props_ = { - { - "size", - Value(FunctionValue( - {}, - [](std::shared_ptr callEnv) { - const auto& val = callEnv->get("this"); - long n = val.to_object().properties->size(); - return Value(n); - } - )) - } - }; - return props_; -} - -inline std::map& ArrayValue::builtins() { - static std::map props_ = { - { - "size", - Value(FunctionValue( - {}, - [](std::shared_ptr callEnv) { - const auto& val = callEnv->get("this"); - long n = val.to_array().values->size(); - return Value(n); - } - )) - }, - { - "push", - Value(FunctionValue { - { {"arg", false} }, - [](std::shared_ptr callEnv) { - const auto& val = callEnv->get("this"); - const auto& arg = callEnv->get("arg"); - val.to_array().values->push_back(arg); - return Value(); - } - }) - } - }; - return props_; -} - -inline std::string Value::str_object() const { - const auto& properties = *to_object().properties; - std::string s = "{"; - auto it = properties.begin(); - for (; it != properties.end(); ++it) { - if (it != properties.begin()) { - s += ", "; - } - const auto& name = it->first; - const auto& sym = it->second; - if (sym.mut) { - s += "mut "; - } - s += name; - s += ": "; - s += sym.val.str(); - } - 
s += "}"; - return s; -} - -inline void setup_built_in_functions(Environment& env) { - env.initialize( - "puts", - Value(FunctionValue( - { {"arg", true} }, - [](std::shared_ptr env) { - std::cout << env->get("arg").str() << std::endl; - return Value(); - } - )), - false); - - env.initialize( - "assert", - Value(FunctionValue( - { {"arg", true} }, - [](std::shared_ptr env) { - auto cond = env->get("arg").to_bool(); - if (!cond) { - auto line = env->get("__LINE__").to_long(); - auto column = env->get("__COLUMN__").to_long(); - std::string msg = "assert failed at " + std::to_string(line) + ":" + std::to_string(column) + "."; - throw std::runtime_error(msg); - } - return Value(); - } - )), - false); -} - -struct Interpreter -{ - Interpreter(Debugger debugger = nullptr) - : debugger_(debugger) { - } - - Value eval(const peg::Ast& ast, std::shared_ptr env) { - using peg::operator"" _; - - if (debugger_) { - if (ast.original_tag == "STATEMENT"_) { - auto force_to_break = ast.tag == "DEBUGGER"_; - debugger_(ast, *env, force_to_break); - } - } - - switch (ast.tag) { - case "STATEMENTS"_: return eval_statements(ast, env); - case "WHILE"_: return eval_while(ast, env); - case "IF"_: return eval_if(ast, env); - case "FUNCTION"_: return eval_function(ast, env); - case "CALL"_: return eval_call(ast, env); - case "LEXICAL_SCOPE"_: return eval_lexical_scope(ast, env); - case "ASSIGNMENT"_: return eval_assignment(ast, env); - case "LOGICAL_OR"_: return eval_logical_or(ast, env); - case "LOGICAL_AND"_: return eval_logical_and(ast, env); - case "CONDITION"_: return eval_condition(ast, env); - case "UNARY_PLUS"_: return eval_unary_plus(ast, env); - case "UNARY_MINUS"_: return eval_unary_minus(ast, env); - case "UNARY_NOT"_: return eval_unary_not(ast, env); - case "ADDITIVE"_: - case "MULTIPLICATIVE"_: return eval_bin_expression(ast, env); - case "IDENTIFIER"_: return eval_identifier(ast, env); - case "OBJECT"_: return eval_object(ast, env); - case "ARRAY"_: return eval_array(ast, 
env); - case "NIL"_: return eval_nil(ast, env); - case "BOOLEAN"_: return eval_bool(ast, env); - case "NUMBER"_: return eval_number(ast, env); - case "INTERPOLATED_STRING"_: return eval_interpolated_string(ast, env); - case "DEBUGGER"_: return Value(); - case "RETURN"_: eval_return(ast, env); - } - - if (ast.is_token) { - return Value(std::string(ast.token)); - } - - // NOTREACHED - throw std::logic_error("invalid Ast type"); - } - -private: - Value eval_statements(const peg::Ast& ast, std::shared_ptr env) { - if (ast.is_token) { - return eval(ast, env); - } else if (ast.nodes.empty()) { - return Value(); - } - auto it = ast.nodes.begin(); - while (it != ast.nodes.end() - 1) { - eval(**it, env); - ++it; - } - return eval(**it, env); - } - - Value eval_while(const peg::Ast& ast, std::shared_ptr env) { - for (;;) { - auto cond = eval(*ast.nodes[0], env); - if (!cond.to_bool()) { - break; - } - eval(*ast.nodes[1], env); - } - return Value(); - } - - Value eval_if(const peg::Ast& ast, std::shared_ptr env) { - const auto& nodes = ast.nodes; - - for (auto i = 0u; i < nodes.size(); i += 2) { - if (i + 1 == nodes.size()) { - return eval(*nodes[i], env); - } else { - auto cond = eval(*nodes[i], env); - if (cond.to_bool()) { - return eval(*nodes[i + 1], env); - } - } - } - - return Value(); - } - - Value eval_function(const peg::Ast& ast, std::shared_ptr env) { - std::vector params; - for (auto node: ast.nodes[0]->nodes) { - auto mut = node->nodes[0]->token == "mut"; - const auto& name = node->nodes[1]->token; - params.push_back({ name, mut }); - } - - auto body = ast.nodes[1]; - - return Value(FunctionValue( - params, - [=](std::shared_ptr callEnv) { - callEnv->append_outer(env); - return eval(*body, callEnv); - } - )); - }; - - Value eval_function_call(const peg::Ast& ast, std::shared_ptr env, const Value& val) { - const auto& f = val.to_function(); - const auto& params = *f.params; - const auto& args = ast.nodes; - - if (params.size() <= args.size()) { - auto callEnv = 
std::make_shared(env); - callEnv->initialize("self", val, false); - for (auto iprm = 0u; iprm < params.size(); iprm++) { - auto param = params[iprm]; - auto arg = args[iprm]; - auto val = eval(*arg, env); - callEnv->initialize(param.name, val, param.mut); - } - callEnv->initialize("__LINE__", Value((long)ast.line), false); - callEnv->initialize("__COLUMN__", Value((long)ast.column), false); - try { - return f.eval(callEnv); - } catch (const Value& e) { - return e; - } - } - - std::string msg = "arguments error..."; - throw std::runtime_error(msg); - } - - Value eval_array_reference(const peg::Ast& ast, std::shared_ptr env, const Value& val) { - const auto& arr = val.to_array(); - auto idx = eval(ast, env).to_long(); - if (idx < 0) { - idx = arr.values->size() + idx; - } - if (0 <= idx && idx < static_cast(arr.values->size())) { - return arr.values->at(idx); - } else { - throw std::logic_error("index out of range."); - } - return val; - } - - Value eval_property(const peg::Ast& ast, std::shared_ptr env, const Value& val) { - const auto& obj = val.to_object(); - auto name = ast.token; - if (!obj.has(name)) { - return Value(); - } - const auto& prop = obj.get(name); - if (prop.type == Value::Function) { - const auto& pf = prop.to_function(); - return Value(FunctionValue( - *pf.params, - [=](std::shared_ptr callEnv) { - callEnv->initialize("this", val, false); - return pf.eval(callEnv); - } - )); - } - return prop; - } - - Value eval_call(const peg::Ast& ast, std::shared_ptr env) { - using peg::operator"" _; - - Value val = eval(*ast.nodes[0], env); - - for (auto i = 1u; i < ast.nodes.size(); i++) { - const auto& postfix = *ast.nodes[i]; - - switch (postfix.original_tag) { - case "ARGUMENTS"_: val = eval_function_call(postfix, env, val); break; - case "INDEX"_: val = eval_array_reference(postfix, env, val); break; - case "DOT"_: val = eval_property(postfix, env, val); break; - default: throw std::logic_error("invalid internal condition."); - } - } - - return val; - } - 
- Value eval_lexical_scope(const peg::Ast& ast, std::shared_ptr env) { - auto scopeEnv = std::make_shared(); - scopeEnv->append_outer(env); - for (auto node: ast.nodes) { - eval(*node, scopeEnv); - } - return Value(); - } - - Value eval_logical_or(const peg::Ast& ast, std::shared_ptr env) { - assert(ast.nodes.size() > 1); // if the size is 1, thes node will be hoisted. - Value val; - for (auto node: ast.nodes) { - val = eval(*node, env); - if (val.to_bool()) { - return val; - } - } - return val; - } - - Value eval_logical_and(const peg::Ast& ast, std::shared_ptr env) { - Value val; - for (auto node: ast.nodes) { - val = eval(*node, env); - if (!val.to_bool()) { - return val; - } - } - return val; - } - - Value eval_condition(const peg::Ast& ast, std::shared_ptr env) { - auto lhs = eval(*ast.nodes[0], env); - auto ope = eval(*ast.nodes[1], env).to_string(); - auto rhs = eval(*ast.nodes[2], env); - - if (ope == "==") { return Value(lhs == rhs); } - else if (ope == "!=") { return Value(lhs != rhs); } - else if (ope == "<=") { return Value(lhs <= rhs); } - else if (ope == "<") { return Value(lhs < rhs); } - else if (ope == ">=") { return Value(lhs >= rhs); } - else if (ope == ">") { return Value(lhs > rhs); } - else { throw std::logic_error("invalid internal condition."); } - } - - Value eval_unary_plus(const peg::Ast& ast, std::shared_ptr env) { - return eval(*ast.nodes[1], env); - } - - Value eval_unary_minus(const peg::Ast& ast, std::shared_ptr env) { - return Value(eval(*ast.nodes[1], env).to_long() * -1); - } - - Value eval_unary_not(const peg::Ast& ast, std::shared_ptr env) { - return Value(!eval(*ast.nodes[1], env).to_bool()); - } - - Value eval_bin_expression(const peg::Ast& ast, std::shared_ptr env) { - auto ret = eval(*ast.nodes[0], env).to_long(); - for (auto i = 1u; i < ast.nodes.size(); i += 2) { - auto val = eval(*ast.nodes[i + 1], env).to_long(); - auto ope = eval(*ast.nodes[i], env).to_string()[0]; - switch (ope) { - case '+': ret += val; break; - case 
'-': ret -= val; break; - case '*': ret *= val; break; - case '%': ret %= val; break; - case '/': - if (val == 0) { - throw std::runtime_error("divide by 0 error"); - } - ret /= val; - break; - } - } - return Value(ret); - } - - bool is_keyword(const std::string& ident) const { - static std::set keywords = { "nil", "true", "false", "mut", "debugger", "return", "while", "if", "else", "fn" }; - return keywords.find(ident) != keywords.end(); - } - - Value eval_assignment(const peg::Ast& ast, std::shared_ptr env) { - auto lvaloff = 2; - auto lvalcnt = ast.nodes.size() - 3; - - auto let = ast.nodes[0]->token == "let"; - auto mut = ast.nodes[1]->token == "mut"; - auto rval = eval(*ast.nodes.back(), env); - - if (lvalcnt == 1) { - const auto& ident = ast.nodes[lvaloff]->token; - if (!let && env->has(ident)) { - env->assign(ident, rval); - } else if (is_keyword(ident)) { - throw std::runtime_error("left-hand side is invalid variable name."); - } else { - env->initialize(ident, rval, mut); - } - return rval; - } else { - using peg::operator"" _; - - Value lval = eval(*ast.nodes[lvaloff], env); - - auto end = lvaloff + lvalcnt - 1; - for (auto i = lvaloff + 1; i < end; i++) { - const auto& postfix = *ast.nodes[i]; - - switch (postfix.original_tag) { - case "ARGUMENTS"_: lval = eval_function_call(postfix, env, lval); break; - case "INDEX"_: lval = eval_array_reference(postfix, env, lval); break; - case "DOT"_: lval = eval_property(postfix, env, lval); break; - default: throw std::logic_error("invalid internal condition."); - } - } - - const auto& postfix = *ast.nodes[end]; - - switch (postfix.original_tag) { - case "INDEX"_: { - const auto& arr = lval.to_array(); - auto idx = eval(postfix, env).to_long(); - if (0 <= idx && idx < static_cast(arr.values->size())) { - arr.values->at(idx) = rval; - } else { - throw std::logic_error("index out of range."); - } - return rval; - } - case "DOT"_: { - auto& obj = lval.to_object(); - auto name = postfix.token; - if (obj.has(name)) { - 
obj.assign(name, rval); - } else { - obj.initialize(name, rval, mut); - } - return rval; - } - default: - throw std::logic_error("invalid internal condition."); - } - } - }; - - Value eval_identifier(const peg::Ast& ast, std::shared_ptr env) { - return env->get(ast.token); - }; - - Value eval_object(const peg::Ast& ast, std::shared_ptr env) { - ObjectValue obj; - for (auto i = 0u; i < ast.nodes.size(); i++) { - const auto& prop = *ast.nodes[i]; - auto mut = prop.nodes[0]->token == "mut"; - const auto& name = prop.nodes[1]->token; - auto val = eval(*prop.nodes[2], env); - obj.properties->emplace(name, Symbol{ std::move(val), mut }); - } - return Value(std::move(obj)); - } - - Value eval_array(const peg::Ast& ast, std::shared_ptr env) { - ArrayValue arr; - - if (ast.nodes.size() >= 2) { - auto count = eval(*ast.nodes[1], env).to_long(); - if (ast.nodes.size() == 3) { - auto val = eval(*ast.nodes[2], env); - arr.values->resize(count, std::move(val)); - } else { - arr.values->resize(count); - } - } - - const auto& nodes = ast.nodes[0]->nodes; - for (auto i = 0u; i < nodes.size(); i++) { - auto expr = nodes[i]; - auto val = eval(*expr, env); - if (i < arr.values->size()) { - arr.values->at(i) = std::move(val); - } else { - arr.values->push_back(std::move(val)); - } - } - - return Value(std::move(arr)); - } - - Value eval_nil(const peg::Ast& ast, std::shared_ptr env) { - return Value(); - }; - - Value eval_bool(const peg::Ast& ast, std::shared_ptr env) { - return Value(ast.token == "true"); - }; - - Value eval_number(const peg::Ast& ast, std::shared_ptr env) { - return Value(stol(ast.token)); - }; - - Value eval_interpolated_string(const peg::Ast& ast, std::shared_ptr env) { - std::string s; - for (auto node: ast.nodes) { - const auto& val = eval(*node, env); - if (val.type == Value::String) { - s += val.to_string(); - } else { - s += val.str(); - } - } - return Value(std::move(s)); - }; - - void eval_return(const peg::Ast& ast, std::shared_ptr env) { - if 
(ast.nodes.empty()) { - throw Value(); - } else { - throw eval(*ast.nodes[0], env); - } - } - - Debugger debugger_; -}; - -inline std::shared_ptr parse( - const std::string& path, - const char* expr, - size_t len, - std::vector& msgs) -{ - auto& parser = get_parser(); - - parser.log = [&](size_t ln, size_t col, const std::string& err_msg) { - std::stringstream ss; - ss << path << ":" << ln << ":" << col << ": " << err_msg << std::endl; - msgs.push_back(ss.str()); - }; - - std::shared_ptr ast; - if (parser.parse_n(expr, len, ast, path.c_str())) { - return peg::AstOptimizer(true, { "PARAMETERS", "SEQUENCE", "OBJECT", "ARRAY", "RETURN", "LEXICAL_SCOPE" }).optimize(ast); - } - - return nullptr; -} - -inline bool interpret( - const std::shared_ptr& ast, - std::shared_ptr env, - Value& val, - std::vector& msgs, - Debugger debugger = nullptr) -{ - try { - val = Interpreter(debugger).eval(*ast, env); - return true; - } catch (const Value& e) { - val = e; - return true; - } catch (std::runtime_error& e) { - msgs.push_back(e.what()); - } - - return false; -} - -} // namespace culebra diff --git a/language/culebra/culebra.sln b/language/culebra/culebra.sln deleted file mode 100644 index ced9fb73..00000000 --- a/language/culebra/culebra.sln +++ /dev/null @@ -1,22 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 14 -VisualStudioVersion = 14.0.23107.0 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "culebra", "culebra.vcxproj", "{F85B641A-7538-4809-8175-C528FF632CF6}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Win32 = Debug|Win32 - Release|Win32 = Release|Win32 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {F85B641A-7538-4809-8175-C528FF632CF6}.Debug|Win32.ActiveCfg = Debug|Win32 - {F85B641A-7538-4809-8175-C528FF632CF6}.Debug|Win32.Build.0 = Debug|Win32 - 
{F85B641A-7538-4809-8175-C528FF632CF6}.Release|Win32.ActiveCfg = Release|Win32 - {F85B641A-7538-4809-8175-C528FF632CF6}.Release|Win32.Build.0 = Release|Win32 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection -EndGlobal diff --git a/language/culebra/culebra.vcxproj b/language/culebra/culebra.vcxproj deleted file mode 100644 index dcb7333c..00000000 --- a/language/culebra/culebra.vcxproj +++ /dev/null @@ -1,93 +0,0 @@ - - - - - Debug - Win32 - - - Release - Win32 - - - - - - - - - - - - {F85B641A-7538-4809-8175-C528FF632CF6} - Win32Proj - culebra - - - - Application - true - Unicode - v140 - - - Application - false - true - Unicode - v140 - - - - - - - - - - - - - true - - - false - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - ../.. - - - Console - true - Ws2_32.lib;%(AdditionalDependencies) - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - ../.. - - - Console - true - true - true - Ws2_32.lib;%(AdditionalDependencies) - - - - - - \ No newline at end of file diff --git a/language/culebra/linenoise.hpp b/language/culebra/linenoise.hpp deleted file mode 100644 index bacd245b..00000000 --- a/language/culebra/linenoise.hpp +++ /dev/null @@ -1,1970 +0,0 @@ -/* - * linenoise.hpp -- Multi-platfrom C++ header-only linenoise library. - * - * All credits and commendations have to go to the authors of the - * following excellent libraries. - * - * - linenoise.h and linenose.c (https://github.com/antirez/linenoise) - * - ANSI.c (https://github.com/adoxa/ansicon) - * - Win32_ANSI.h and Win32_ANSI.c (https://github.com/MSOpenTech/redis) - * - * ------------------------------------------------------------------------ - * - * Copyright (c) 2015 yhirose - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* linenoise.h -- guerrilla line editing library against the idea that a - * line editing lib needs to be 20,000 lines of C code. - * - * See linenoise.c for more information. - * - * ------------------------------------------------------------------------ - * - * Copyright (c) 2010, Salvatore Sanfilippo - * Copyright (c) 2010, Pieter Noordhuis - * - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * ANSI.c - ANSI escape sequence console driver. - * - * Copyright (C) 2005-2014 Jason Hood - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the author be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgment in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. 
- * - * Jason Hood - * jadoxa@yahoo.com.au - */ - -/* - * Win32_ANSI.h and Win32_ANSI.c - * - * Derived from ANSI.c by Jason Hood, from his ansicon project (https://github.com/adoxa/ansicon), with modifications. - * - * Copyright (c), Microsoft Open Technologies, Inc. - * All rights reserved. - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __LINENOISE_HPP -#define __LINENOISE_HPP - -#ifndef _WIN32 -#include -#include -#include -#else -#define NOMINMAX -#include -#include -#ifndef STDIN_FILENO -#define STDIN_FILENO (_fileno(stdin)) -#endif -#ifndef STDOUT_FILENO -#define STDOUT_FILENO 1 -#endif -#define isatty _isatty -#define write win32_write -#define read _read -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace linenoise { - -typedef std::function&)> CompletionCallback; - -#ifdef _WIN32 - -namespace ansi { - -#define lenof(array) (sizeof(array)/sizeof(*(array))) - -typedef struct -{ - BYTE foreground; // ANSI base color (0 to 7; add 30) - BYTE background; // ANSI base color (0 to 7; add 40) - BYTE bold; // console FOREGROUND_INTENSITY bit - BYTE underline; // console BACKGROUND_INTENSITY bit - BYTE rvideo; // swap foreground/bold & background/underline - BYTE concealed; // set foreground/bold to background/underline - BYTE reverse; // swap console foreground & background attributes -} GRM, *PGRM; // Graphic Rendition Mode - - -inline bool is_digit(char c) { return '0' <= c && c <= '9'; } - -// ========== Global variables and constants - -HANDLE hConOut; // handle to CONOUT$ - -const char ESC = '\x1B'; // ESCape character -const char BEL = '\x07'; -const char SO = '\x0E'; // Shift Out -const char SI = '\x0F'; // Shift In - -const size_t MAX_ARG = 16; // max number of args in an escape sequence -int state; // automata state -TCHAR prefix; // escape sequence prefix ( '[', ']' or '(' ); -TCHAR prefix2; // secondary prefix ( '?' 
or '>' ); -TCHAR suffix; // escape sequence suffix -int es_argc; // escape sequence args count -int es_argv[MAX_ARG]; // escape sequence args -TCHAR Pt_arg[MAX_PATH * 2]; // text parameter for Operating System Command -int Pt_len; -BOOL shifted; - - -// DEC Special Graphics Character Set from -// http://vt100.net/docs/vt220-rm/table2-4.html -// Some of these may not look right, depending on the font and code page (in -// particular, the Control Pictures probably won't work at all). -const WCHAR G1[] = -{ - ' ', // _ - blank - L'\x2666', // ` - Black Diamond Suit - L'\x2592', // a - Medium Shade - L'\x2409', // b - HT - L'\x240c', // c - FF - L'\x240d', // d - CR - L'\x240a', // e - LF - L'\x00b0', // f - Degree Sign - L'\x00b1', // g - Plus-Minus Sign - L'\x2424', // h - NL - L'\x240b', // i - VT - L'\x2518', // j - Box Drawings Light Up And Left - L'\x2510', // k - Box Drawings Light Down And Left - L'\x250c', // l - Box Drawings Light Down And Right - L'\x2514', // m - Box Drawings Light Up And Right - L'\x253c', // n - Box Drawings Light Vertical And Horizontal - L'\x00af', // o - SCAN 1 - Macron - L'\x25ac', // p - SCAN 3 - Black Rectangle - L'\x2500', // q - SCAN 5 - Box Drawings Light Horizontal - L'_', // r - SCAN 7 - Low Line - L'_', // s - SCAN 9 - Low Line - L'\x251c', // t - Box Drawings Light Vertical And Right - L'\x2524', // u - Box Drawings Light Vertical And Left - L'\x2534', // v - Box Drawings Light Up And Horizontal - L'\x252c', // w - Box Drawings Light Down And Horizontal - L'\x2502', // x - Box Drawings Light Vertical - L'\x2264', // y - Less-Than Or Equal To - L'\x2265', // z - Greater-Than Or Equal To - L'\x03c0', // { - Greek Small Letter Pi - L'\x2260', // | - Not Equal To - L'\x00a3', // } - Pound Sign - L'\x00b7', // ~ - Middle Dot -}; - -#define FIRST_G1 '_' -#define LAST_G1 '~' - - -// color constants - -#define FOREGROUND_BLACK 0 -#define FOREGROUND_WHITE FOREGROUND_RED|FOREGROUND_GREEN|FOREGROUND_BLUE - -#define BACKGROUND_BLACK 0 
-#define BACKGROUND_WHITE BACKGROUND_RED|BACKGROUND_GREEN|BACKGROUND_BLUE - -const BYTE foregroundcolor[8] = - { - FOREGROUND_BLACK, // black foreground - FOREGROUND_RED, // red foreground - FOREGROUND_GREEN, // green foreground - FOREGROUND_RED | FOREGROUND_GREEN, // yellow foreground - FOREGROUND_BLUE, // blue foreground - FOREGROUND_BLUE | FOREGROUND_RED, // magenta foreground - FOREGROUND_BLUE | FOREGROUND_GREEN, // cyan foreground - FOREGROUND_WHITE // white foreground - }; - -const BYTE backgroundcolor[8] = - { - BACKGROUND_BLACK, // black background - BACKGROUND_RED, // red background - BACKGROUND_GREEN, // green background - BACKGROUND_RED | BACKGROUND_GREEN, // yellow background - BACKGROUND_BLUE, // blue background - BACKGROUND_BLUE | BACKGROUND_RED, // magenta background - BACKGROUND_BLUE | BACKGROUND_GREEN, // cyan background - BACKGROUND_WHITE, // white background - }; - -const BYTE attr2ansi[8] = // map console attribute to ANSI number -{ - 0, // black - 4, // blue - 2, // green - 6, // cyan - 1, // red - 5, // magenta - 3, // yellow - 7 // white -}; - -GRM grm; - -// saved cursor position -COORD SavePos; - -// ========== Print Buffer functions - -#define BUFFER_SIZE 2048 - -int nCharInBuffer; -WCHAR ChBuffer[BUFFER_SIZE]; - -//----------------------------------------------------------------------------- -// FlushBuffer() -// Writes the buffer to the console and empties it. -//----------------------------------------------------------------------------- - -inline void FlushBuffer(void) -{ - DWORD nWritten; - if (nCharInBuffer <= 0) return; - WriteConsole(hConOut, ChBuffer, nCharInBuffer, &nWritten, NULL); - nCharInBuffer = 0; -} - -//----------------------------------------------------------------------------- -// PushBuffer( WCHAR c ) -// Adds a character in the buffer. 
-//----------------------------------------------------------------------------- - -inline void PushBuffer(WCHAR c) -{ - if (shifted && c >= FIRST_G1 && c <= LAST_G1) - c = G1[c - FIRST_G1]; - ChBuffer[nCharInBuffer] = c; - if (++nCharInBuffer == BUFFER_SIZE) - FlushBuffer(); -} - -//----------------------------------------------------------------------------- -// SendSequence( LPTSTR seq ) -// Send the string to the input buffer. -//----------------------------------------------------------------------------- - -inline void SendSequence(LPTSTR seq) -{ - DWORD out; - INPUT_RECORD in; - HANDLE hStdIn = GetStdHandle(STD_INPUT_HANDLE); - - in.EventType = KEY_EVENT; - in.Event.KeyEvent.bKeyDown = TRUE; - in.Event.KeyEvent.wRepeatCount = 1; - in.Event.KeyEvent.wVirtualKeyCode = 0; - in.Event.KeyEvent.wVirtualScanCode = 0; - in.Event.KeyEvent.dwControlKeyState = 0; - for (; *seq; ++seq) - { - in.Event.KeyEvent.uChar.UnicodeChar = *seq; - WriteConsoleInput(hStdIn, &in, 1, &out); - } -} - -// ========== Print functions - -//----------------------------------------------------------------------------- -// InterpretEscSeq() -// Interprets the last escape sequence scanned by ParseAndPrintANSIString -// prefix escape sequence prefix -// es_argc escape sequence args count -// es_argv[] escape sequence args array -// suffix escape sequence suffix -// -// for instance, with \e[33;45;1m we have -// prefix = '[', -// es_argc = 3, es_argv[0] = 33, es_argv[1] = 45, es_argv[2] = 1 -// suffix = 'm' -//----------------------------------------------------------------------------- - -inline void InterpretEscSeq(void) -{ - int i; - WORD attribut; - CONSOLE_SCREEN_BUFFER_INFO Info; - CONSOLE_CURSOR_INFO CursInfo; - DWORD len, NumberOfCharsWritten; - COORD Pos; - SMALL_RECT Rect; - CHAR_INFO CharInfo; - - if (prefix == '[') - { - if (prefix2 == '?' 
&& (suffix == 'h' || suffix == 'l')) - { - if (es_argc == 1 && es_argv[0] == 25) - { - GetConsoleCursorInfo(hConOut, &CursInfo); - CursInfo.bVisible = (suffix == 'h'); - SetConsoleCursorInfo(hConOut, &CursInfo); - return; - } - } - // Ignore any other \e[? or \e[> sequences. - if (prefix2 != 0) - return; - - GetConsoleScreenBufferInfo(hConOut, &Info); - switch (suffix) - { - case 'm': - if (es_argc == 0) es_argv[es_argc++] = 0; - for (i = 0; i < es_argc; i++) - { - if (30 <= es_argv[i] && es_argv[i] <= 37) - grm.foreground = es_argv[i] - 30; - else if (40 <= es_argv[i] && es_argv[i] <= 47) - grm.background = es_argv[i] - 40; - else switch (es_argv[i]) - { - case 0: - case 39: - case 49: - { - TCHAR def[4]; - int a; - *def = '7'; def[1] = '\0'; - GetEnvironmentVariable(L"ANSICON_DEF", def, lenof(def)); - a = wcstol(def, NULL, 16); - grm.reverse = FALSE; - if (a < 0) - { - grm.reverse = TRUE; - a = -a; - } - if (es_argv[i] != 49) - grm.foreground = attr2ansi[a & 7]; - if (es_argv[i] != 39) - grm.background = attr2ansi[(a >> 4) & 7]; - if (es_argv[i] == 0) - { - if (es_argc == 1) - { - grm.bold = a & FOREGROUND_INTENSITY; - grm.underline = a & BACKGROUND_INTENSITY; - } - else - { - grm.bold = 0; - grm.underline = 0; - } - grm.rvideo = 0; - grm.concealed = 0; - } - } - break; - - case 1: grm.bold = FOREGROUND_INTENSITY; break; - case 5: // blink - case 4: grm.underline = BACKGROUND_INTENSITY; break; - case 7: grm.rvideo = 1; break; - case 8: grm.concealed = 1; break; - case 21: // oops, this actually turns on double underline - case 22: grm.bold = 0; break; - case 25: - case 24: grm.underline = 0; break; - case 27: grm.rvideo = 0; break; - case 28: grm.concealed = 0; break; - } - } - if (grm.concealed) - { - if (grm.rvideo) - { - attribut = foregroundcolor[grm.foreground] - | backgroundcolor[grm.foreground]; - if (grm.bold) - attribut |= FOREGROUND_INTENSITY | BACKGROUND_INTENSITY; - } - else - { - attribut = foregroundcolor[grm.background] - | 
backgroundcolor[grm.background]; - if (grm.underline) - attribut |= FOREGROUND_INTENSITY | BACKGROUND_INTENSITY; - } - } - else if (grm.rvideo) - { - attribut = foregroundcolor[grm.background] - | backgroundcolor[grm.foreground]; - if (grm.bold) - attribut |= BACKGROUND_INTENSITY; - if (grm.underline) - attribut |= FOREGROUND_INTENSITY; - } - else - attribut = foregroundcolor[grm.foreground] | grm.bold - | backgroundcolor[grm.background] | grm.underline; - if (grm.reverse) - attribut = ((attribut >> 4) & 15) | ((attribut & 15) << 4); - SetConsoleTextAttribute(hConOut, attribut); - return; - - case 'J': - if (es_argc == 0) es_argv[es_argc++] = 0; // ESC[J == ESC[0J - if (es_argc != 1) return; - switch (es_argv[0]) - { - case 0: // ESC[0J erase from cursor to end of display - len = (Info.dwSize.Y - Info.dwCursorPosition.Y - 1) * Info.dwSize.X - + Info.dwSize.X - Info.dwCursorPosition.X - 1; - FillConsoleOutputCharacter(hConOut, ' ', len, - Info.dwCursorPosition, - &NumberOfCharsWritten); - FillConsoleOutputAttribute(hConOut, Info.wAttributes, len, - Info.dwCursorPosition, - &NumberOfCharsWritten); - return; - - case 1: // ESC[1J erase from start to cursor. 
- Pos.X = 0; - Pos.Y = 0; - len = Info.dwCursorPosition.Y * Info.dwSize.X - + Info.dwCursorPosition.X + 1; - FillConsoleOutputCharacter(hConOut, ' ', len, Pos, - &NumberOfCharsWritten); - FillConsoleOutputAttribute(hConOut, Info.wAttributes, len, Pos, - &NumberOfCharsWritten); - return; - - case 2: // ESC[2J Clear screen and home cursor - Pos.X = 0; - Pos.Y = 0; - len = Info.dwSize.X * Info.dwSize.Y; - FillConsoleOutputCharacter(hConOut, ' ', len, Pos, - &NumberOfCharsWritten); - FillConsoleOutputAttribute(hConOut, Info.wAttributes, len, Pos, - &NumberOfCharsWritten); - SetConsoleCursorPosition(hConOut, Pos); - return; - - default: - return; - } - - case 'K': - if (es_argc == 0) es_argv[es_argc++] = 0; // ESC[K == ESC[0K - if (es_argc != 1) return; - switch (es_argv[0]) - { - case 0: // ESC[0K Clear to end of line - len = Info.dwSize.X - Info.dwCursorPosition.X + 1; - FillConsoleOutputCharacter(hConOut, ' ', len, - Info.dwCursorPosition, - &NumberOfCharsWritten); - FillConsoleOutputAttribute(hConOut, Info.wAttributes, len, - Info.dwCursorPosition, - &NumberOfCharsWritten); - return; - - case 1: // ESC[1K Clear from start of line to cursor - Pos.X = 0; - Pos.Y = Info.dwCursorPosition.Y; - FillConsoleOutputCharacter(hConOut, ' ', - Info.dwCursorPosition.X + 1, Pos, - &NumberOfCharsWritten); - FillConsoleOutputAttribute(hConOut, Info.wAttributes, - Info.dwCursorPosition.X + 1, Pos, - &NumberOfCharsWritten); - return; - - case 2: // ESC[2K Clear whole line. - Pos.X = 0; - Pos.Y = Info.dwCursorPosition.Y; - FillConsoleOutputCharacter(hConOut, ' ', Info.dwSize.X, Pos, - &NumberOfCharsWritten); - FillConsoleOutputAttribute(hConOut, Info.wAttributes, - Info.dwSize.X, Pos, - &NumberOfCharsWritten); - return; - - default: - return; - } - - case 'X': // ESC[#X Erase # characters. 
- if (es_argc == 0) es_argv[es_argc++] = 1; // ESC[X == ESC[1X - if (es_argc != 1) return; - FillConsoleOutputCharacter(hConOut, ' ', es_argv[0], - Info.dwCursorPosition, - &NumberOfCharsWritten); - FillConsoleOutputAttribute(hConOut, Info.wAttributes, es_argv[0], - Info.dwCursorPosition, - &NumberOfCharsWritten); - return; - - case 'L': // ESC[#L Insert # blank lines. - if (es_argc == 0) es_argv[es_argc++] = 1; // ESC[L == ESC[1L - if (es_argc != 1) return; - Rect.Left = 0; - Rect.Top = Info.dwCursorPosition.Y; - Rect.Right = Info.dwSize.X - 1; - Rect.Bottom = Info.dwSize.Y - 1; - Pos.X = 0; - Pos.Y = Info.dwCursorPosition.Y + es_argv[0]; - CharInfo.Char.UnicodeChar = ' '; - CharInfo.Attributes = Info.wAttributes; - ScrollConsoleScreenBuffer(hConOut, &Rect, NULL, Pos, &CharInfo); - return; - - case 'M': // ESC[#M Delete # lines. - if (es_argc == 0) es_argv[es_argc++] = 1; // ESC[M == ESC[1M - if (es_argc != 1) return; - if (es_argv[0] > Info.dwSize.Y - Info.dwCursorPosition.Y) - es_argv[0] = Info.dwSize.Y - Info.dwCursorPosition.Y; - Rect.Left = 0; - Rect.Top = Info.dwCursorPosition.Y + es_argv[0]; - Rect.Right = Info.dwSize.X - 1; - Rect.Bottom = Info.dwSize.Y - 1; - Pos.X = 0; - Pos.Y = Info.dwCursorPosition.Y; - CharInfo.Char.UnicodeChar = ' '; - CharInfo.Attributes = Info.wAttributes; - ScrollConsoleScreenBuffer(hConOut, &Rect, NULL, Pos, &CharInfo); - return; - - case 'P': // ESC[#P Delete # characters. 
- if (es_argc == 0) es_argv[es_argc++] = 1; // ESC[P == ESC[1P - if (es_argc != 1) return; - if (Info.dwCursorPosition.X + es_argv[0] > Info.dwSize.X - 1) - es_argv[0] = Info.dwSize.X - Info.dwCursorPosition.X; - Rect.Left = Info.dwCursorPosition.X + es_argv[0]; - Rect.Top = Info.dwCursorPosition.Y; - Rect.Right = Info.dwSize.X - 1; - Rect.Bottom = Info.dwCursorPosition.Y; - CharInfo.Char.UnicodeChar = ' '; - CharInfo.Attributes = Info.wAttributes; - ScrollConsoleScreenBuffer(hConOut, &Rect, NULL, Info.dwCursorPosition, - &CharInfo); - return; - - case '@': // ESC[#@ Insert # blank characters. - if (es_argc == 0) es_argv[es_argc++] = 1; // ESC[@ == ESC[1@ - if (es_argc != 1) return; - if (Info.dwCursorPosition.X + es_argv[0] > Info.dwSize.X - 1) - es_argv[0] = Info.dwSize.X - Info.dwCursorPosition.X; - Rect.Left = Info.dwCursorPosition.X; - Rect.Top = Info.dwCursorPosition.Y; - Rect.Right = Info.dwSize.X - 1 - es_argv[0]; - Rect.Bottom = Info.dwCursorPosition.Y; - Pos.X = Info.dwCursorPosition.X + es_argv[0]; - Pos.Y = Info.dwCursorPosition.Y; - CharInfo.Char.UnicodeChar = ' '; - CharInfo.Attributes = Info.wAttributes; - ScrollConsoleScreenBuffer(hConOut, &Rect, NULL, Pos, &CharInfo); - return; - - case 'k': // ESC[#k - case 'A': // ESC[#A Moves cursor up # lines - if (es_argc == 0) es_argv[es_argc++] = 1; // ESC[A == ESC[1A - if (es_argc != 1) return; - Pos.Y = Info.dwCursorPosition.Y - es_argv[0]; - if (Pos.Y < 0) Pos.Y = 0; - Pos.X = Info.dwCursorPosition.X; - SetConsoleCursorPosition(hConOut, Pos); - return; - - case 'e': // ESC[#e - case 'B': // ESC[#B Moves cursor down # lines - if (es_argc == 0) es_argv[es_argc++] = 1; // ESC[B == ESC[1B - if (es_argc != 1) return; - Pos.Y = Info.dwCursorPosition.Y + es_argv[0]; - if (Pos.Y >= Info.dwSize.Y) Pos.Y = Info.dwSize.Y - 1; - Pos.X = Info.dwCursorPosition.X; - SetConsoleCursorPosition(hConOut, Pos); - return; - - case 'a': // ESC[#a - case 'C': // ESC[#C Moves cursor forward # spaces - if (es_argc == 0) 
es_argv[es_argc++] = 1; // ESC[C == ESC[1C - if (es_argc != 1) return; - Pos.X = Info.dwCursorPosition.X + es_argv[0]; - if (Pos.X >= Info.dwSize.X) Pos.X = Info.dwSize.X - 1; - Pos.Y = Info.dwCursorPosition.Y; - SetConsoleCursorPosition(hConOut, Pos); - return; - - case 'j': // ESC[#j - case 'D': // ESC[#D Moves cursor back # spaces - if (es_argc == 0) es_argv[es_argc++] = 1; // ESC[D == ESC[1D - if (es_argc != 1) return; - Pos.X = Info.dwCursorPosition.X - es_argv[0]; - if (Pos.X < 0) Pos.X = 0; - Pos.Y = Info.dwCursorPosition.Y; - SetConsoleCursorPosition(hConOut, Pos); - return; - - case 'E': // ESC[#E Moves cursor down # lines, column 1. - if (es_argc == 0) es_argv[es_argc++] = 1; // ESC[E == ESC[1E - if (es_argc != 1) return; - Pos.Y = Info.dwCursorPosition.Y + es_argv[0]; - if (Pos.Y >= Info.dwSize.Y) Pos.Y = Info.dwSize.Y - 1; - Pos.X = 0; - SetConsoleCursorPosition(hConOut, Pos); - return; - - case 'F': // ESC[#F Moves cursor up # lines, column 1. - if (es_argc == 0) es_argv[es_argc++] = 1; // ESC[F == ESC[1F - if (es_argc != 1) return; - Pos.Y = Info.dwCursorPosition.Y - es_argv[0]; - if (Pos.Y < 0) Pos.Y = 0; - Pos.X = 0; - SetConsoleCursorPosition(hConOut, Pos); - return; - - case '`': // ESC[#` - case 'G': // ESC[#G Moves cursor column # in current row. - if (es_argc == 0) es_argv[es_argc++] = 1; // ESC[G == ESC[1G - if (es_argc != 1) return; - Pos.X = es_argv[0] - 1; - if (Pos.X >= Info.dwSize.X) Pos.X = Info.dwSize.X - 1; - if (Pos.X < 0) Pos.X = 0; - Pos.Y = Info.dwCursorPosition.Y; - SetConsoleCursorPosition(hConOut, Pos); - return; - - case 'd': // ESC[#d Moves cursor row #, current column. 
- if (es_argc == 0) es_argv[es_argc++] = 1; // ESC[d == ESC[1d - if (es_argc != 1) return; - Pos.Y = es_argv[0] - 1; - if (Pos.Y < 0) Pos.Y = 0; - if (Pos.Y >= Info.dwSize.Y) Pos.Y = Info.dwSize.Y - 1; - SetConsoleCursorPosition(hConOut, Pos); - return; - - case 'f': // ESC[#;#f - case 'H': // ESC[#;#H Moves cursor to line #, column # - if (es_argc == 0) - es_argv[es_argc++] = 1; // ESC[H == ESC[1;1H - if (es_argc == 1) - es_argv[es_argc++] = 1; // ESC[#H == ESC[#;1H - if (es_argc > 2) return; - Pos.X = es_argv[1] - 1; - if (Pos.X < 0) Pos.X = 0; - if (Pos.X >= Info.dwSize.X) Pos.X = Info.dwSize.X - 1; - Pos.Y = es_argv[0] - 1; - if (Pos.Y < 0) Pos.Y = 0; - if (Pos.Y >= Info.dwSize.Y) Pos.Y = Info.dwSize.Y - 1; - SetConsoleCursorPosition(hConOut, Pos); - return; - - case 's': // ESC[s Saves cursor position for recall later - if (es_argc != 0) return; - SavePos = Info.dwCursorPosition; - return; - - case 'u': // ESC[u Return to saved cursor position - if (es_argc != 0) return; - SetConsoleCursorPosition(hConOut, SavePos); - return; - - case 'n': // ESC[#n Device status report - if (es_argc != 1) return; // ESC[n == ESC[0n -> ignored - switch (es_argv[0]) - { - case 5: // ESC[5n Report status - SendSequence(L"\33[0n"); // "OK" - return; - - case 6: // ESC[6n Report cursor position - { - TCHAR buf[32]; - wsprintf(buf, L"\33[%d;%dR", Info.dwCursorPosition.Y + 1, - Info.dwCursorPosition.X + 1); - SendSequence(buf); - } - return; - - default: - return; - } - - case 't': // ESC[#t Window manipulation - if (es_argc != 1) return; - if (es_argv[0] == 21) // ESC[21t Report xterm window's title - { - TCHAR buf[MAX_PATH * 2]; - DWORD len = GetConsoleTitle(buf + 3, lenof(buf) - 3 - 2); - // Too bad if it's too big or fails. - buf[0] = ESC; - buf[1] = ']'; - buf[2] = 'l'; - buf[3 + len] = ESC; - buf[3 + len + 1] = '\\'; - buf[3 + len + 2] = '\0'; - SendSequence(buf); - } - return; - - default: - return; - } - } - else // (prefix == ']') - { - // Ignore any \e]? or \e]> sequences. 
- if (prefix2 != 0) - return; - - if (es_argc == 1 && es_argv[0] == 0) // ESC]0;titleST - { - SetConsoleTitle(Pt_arg); - } - } -} - -//----------------------------------------------------------------------------- -// ParseAndPrintANSIString(hDev, lpBuffer, nNumberOfBytesToWrite) -// Parses the string lpBuffer, interprets the escapes sequences and prints the -// characters in the device hDev (console). -// The lexer is a three states automata. -// If the number of arguments es_argc > MAX_ARG, only the MAX_ARG-1 firsts and -// the last arguments are processed (no es_argv[] overflow). -//----------------------------------------------------------------------------- - -inline BOOL ParseAndPrintANSIString(HANDLE hDev, LPCVOID lpBuffer, DWORD nNumberOfBytesToWrite, LPDWORD lpNumberOfBytesWritten) -{ - DWORD i; - LPCSTR s; - - if (hDev != hConOut) // reinit if device has changed - { - hConOut = hDev; - state = 1; - shifted = FALSE; - } - for (i = nNumberOfBytesToWrite, s = (LPCSTR)lpBuffer; i > 0; i--, s++) - { - if (state == 1) - { - if (*s == ESC) state = 2; - else if (*s == SO) shifted = TRUE; - else if (*s == SI) shifted = FALSE; - else PushBuffer(*s); - } - else if (state == 2) - { - if (*s == ESC); // \e\e...\e == \e - else if ((*s == '[') || (*s == ']')) - { - FlushBuffer(); - prefix = *s; - prefix2 = 0; - state = 3; - Pt_len = 0; - *Pt_arg = '\0'; - } - else if (*s == ')' || *s == '(') state = 6; - else state = 1; - } - else if (state == 3) - { - if (is_digit(*s)) - { - es_argc = 0; - es_argv[0] = *s - '0'; - state = 4; - } - else if (*s == ';') - { - es_argc = 1; - es_argv[0] = 0; - es_argv[1] = 0; - state = 4; - } - else if (*s == '?' 
|| *s == '>') - { - prefix2 = *s; - } - else - { - es_argc = 0; - suffix = *s; - InterpretEscSeq(); - state = 1; - } - } - else if (state == 4) - { - if (is_digit(*s)) - { - es_argv[es_argc] = 10 * es_argv[es_argc] + (*s - '0'); - } - else if (*s == ';') - { - if (es_argc < MAX_ARG - 1) es_argc++; - es_argv[es_argc] = 0; - if (prefix == ']') - state = 5; - } - else - { - es_argc++; - suffix = *s; - InterpretEscSeq(); - state = 1; - } - } - else if (state == 5) - { - if (*s == BEL) - { - Pt_arg[Pt_len] = '\0'; - InterpretEscSeq(); - state = 1; - } - else if (*s == '\\' && Pt_len > 0 && Pt_arg[Pt_len - 1] == ESC) - { - Pt_arg[--Pt_len] = '\0'; - InterpretEscSeq(); - state = 1; - } - else if (Pt_len < lenof(Pt_arg) - 1) - Pt_arg[Pt_len++] = *s; - } - else if (state == 6) - { - // Ignore it (ESC ) 0 is implicit; nothing else is supported). - state = 1; - } - } - FlushBuffer(); - if (lpNumberOfBytesWritten != NULL) - *lpNumberOfBytesWritten = nNumberOfBytesToWrite - i; - return (i == 0); -} - -} // namespace ansi - -HANDLE hOut; -HANDLE hIn; -DWORD consolemode; - -inline int win32read(char *c) { - - DWORD foo; - INPUT_RECORD b; - KEY_EVENT_RECORD e; - BOOL altgr; - - while (1) { - if (!ReadConsoleInput(hIn, &b, 1, &foo)) return 0; - if (!foo) return 0; - - if (b.EventType == KEY_EVENT && b.Event.KeyEvent.bKeyDown) { - - e = b.Event.KeyEvent; - *c = b.Event.KeyEvent.uChar.AsciiChar; - - altgr = e.dwControlKeyState & (LEFT_CTRL_PRESSED | RIGHT_ALT_PRESSED); - - if (e.dwControlKeyState & (LEFT_CTRL_PRESSED | RIGHT_CTRL_PRESSED) && !altgr) { - - /* Ctrl+Key */ - switch (*c) { - case 'D': - *c = 4; - return 1; - case 'C': - *c = 3; - return 1; - case 'H': - *c = 8; - return 1; - case 'T': - *c = 20; - return 1; - case 'B': /* ctrl-b, left_arrow */ - *c = 2; - return 1; - case 'F': /* ctrl-f right_arrow*/ - *c = 6; - return 1; - case 'P': /* ctrl-p up_arrow*/ - *c = 16; - return 1; - case 'N': /* ctrl-n down_arrow*/ - *c = 14; - return 1; - case 'U': /* Ctrl+u, delete the 
whole line. */ - *c = 21; - return 1; - case 'K': /* Ctrl+k, delete from current to end of line. */ - *c = 11; - return 1; - case 'A': /* Ctrl+a, go to the start of the line */ - *c = 1; - return 1; - case 'E': /* ctrl+e, go to the end of the line */ - *c = 5; - return 1; - } - - /* Other Ctrl+KEYs ignored */ - } else { - - switch (e.wVirtualKeyCode) { - - case VK_ESCAPE: /* ignore - send ctrl-c, will return -1 */ - *c = 3; - return 1; - case VK_RETURN: /* enter */ - *c = 13; - return 1; - case VK_LEFT: /* left */ - *c = 2; - return 1; - case VK_RIGHT: /* right */ - *c = 6; - return 1; - case VK_UP: /* up */ - *c = 16; - return 1; - case VK_DOWN: /* down */ - *c = 14; - return 1; - case VK_HOME: - *c = 1; - return 1; - case VK_END: - *c = 5; - return 1; - case VK_BACK: - *c = 8; - return 1; - case VK_DELETE: - *c = 127; - return 1; - default: - if (*c) return 1; - } - } - } - } - - return -1; /* Makes compiler happy */ -} - -inline int win32_write(int fd, const void *buffer, unsigned int count) { - if (fd == _fileno(stdout)) { - DWORD bytesWritten = 0; - if (FALSE != ansi::ParseAndPrintANSIString(GetStdHandle(STD_OUTPUT_HANDLE), buffer, (DWORD)count, &bytesWritten)) { - return (int)bytesWritten; - } else { - errno = GetLastError(); - return 0; - } - } else if (fd == _fileno(stderr)) { - DWORD bytesWritten = 0; - if (FALSE != ansi::ParseAndPrintANSIString(GetStdHandle(STD_ERROR_HANDLE), buffer, (DWORD)count, &bytesWritten)) { - return (int)bytesWritten; - } else { - errno = GetLastError(); - return 0; - } - } else { - return _write(fd, buffer, count); - } -} -#endif // _WIN32 - -#define LINENOISE_DEFAULT_HISTORY_MAX_LEN 100 -#define LINENOISE_MAX_LINE 4096 -static const char *unsupported_term[] = {"dumb","cons25","emacs",NULL}; -static CompletionCallback completionCallback; - -#ifndef _WIN32 -static struct termios orig_termios; /* In order to restore at exit.*/ -#endif -static bool rawmode = false; /* For atexit() function to check if restore is needed*/ -static 
bool mlmode = false; /* Multi line mode. Default is single line. */ -static bool atexit_registered = false; /* Register atexit just 1 time. */ -static size_t history_max_len = LINENOISE_DEFAULT_HISTORY_MAX_LEN; -static std::vector history; - -/* The linenoiseState structure represents the state during line editing. - * We pass this state to functions implementing specific editing - * functionalities. */ -struct linenoiseState { - int ifd; /* Terminal stdin file descriptor. */ - int ofd; /* Terminal stdout file descriptor. */ - char *buf; /* Edited line buffer. */ - size_t buflen; /* Edited line buffer size. */ - std::string prompt; /* Prompt to display. */ - size_t pos; /* Current cursor position. */ - size_t oldpos; /* Previous refresh cursor position. */ - size_t len; /* Current edited line length. */ - size_t cols; /* Number of columns in terminal. */ - size_t maxrows; /* Maximum num of rows used so far (multiline mode) */ - int history_index; /* The history index we are currently editing. */ -}; - -enum KEY_ACTION { - KEY_NULL = 0, /* NULL */ - CTRL_A = 1, /* Ctrl+a */ - CTRL_B = 2, /* Ctrl-b */ - CTRL_C = 3, /* Ctrl-c */ - CTRL_D = 4, /* Ctrl-d */ - CTRL_E = 5, /* Ctrl-e */ - CTRL_F = 6, /* Ctrl-f */ - CTRL_H = 8, /* Ctrl-h */ - TAB = 9, /* Tab */ - CTRL_K = 11, /* Ctrl+k */ - CTRL_L = 12, /* Ctrl+l */ - ENTER = 13, /* Enter */ - CTRL_N = 14, /* Ctrl-n */ - CTRL_P = 16, /* Ctrl-p */ - CTRL_T = 20, /* Ctrl-t */ - CTRL_U = 21, /* Ctrl+u */ - CTRL_W = 23, /* Ctrl+w */ - ESC = 27, /* Escape */ - BACKSPACE = 127 /* Backspace */ -}; - -void linenoiseAtExit(void); -bool AddHistory(const char *line); -void refreshLine(struct linenoiseState *l); - -/* ======================= Low level terminal handling ====================== */ - -/* Set if to use or not the multi line mode. */ -inline void SetMultiLine(bool ml) { - mlmode = ml; -} - -/* Return true if the terminal name is in the list of terminals we know are - * not able to understand basic escape sequences. 
*/ -inline bool isUnsupportedTerm(void) { -#ifndef _WIN32 - char *term = getenv("TERM"); - int j; - - if (term == NULL) return false; - for (j = 0; unsupported_term[j]; j++) - if (!strcasecmp(term,unsupported_term[j])) return true; -#endif - return false; -} - -/* Raw mode: 1960 magic shit. */ -inline bool enableRawMode(int fd) { -#ifndef _WIN32 - struct termios raw; - - if (!isatty(STDIN_FILENO)) goto fatal; - if (!atexit_registered) { - atexit(linenoiseAtExit); - atexit_registered = true; - } - if (tcgetattr(fd,&orig_termios) == -1) goto fatal; - - raw = orig_termios; /* modify the original mode */ - /* input modes: no break, no CR to NL, no parity check, no strip char, - * no start/stop output control. */ - raw.c_iflag &= ~(BRKINT | ICRNL | INPCK | ISTRIP | IXON); - /* output modes - disable post processing */ - raw.c_oflag &= ~(OPOST); - /* control modes - set 8 bit chars */ - raw.c_cflag |= (CS8); - /* local modes - choing off, canonical off, no extended functions, - * no signal chars (^Z,^C) */ - raw.c_lflag &= ~(ECHO | ICANON | IEXTEN | ISIG); - /* control chars - set return condition: min number of bytes and timer. - * We want read to return every single byte, without timeout. 
*/ - raw.c_cc[VMIN] = 1; raw.c_cc[VTIME] = 0; /* 1 byte, no timer */ - - /* put terminal in raw mode after flushing */ - if (tcsetattr(fd,TCSAFLUSH,&raw) < 0) goto fatal; - rawmode = true; -#else - if (!atexit_registered) { - /* Init windows console handles only once */ - hOut = GetStdHandle(STD_OUTPUT_HANDLE); - if (hOut==INVALID_HANDLE_VALUE) goto fatal; - - if (!GetConsoleMode(hOut, &consolemode)) { - CloseHandle(hOut); - errno = ENOTTY; - return false; - }; - - hIn = GetStdHandle(STD_INPUT_HANDLE); - if (hIn == INVALID_HANDLE_VALUE) { - CloseHandle(hOut); - errno = ENOTTY; - return false; - } - - GetConsoleMode(hIn, &consolemode); - SetConsoleMode(hIn, ENABLE_PROCESSED_INPUT); - - /* Cleanup them at exit */ - atexit(linenoiseAtExit); - atexit_registered = true; - } - - rawmode = true; -#endif - return true; - -fatal: - errno = ENOTTY; - return false; -} - -inline void disableRawMode(int fd) { -#ifdef _WIN32 - rawmode = false; -#else - /* Don't even check the return value as it's too late. */ - if (rawmode && tcsetattr(fd,TCSAFLUSH,&orig_termios) != -1) - rawmode = false; -#endif -} - -/* Use the ESC [6n escape sequence to query the horizontal cursor position - * and return it. On error -1 is returned, on success the position of the - * cursor. */ -inline int getCursorPosition(int ifd, int ofd) { - char buf[32]; - int cols, rows; - unsigned int i = 0; - - /* Report cursor location */ - if (write(ofd, "\x1b[6n", 4) != 4) return -1; - - /* Read the response: ESC [ rows ; cols R */ - while (i < sizeof(buf)-1) { - if (read(ifd,buf+i,1) != 1) break; - if (buf[i] == 'R') break; - i++; - } - buf[i] = '\0'; - - /* Parse it. */ - if (buf[0] != ESC || buf[1] != '[') return -1; -#ifdef _WIN32 - if (sscanf_s(buf+2,"%d;%d",&rows,&cols) != 2) return -1; -#else - if (sscanf(buf + 2, "%d;%d", &rows, &cols) != 2) return -1; -#endif - return cols; -} - -/* Try to get the number of columns in the current terminal, or assume 80 - * if it fails. 
*/ -inline int getColumns(int ifd, int ofd) { -#ifdef _WIN32 - CONSOLE_SCREEN_BUFFER_INFO b; - - if (!GetConsoleScreenBufferInfo(hOut, &b)) return 80; - return b.srWindow.Right - b.srWindow.Left; -#else - struct winsize ws; - - if (ioctl(1, TIOCGWINSZ, &ws) == -1 || ws.ws_col == 0) { - /* ioctl() failed. Try to query the terminal itself. */ - int start, cols; - - /* Get the initial position so we can restore it later. */ - start = getCursorPosition(ifd,ofd); - if (start == -1) goto failed; - - /* Go to right margin and get position. */ - if (write(ofd,"\x1b[999C",6) != 6) goto failed; - cols = getCursorPosition(ifd,ofd); - if (cols == -1) goto failed; - - /* Restore position. */ - if (cols > start) { - char seq[32]; - snprintf(seq,32,"\x1b[%dD",cols-start); - if (write(ofd,seq,strlen(seq)) == -1) { - /* Can't recover... */ - } - } - return cols; - } else { - return ws.ws_col; - } - -failed: - return 80; -#endif -} - -/* Clear the screen. Used to handle ctrl+l */ -inline void linenoiseClearScreen(void) { - if (write(STDOUT_FILENO,"\x1b[H\x1b[2J",7) <= 0) { - /* nothing to do, just to avoid warning. */ - } -} - -/* Beep, used for completion when there is nothing to complete or when all - * the choices were already shown. */ -inline void linenoiseBeep(void) { - fprintf(stderr, "\x7"); - fflush(stderr); -} - -/* ============================== Completion ================================ */ - -/* This is an helper function for linenoiseEdit() and is called when the - * user types the key in order to complete the string currently in the - * input. - * - * The state of the editing is encapsulated into the pointed linenoiseState - * structure as described in the structure definition. 
*/ -inline int completeLine(struct linenoiseState *ls) { - std::vector lc; - int nread, nwritten; - char c = 0; - - completionCallback(ls->buf,lc); - if (lc.empty()) { - linenoiseBeep(); - } else { - size_t stop = 0, i = 0; - - while(!stop) { - /* Show completion or original buffer */ - if (i < lc.size()) { - struct linenoiseState saved = *ls; - - ls->len = ls->pos = lc[i].size(); - ls->buf = &lc[i][0]; - refreshLine(ls); - ls->len = saved.len; - ls->pos = saved.pos; - ls->buf = saved.buf; - } else { - refreshLine(ls); - } - - nread = read(ls->ifd,&c,1); - if (nread <= 0) { - return -1; - } - - switch(c) { - case 9: /* tab */ - i = (i+1) % (lc.size()+1); - if (i == lc.size()) linenoiseBeep(); - break; - case 27: /* escape */ - /* Re-show original buffer */ - if (i < lc.size()) refreshLine(ls); - stop = 1; - break; - default: - /* Update buffer and return */ - if (i < lc.size()) { -#ifdef _WIN32 - nwritten = _snprintf_s(ls->buf, ls->buflen, _TRUNCATE,"%s", &lc[i][0]); -#else - nwritten = snprintf(ls->buf, ls->buflen, "%s", &lc[i][0]); -#endif - ls->len = ls->pos = nwritten; - } - stop = 1; - break; - } - } - } - - return c; /* Return last read character */ -} - -/* Register a callback function to be called for tab-completion. */ -void SetCompletionCallback(CompletionCallback fn) { - completionCallback = fn; -} - -/* =========================== Line editing ================================= */ - -/* Single line low level line refresh. - * - * Rewrite the currently edited line accordingly to the buffer content, - * cursor position, and number of columns of the terminal. 
*/ -inline void refreshSingleLine(struct linenoiseState *l) { - char seq[64]; - size_t plen = l->prompt.length(); - int fd = l->ofd; - char *buf = l->buf; - size_t len = l->len; - size_t pos = l->pos; - std::string ab; - - while((plen+pos) >= l->cols) { - buf++; - len--; - pos--; - } - while (plen+len > l->cols) { - len--; - } - - /* Cursor to left edge */ -#ifdef _WIN32 - _snprintf_s(seq, 64, _TRUNCATE, "\r"); -#else - snprintf(seq, 64, "\r"); -#endif - ab += seq; - /* Write the prompt and the current buffer content */ - ab += l->prompt; - ab.append(buf, len); - /* Erase to right */ -#ifdef _WIN32 - _snprintf_s(seq,64,_TRUNCATE,"\x1b[0K"); -#else - snprintf(seq, 64, "\x1b[0K"); -#endif - ab += seq; - /* Move cursor to original position. */ -#ifdef _WIN32 - _snprintf_s(seq, 64, _TRUNCATE, "\r\x1b[%dC", (int)(pos + plen)); -#else - snprintf(seq, 64, "\r\x1b[%dC", (int)(pos + plen)); -#endif - ab += seq; - if (write(fd,ab.c_str(),ab.length()) == -1) {} /* Can't recover from write error. */ -} - -/* Multi line low level line refresh. - * - * Rewrite the currently edited line accordingly to the buffer content, - * cursor position, and number of columns of the terminal. */ -inline void refreshMultiLine(struct linenoiseState *l) { - char seq[64]; - auto plen = l->prompt.length(); - int rows = (int)((plen+l->len+l->cols-1)/l->cols); /* rows used by current buf. */ - int rpos = (int)((plen+l->oldpos+l->cols)/l->cols); /* cursor relative row. */ - int rpos2; /* rpos after refresh. */ - int col; /* colum position, zero-based. */ - int old_rows = (int)l->maxrows; - int fd = l->ofd, j; - std::string ab; - - /* Update maxrows if needed. */ - if (rows > (int)l->maxrows) l->maxrows = rows; - - /* First step: clear all the lines used before. To do so start by - * going to the last row. 
*/ - if (old_rows-rpos > 0) { -#ifdef _WIN32 - _snprintf_s(seq,64,_TRUNCATE,"\x1b[%dB", old_rows-rpos); -#else - snprintf(seq, 64, "\x1b[%dB", old_rows - rpos); -#endif - ab += seq; - } - - /* Now for every row clear it, go up. */ - for (j = 0; j < old_rows-1; j++) { -#ifdef _WIN32 - _snprintf_s(seq, 64, _TRUNCATE, "\r\x1b[0K\x1b[1A"); -#else - snprintf(seq, 64, "\r\x1b[0K\x1b[1A"); -#endif - ab += seq; - } - - /* Clean the top line. */ -#ifdef _WIN32 - _snprintf_s(seq, 64, _TRUNCATE, "\r\x1b[0K"); -#else - snprintf(seq, 64, "\r\x1b[0K"); -#endif - ab += seq; - - /* Write the prompt and the current buffer content */ - ab += l->prompt; - ab.append(l->buf, l->len); - - /* If we are at the very end of the screen with our prompt, we need to - * emit a newline and move the prompt to the first column. */ - if (l->pos && - l->pos == l->len && - (l->pos+plen) % l->cols == 0) - { - ab += "\n"; -#ifdef _WIN32 - _snprintf_s(seq, 64, _TRUNCATE, "\r"); -#else - snprintf(seq, 64, "\r"); -#endif - ab += seq; - rows++; - if (rows > (int)l->maxrows) l->maxrows = rows; - } - - /* Move cursor to right position. */ - rpos2 = (int)((plen+l->pos+l->cols)/l->cols); /* current cursor relative row. */ - - /* Go up till we reach the expected positon. */ - if (rows-rpos2 > 0) { -#ifdef _WIN32 - _snprintf_s(seq, 64, _TRUNCATE, "\x1b[%dA", rows - rpos2); -#else - snprintf(seq, 64, "\x1b[%dA", rows - rpos2); -#endif - ab += seq; - } - - /* Set column. */ - col = (plen+(int)l->pos) % (int)l->cols; - if (col) -#ifdef _WIN32 - _snprintf_s(seq, 64, _TRUNCATE, "\r\x1b[%dC", col); -#else - snprintf(seq, 64, "\r\x1b[%dC", col); -#endif - else -#ifdef _WIN32 - _snprintf_s(seq, 64, _TRUNCATE, "\r"); -#else - snprintf(seq, 64, "\r"); -#endif - ab += seq; - - l->oldpos = l->pos; - - if (write(fd,ab.c_str(),ab.length()) == -1) {} /* Can't recover from write error. */ -} - -/* Calls the two low level functions refreshSingleLine() or - * refreshMultiLine() according to the selected mode. 
*/ -inline void refreshLine(struct linenoiseState *l) { - if (mlmode) - refreshMultiLine(l); - else - refreshSingleLine(l); -} - -/* Insert the character 'c' at cursor current position. - * - * On error writing to the terminal -1 is returned, otherwise 0. */ -int linenoiseEditInsert(struct linenoiseState *l, char c) { - if (l->len < l->buflen) { - if (l->len == l->pos) { - l->buf[l->pos] = c; - l->pos++; - l->len++; - l->buf[l->len] = '\0'; - if ((!mlmode && l->prompt.length()+l->len < l->cols) /* || mlmode */) { - /* Avoid a full update of the line in the - * trivial case. */ - if (write(l->ofd,&c,1) == -1) return -1; - } else { - refreshLine(l); - } - } else { - memmove(l->buf+l->pos+1,l->buf+l->pos,l->len-l->pos); - l->buf[l->pos] = c; - l->len++; - l->pos++; - l->buf[l->len] = '\0'; - refreshLine(l); - } - } - return 0; -} - -/* Move cursor on the left. */ -void linenoiseEditMoveLeft(struct linenoiseState *l) { - if (l->pos > 0) { - l->pos--; - refreshLine(l); - } -} - -/* Move cursor on the right. */ -void linenoiseEditMoveRight(struct linenoiseState *l) { - if (l->pos != l->len) { - l->pos++; - refreshLine(l); - } -} - -/* Move cursor to the start of the line. */ -inline void linenoiseEditMoveHome(struct linenoiseState *l) { - if (l->pos != 0) { - l->pos = 0; - refreshLine(l); - } -} - -/* Move cursor to the end of the line. */ -inline void linenoiseEditMoveEnd(struct linenoiseState *l) { - if (l->pos != l->len) { - l->pos = l->len; - refreshLine(l); - } -} - -/* Substitute the currently edited line with the next or previous history - * entry as specified by 'dir'. */ -#define LINENOISE_HISTORY_NEXT 0 -#define LINENOISE_HISTORY_PREV 1 -inline void linenoiseEditHistoryNext(struct linenoiseState *l, int dir) { - if (history.size() > 1) { - /* Update the current history entry before to - * overwrite it with the next one. 
*/ - history[history.size() - 1 - l->history_index] = l->buf; - /* Show the new entry */ - l->history_index += (dir == LINENOISE_HISTORY_PREV) ? 1 : -1; - if (l->history_index < 0) { - l->history_index = 0; - return; - } else if (l->history_index >= (int)history.size()) { - l->history_index = history.size()-1; - return; - } - memset(l->buf, 0, l->buflen); -#ifdef _WIN32 - strcpy_s(l->buf, l->buflen, history[history.size() - 1 - l->history_index].c_str()); -#else - strcpy(l->buf, history[history.size() - 1 - l->history_index].c_str()); -#endif - l->len = l->pos = strlen(l->buf); - refreshLine(l); - } -} - -/* Delete the character at the right of the cursor without altering the cursor - * position. Basically this is what happens with the "Delete" keyboard key. */ -inline void linenoiseEditDelete(struct linenoiseState *l) { - if (l->len > 0 && l->pos < l->len) { - memmove(l->buf+l->pos,l->buf+l->pos+1,l->len-l->pos-1); - l->len--; - l->buf[l->len] = '\0'; - refreshLine(l); - } -} - -/* Backspace implementation. */ -inline void linenoiseEditBackspace(struct linenoiseState *l) { - if (l->pos > 0 && l->len > 0) { - memmove(l->buf+l->pos-1,l->buf+l->pos,l->len-l->pos); - l->pos--; - l->len--; - l->buf[l->len] = '\0'; - refreshLine(l); - } -} - -/* Delete the previosu word, maintaining the cursor at the start of the - * current word. */ -inline void linenoiseEditDeletePrevWord(struct linenoiseState *l) { - size_t old_pos = l->pos; - size_t diff; - - while (l->pos > 0 && l->buf[l->pos-1] == ' ') - l->pos--; - while (l->pos > 0 && l->buf[l->pos-1] != ' ') - l->pos--; - diff = old_pos - l->pos; - memmove(l->buf+l->pos,l->buf+old_pos,l->len-old_pos+1); - l->len -= diff; - refreshLine(l); -} - -/* This function is the core of the line editing capability of linenoise. - * It expects 'fd' to be already in "raw mode" so that every key pressed - * will be returned ASAP to read(). - * - * The resulting string is put into 'buf' when the user type enter, or - * when ctrl+d is typed. 
- * - * The function returns the length of the current buffer. */ -inline int linenoiseEdit(int stdin_fd, int stdout_fd, char *buf, size_t buflen, const char *prompt) -{ - struct linenoiseState l; - - /* Populate the linenoise state that we pass to functions implementing - * specific editing functionalities. */ - l.ifd = stdin_fd; - l.ofd = stdout_fd; - l.buf = buf; - l.buflen = buflen; - l.prompt = prompt; - l.oldpos = l.pos = 0; - l.len = 0; - l.cols = getColumns(stdin_fd, stdout_fd); - l.maxrows = 0; - l.history_index = 0; - - /* Buffer starts empty. */ - l.buf[0] = '\0'; - l.buflen--; /* Make sure there is always space for the nulterm */ - - /* The latest history entry is always our current buffer, that - * initially is just an empty string. */ - AddHistory(""); - - if (write(l.ofd,prompt,l.prompt.length()) == -1) return -1; - while(1) { - char c; - int nread; - char seq[3]; - -#ifdef _WIN32 - nread = win32read(&c); -#else - nread = read(l.ifd,&c,1); -#endif - if (nread <= 0) return (int)l.len; - - /* Only autocomplete when the callback is set. It returns < 0 when - * there was an error reading from fd. Otherwise it will return the - * character that should be handled next. */ - if (c == 9 && completionCallback != NULL) { - c = completeLine(&l); - /* Return on errors */ - if (c < 0) return (int)l.len; - /* Read next character when 0 */ - if (c == 0) continue; - } - - switch(c) { - case ENTER: /* enter */ - history.pop_back(); - if (mlmode) linenoiseEditMoveEnd(&l); - return (int)l.len; - case CTRL_C: /* ctrl-c */ - errno = EAGAIN; - return -1; - case BACKSPACE: /* backspace */ - case 8: /* ctrl-h */ - linenoiseEditBackspace(&l); - break; - case CTRL_D: /* ctrl-d, remove char at right of cursor, or if the - line is empty, act as end-of-file. */ - if (l.len > 0) { - linenoiseEditDelete(&l); - } else { - history.pop_back(); - return -1; - } - break; - case CTRL_T: /* ctrl-t, swaps current character with previous. 
*/ - if (l.pos > 0 && l.pos < l.len) { - int aux = buf[l.pos-1]; - buf[l.pos-1] = buf[l.pos]; - buf[l.pos] = aux; - if (l.pos != l.len-1) l.pos++; - refreshLine(&l); - } - break; - case CTRL_B: /* ctrl-b */ - linenoiseEditMoveLeft(&l); - break; - case CTRL_F: /* ctrl-f */ - linenoiseEditMoveRight(&l); - break; - case CTRL_P: /* ctrl-p */ - linenoiseEditHistoryNext(&l, LINENOISE_HISTORY_PREV); - break; - case CTRL_N: /* ctrl-n */ - linenoiseEditHistoryNext(&l, LINENOISE_HISTORY_NEXT); - break; - case ESC: /* escape sequence */ - /* Read the next two bytes representing the escape sequence. - * Use two calls to handle slow terminals returning the two - * chars at different times. */ - if (read(l.ifd,seq,1) == -1) break; - if (read(l.ifd,seq+1,1) == -1) break; - - /* ESC [ sequences. */ - if (seq[0] == '[') { - if (seq[1] >= '0' && seq[1] <= '9') { - /* Extended escape, read additional byte. */ - if (read(l.ifd,seq+2,1) == -1) break; - if (seq[2] == '~') { - switch(seq[1]) { - case '3': /* Delete key. */ - linenoiseEditDelete(&l); - break; - } - } - } else { - switch(seq[1]) { - case 'A': /* Up */ - linenoiseEditHistoryNext(&l, LINENOISE_HISTORY_PREV); - break; - case 'B': /* Down */ - linenoiseEditHistoryNext(&l, LINENOISE_HISTORY_NEXT); - break; - case 'C': /* Right */ - linenoiseEditMoveRight(&l); - break; - case 'D': /* Left */ - linenoiseEditMoveLeft(&l); - break; - case 'H': /* Home */ - linenoiseEditMoveHome(&l); - break; - case 'F': /* End*/ - linenoiseEditMoveEnd(&l); - break; - } - } - } - - /* ESC O sequences. */ - else if (seq[0] == 'O') { - switch(seq[1]) { - case 'H': /* Home */ - linenoiseEditMoveHome(&l); - break; - case 'F': /* End*/ - linenoiseEditMoveEnd(&l); - break; - } - } - break; - default: - if (linenoiseEditInsert(&l,c)) return -1; - break; - case CTRL_U: /* Ctrl+u, delete the whole line. */ - buf[0] = '\0'; - l.pos = l.len = 0; - refreshLine(&l); - break; - case CTRL_K: /* Ctrl+k, delete from current to end of line. 
*/ - buf[l.pos] = '\0'; - l.len = l.pos; - refreshLine(&l); - break; - case CTRL_A: /* Ctrl+a, go to the start of the line */ - linenoiseEditMoveHome(&l); - break; - case CTRL_E: /* ctrl+e, go to the end of the line */ - linenoiseEditMoveEnd(&l); - break; - case CTRL_L: /* ctrl+l, clear screen */ - linenoiseClearScreen(); - refreshLine(&l); - break; - case CTRL_W: /* ctrl+w, delete previous word */ - linenoiseEditDeletePrevWord(&l); - break; - } - } - return (int)l.len; -} - -/* This function calls the line editing function linenoiseEdit() using - * the STDIN file descriptor set in raw mode. */ -inline std::string linenoiseRaw(const char *prompt) { - std::string line; - - if (!isatty(STDIN_FILENO)) { - /* Not a tty: read from file / pipe. */ - std::getline(std::cin, line); - } else { - /* Interactive editing. */ - if (enableRawMode(STDIN_FILENO) == false) return line; - - char buf[LINENOISE_MAX_LINE]; - auto count = linenoiseEdit(STDIN_FILENO, STDOUT_FILENO, buf, LINENOISE_MAX_LINE, prompt); - if (count != -1) { - line.assign(buf, count); - } - - disableRawMode(STDIN_FILENO); - printf("\n"); - } - return line; -} - -/* The high level function that is the main API of the linenoise library. - * This function checks if the terminal has basic capabilities, just checking - * for a blacklist of stupid terminals, and later either calls the line - * editing function or uses dummy fgets() so that you will be able to type - * something even in the most desperate of the conditions. */ -inline std::string Readline(const char *prompt) { - if (isUnsupportedTerm()) { - printf("%s",prompt); - fflush(stdout); - std::string line; - std::getline(std::cin, line); - return line; - } else { - return linenoiseRaw(prompt); - } -} - -/* ================================ History ================================= */ - -/* At exit we'll try to fix the terminal to the initial conditions. 
*/ -inline void linenoiseAtExit(void) { - disableRawMode(STDIN_FILENO); -} - -/* This is the API call to add a new entry in the linenoise history. - * It uses a fixed array of char pointers that are shifted (memmoved) - * when the history max length is reached in order to remove the older - * entry and make room for the new one, so it is not exactly suitable for huge - * histories, but will work well for a few hundred of entries. - * - * Using a circular buffer is smarter, but a bit more complex to handle. */ -inline bool AddHistory(const char* line) { - if (history_max_len == 0) return false; - - /* Don't add duplicated lines. */ - if (!history.empty() && history.back() == line) return false; - - /* If we reached the max length, remove the older line. */ - if (history.size() == history_max_len) { - history.erase(history.begin()); - } - history.push_back(line); - - return true; -} - -/* Set the maximum length for the history. This function can be called even - * if there is already some history, the function will make sure to retain - * just the latest 'len' elements if the new history length value is smaller - * than the amount of items already inside the history. */ -inline bool SetHistoryMaxLen(size_t len) { - if (len < 1) return false; - history_max_len = len; - if (len < history.size()) { - history.resize(len); - } - return true; -} - -/* Save the history in the specified file. On success *true* is returned - * otherwise *false* is returned. */ -inline bool SaveHistory(const char* path) { - std::ofstream f(path); // TODO: need 'std::ios::binary'? - if (!f) return false; - for (const auto& h: history) { - f << h << std::endl; - } - return true; -} - -/* Load the history from the specified file. If the file does not exist - * zero is returned and no operation is performed. - * - * If the file exists and the operation succeeded *true* is returned, otherwise - * on error *false* is returned. 
*/ -inline bool LoadHistory(const char* path) { - std::ifstream f(path); - if (!f) return false; - std::string line; - while (std::getline(f, line)) { - AddHistory(line.c_str()); - } - return true; -} - -inline const std::vector& GetHistory() { - return history; -} - -} // namespace linenoise - -#ifdef _WIN32 -#undef snprintf -#undef isatty -#undef write -#undef read -#endif - -#endif /* __LINENOISE_HPP */ diff --git a/language/culebra/main.cc b/language/culebra/main.cc deleted file mode 100644 index 5f962c35..00000000 --- a/language/culebra/main.cc +++ /dev/null @@ -1,315 +0,0 @@ -#include "culebra.h" -#include "linenoise.hpp" -#include -#include -#include -#include - -using namespace peg; -using namespace std; - -bool read_file(const char* path, vector& buff) -{ - ifstream ifs(path, ios::in|ios::binary); - if (ifs.fail()) { - return false; - } - - auto size = static_cast(ifs.seekg(0, ios::end).tellg()); - - if (size > 0) { - buff.resize(size); - ifs.seekg(0, ios::beg).read(&buff[0], static_cast(buff.size())); - } - - return true; -} - -struct CommandLineDebugger -{ - void operator()(const Ast& ast, culebra::Environment& env, bool force_to_break) { - if (quit) { - return; - } - - if ((command_ == "n" && env.level <= level_) || - (command_ == "s") || - (command_ == "o" && env.level < level_)) { - force_to_break = true; - } - - if (force_to_break) { - static auto show_initial_usage = true; - if (show_initial_usage) { - show_initial_usage = false; - usage(); - } - - show_lines(ast); - - for (;;) { - cout << endl << "debug> "; - - string s; - std::getline(cin, s); - - istringstream is(s); - is >> command_; - - if (command_ == "h") { - usage(); - } else if (command_ == "l") { - is >> display_lines_; - show_lines(ast); - } else if (command_ == "p") { - string symbol; - is >> symbol; - print(ast, env, symbol); - } else if (command_ == "c") { - break; - } else if (command_ == "n") { - break; - } else if (command_ == "s") { - break; - } else if (command_ == "o") { - break; 
- } else if (command_ == "q") { - quit = true; - break; - } - } - level_ = env.level;; - } - } - - void show_lines(const Ast& ast) { - prepare_cache(ast.path); - - cout << endl << "Break in " << ast.path << ":" << ast.line << endl; - - auto count = get_line_count(ast.path); - - auto lines_ahead = (size_t)((display_lines_ - .5) / 2); - auto start = (size_t)max((int)ast.line - (int)lines_ahead, 1); - auto end = min(start + display_lines_, count); - - auto needed_digits = to_string(count).length(); - - for (auto l = start; l < end; l++) { - auto s = get_line(ast.path, l); - if (l == ast.line) { - cout << "> "; - } else { - cout << " "; - } - cout << setw(needed_digits) << l << " " << s << endl; - } - } - - shared_ptr find_function_node(const Ast& ast) { - auto node = ast.parent; - while (node->parent && node->tag != "FUNCTION"_) { - node = node->parent; - } - return node; - } - - void enum_identifiers(const Ast& ast, set& references) { - for (auto node: ast.nodes) { - switch (node->tag) { - case "IDENTIFIER"_: - references.insert(node->token); - break; - case "FUNCTION"_: - break; - default: - enum_identifiers(*node, references); - break; - } - } - } - - void print(const Ast& ast, culebra::Environment& env, const string& symbol) { - if (symbol.empty()) { - print_all(ast, env); - } else if (env.has(symbol)) { - cout << symbol << ": " << env.get(symbol).str() << endl; - } else { - cout << "'" << symbol << "'" << "is not undefined." 
<< endl; - } - } - - void print_all(const Ast& ast, culebra::Environment& env) { - auto node = find_function_node(ast); - set references; - enum_identifiers(*node, references); - for (const auto& symbol: references) { - if (env.has(symbol)) { - const auto& val = env.get(symbol); - if (val.type != culebra::Value::Function) { - cout << symbol << ": " << val.str() << endl; - } - } - } - } - - size_t get_line_count(const string& path) { - return sources_[path].size(); - } - - string get_line(const string& path, size_t line) { - const auto& positions = sources_[path]; - auto idx = line - 1; - auto first = idx > 0 ? positions[idx - 1] : 0; - auto last = positions[idx]; - auto size = last - first; - - string s(size, 0); - ifstream ifs(path, ios::in | ios::binary); - ifs.seekg(first, ios::beg).read((char*)s.data(), static_cast(s.size())); - - size_t count = 0; - auto rit = s.rbegin(); - while (rit != s.rend()) { - if (*rit == '\n') { - count++; - } - ++rit; - } - - s = s.substr(0, s.size() - count); - - return s; - } - - void prepare_cache(const string& path) { - auto it = sources_.find(path); - if (it == sources_.end()) { - vector buff; - read_file(path.c_str(), buff); - - auto& positions = sources_[path]; - - auto i = 0u; - for (; i < buff.size(); i++) { - if (buff[i] == '\n') { - positions.push_back(i + 1); - } - } - positions.push_back(i); - } - } - - void usage() { - cout << "Usage: (c)ontinue, (n)ext, (s)tep in, step (o)out, (p)ring, (l)ist, (q)uit" << endl; - } - - bool quit = false; - string command_; - size_t level_ = 0; - size_t display_lines_ = 4; - map> sources_; -}; - -int repl(shared_ptr env, bool print_ast) -{ - for (;;) { - auto line = linenoise::Readline("cul> "); - - if (line == "exit" || line == "quit") { - break; - } - - if (!line.empty()) { - vector msgs; - auto ast = culebra::parse("(repl)", line.data(), line.size(), msgs); - if (ast) { - if (print_ast) { - cout << peg::ast_to_s(ast); - } - - culebra::Value val; - if (interpret(ast, env, val, msgs)) { 
- cout << val << endl; - linenoise::AddHistory(line.c_str()); - continue; - } - } - - for (const auto& msg : msgs) { - cout << msg << endl;; - } - } - } - - return 0; -} - -int main(int argc, const char** argv) -{ - auto print_ast = false; - auto shell = false; - auto debug = false; - vector path_list; - - int argi = 1; - while (argi < argc) { - auto arg = argv[argi++]; - if (string("--shell") == arg) { - shell = true; - } else if (string("--ast") == arg) { - print_ast = true; - } else if (string("--debug") == arg) { - debug = true; - } else { - path_list.push_back(arg); - } - } - - if (!shell) { - shell = path_list.empty(); - } - - try { - auto env = make_shared(); - setup_built_in_functions(*env); - - for (auto path: path_list) { - vector buff; - if (!read_file(path, buff)) { - cerr << "can't open '" << path << "'." << endl; - return -1; - } - - vector msgs; - auto ast = culebra::parse(path, buff.data(), buff.size(), msgs); - if (ast) { - if (print_ast) { - cout << peg::ast_to_s(ast); - } - - culebra::Value val; - auto dbg = debug ? 
CommandLineDebugger() : culebra::Debugger(); - if (interpret(ast, env, val, msgs, dbg)) { - return 0; - } - } - - for (const auto& msg : msgs) { - cerr << msg << endl; - } - return -1; - } - - if (shell) { - repl(env, print_ast); - } - } catch (exception& e) { - cerr << e.what() << endl; - return -1; - } - - return 0; -} - -// vim: et ts=4 sw=4 cin cino={1s ff=unix diff --git a/language/culebra/samples/closure.cul b/language/culebra/samples/closure.cul deleted file mode 100644 index bfebe159..00000000 --- a/language/culebra/samples/closure.cul +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Closure test - */ - -make_func = fn (mut x) { - mut n = 100 - fn () { - n = n + 1 - x = x + 1 + n - } -} - -f = make_func(10) - -puts("1: { f() }") -puts("2: { f() }") -puts("3: { f() }") diff --git a/language/culebra/samples/fib.cul b/language/culebra/samples/fib.cul deleted file mode 100644 index c3e12e2a..00000000 --- a/language/culebra/samples/fib.cul +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Fibonacci - */ - -fib = fn (x) { - if x < 2 { - x - } else { - self(x - 2) + self(x -1) - } -} - -mut i = 0 -while i < 30 { - puts("{i}: {fib(i)}") - i = i + 1 -} diff --git a/language/culebra/samples/fizzbuzz.cul b/language/culebra/samples/fizzbuzz.cul deleted file mode 100644 index 95c57e08..00000000 --- a/language/culebra/samples/fizzbuzz.cul +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Fizz Buzz - */ - -mut i = 1 -while i < 24 { - if i % 15 == 0 { - puts('FizzBuzz') - } else if i % 5 == 0 { - puts('Buzz') - } else if i % 3 == 0 { - puts('Fizz') - } else { - puts(i) - } - i = i + 1 -} diff --git a/language/culebra/samples/test.cul b/language/culebra/samples/test.cul deleted file mode 100644 index d4d06eae..00000000 --- a/language/culebra/samples/test.cul +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Unit tests - */ - -test_call = fn () { - ret = fn(){[1,fn(){[4,5,6]},3]}()[1]()[1] - assert(ret == 5) -} - -test_return = fn () { - f = fn (x) { - if x % 2 { - return 'odd' - } - 'even' - } - assert(f(3) == 
'odd') - assert(f(4) == 'even') - - mut val = 0 - f2 = fn () { - val = 1 - return // comment - val = 2 - } - f2() - assert(val == 1) -} - -test_nil = fn () { - assert(nil == nil) - assert(!(nil != nil)) - - a = nil - assert(a == nil) - assert(!(a != nil)) - assert(!(a <= nil)) - assert(!(a < nil)) - assert(!(a >= nil)) - assert(!(a > nil)) - assert(nil == a) - assert(!(nil != a)) - assert(!(nil <= a)) - assert(!(nil < a)) - assert(!(nil >= a)) - assert(!(nil > a)) -} - -test_closure = fn () { - make_func = fn (mut x) { - mut n = 100 - fn () { - n = n + 1 - x = x + 1 + n - } - } - - f = make_func(10) - f() - f() - ret = f() - - assert(ret == 319) -} - -test_array = fn () { - a = [1,2,3] - assert(a.size() == 3) - - a.push(4) - assert(a.size() == 4) - - b = [] - assert(b.size() == 0) - - c = [1] - assert(c.size() == 1) - - d = [1,2,3](5, 0) - assert(d.size() == 5 && d[-1] == 0) - - e = [1,2,3](2) - assert(e.size() == 3 && e[-1] == 3) - - f = [1,2,3](5) - assert(f.size() == 5 && f[-1] == nil) -} - -g_ = 1 - -test_function = fn () { - a = 1 - make = fn () { - b = 1 - fn (c) { - g_ + a + b + c - } - } - f = make() - assert(f(1) == 4) -} - -test_object = fn () { - n = 1 - o = { - n: 123, - s: 'str', - f1: fn (x) { x + this.n }, - f2: fn (x) { x + n } - } - assert(o.size() == 4) - assert(o.f1(10) == 133) - assert(o.f2(10) == 11) - - a = {} - a.b = 1 - assert(a.a == nil) - assert(a.b == 1) - assert(a.size() == 1) -} - -test_object_factory = fn () { - ctor = fn (init) { - mut n = init - - { - add: fn (x) { - n = n + x - }, - sub: fn (x) { - n = n - x - }, - val: fn () { - n - } - } - } - - calc = ctor(10) - - assert(calc.val() == 10) - assert(calc.add(1) == 11) - assert(calc.sub(1) == 10) -} - -test_class = fn () { - // TODO: support 'prototype' property - Car = { - new: fn(miles_per_run) { - mut total_miles = 0 - - { - run: fn (times) { - total_miles = total_miles + miles_per_run * times - }, - total: fn () { - total_miles - } - } - } - } - - car = Car.new(5) - car.run(1) - 
car.run(2) - - assert(car.total() == 15) -} - -test_sum = fn () { - mut i = 1 - mut ret = 0 - while i <= 10 { - ret = ret + i - i = i + 1 - } - - assert(ret == 55) -} - -test_fib = fn () { - fib = fn (x) { - if x < 2 { - x - } else { - self(x - 2) + self(x -1) - } - } - - ret = fib(15) - - assert(ret == 610) -} - -test_interpolated_string = fn () { - hello = "Hello" - world = "World!" - ret = "{hello} {world}" - assert(ret == 'Hello World!') -} - -test_lexical_scope = fn () { - a = 0 - { - let a = 1; - assert(a == 1) - } - assert(a == 0) - - mut b = 0 - { - b = 1; - assert(b == 1) - } - assert(b == 1) - - c = 0 - { - let mut c = 0; - c = 1 - assert(c == 1) - } - assert(c == 0) - - obj = { - name: 'object' - } - - assert(obj.name == 'object') -} - -debugger -test_call() -test_return() -test_closure() -test_nil() -test_array() -test_function() -test_object() -test_object_factory() -test_class() -debugger -test_sum() -test_fib() -test_interpolated_string() -test_lexical_scope() - -return // end diff --git a/language/pl0/CMakeLists.txt b/language/pl0/CMakeLists.txt deleted file mode 100644 index 297763bc..00000000 --- a/language/pl0/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -cmake_minimum_required(VERSION 3.0) -include_directories(../..) -add_definitions("-std=c++1y") - -add_executable(pl0 pl0.cc) diff --git a/language/pl0/pl0.cc b/language/pl0/pl0.cc deleted file mode 100644 index 19af5f1e..00000000 --- a/language/pl0/pl0.cc +++ /dev/null @@ -1,455 +0,0 @@ -// -// pl0.cc - PL/0 interpreter (https://en.wikipedia.org/wiki/PL/0) -// -// Copyright (c) 2015 Yuji Hirose. All rights reserved. -// MIT License -// - -#include -#include -#include -#include - -using namespace peg; -using namespace std; - -/* - * PEG Grammar - */ -auto grammar = R"( - program <- _ block '.' _ - - block <- const var procedure statement - const <- ('CONST' __ ident '=' _ number (',' _ ident '=' _ number)* ';' _)? - var <- ('VAR' __ ident (',' _ ident)* ';' _)? 
- procedure <- ('PROCEDURE' __ ident ';' _ block ';' _)* - - statement <- (assignment / call / statements / if / while / out / in)? - assignment <- ident ':=' _ expression - call <- 'CALL' __ ident - statements <- 'BEGIN' __ statement (';' _ statement )* 'END' __ - if <- 'IF' __ condition 'THEN' __ statement - while <- 'WHILE' __ condition 'DO' __ statement - out <- ('out' __ / 'write' __ / '!' _) expression - in <- ('in' __ / 'read' __ / '?' _) ident - - condition <- odd / compare - odd <- 'ODD' __ expression - compare <- expression compare_op expression - compare_op <- < '=' / '#' / '<=' / '<' / '>=' / '>' > _ - - expression <- sign term (term_op term)* - sign <- < [-+]? > _ - term_op <- < [-+] > _ - - term <- factor (factor_op factor)* - factor_op <- < [*/] > _ - - factor <- ident / number / '(' _ expression ')' _ - - ident <- < [a-z] [a-z0-9]* > _ - number <- < [0-9]+ > _ - - ~_ <- [ \t\r\n]* - ~__ <- ![a-z0-9_] _ -)"; - -/* - * Utilities - */ -string format_error_message(const string& path, size_t ln, size_t col, const string& msg) { - stringstream ss; - ss << path << ":" << ln << ":" << col << ": " << msg << endl; - return ss.str(); -} - -/* - * Ast - */ -struct SymbolScope; - -struct Annotation -{ - shared_ptr scope; -}; - -typedef AstBase AstPL0; - -/* - * Symbol Table - */ -struct SymbolScope -{ - SymbolScope(shared_ptr outer) : outer(outer) {} - - bool has_symbol(const string& ident) const { - auto ret = constants.count(ident) || variables.count(ident); - return ret ? true : (outer ? outer->has_symbol(ident) : false); - } - - bool has_constant(const string& ident) const { - return constants.count(ident) ? true : (outer ? outer->has_constant(ident) : false); - } - - bool has_variable(const string& ident) const { - return variables.count(ident) ? true : (outer ? outer->has_variable(ident) : false); - } - - bool has_procedure(const string& ident) const { - return procedures.count(ident) ? true : (outer ? 
outer->has_procedure(ident) : false); - } - - map constants; - set variables; - map> procedures; - -private: - shared_ptr outer; -}; - -void throw_runtime_error(const shared_ptr node, const string& msg) { - throw runtime_error(format_error_message(node->path, node->line, node->column, msg)); -} - -struct SymbolTable -{ - static void build_on_ast(const shared_ptr ast, shared_ptr scope = nullptr) { - switch (ast->tag) { - case "block"_: block(ast, scope); break; - case "assignment"_: assignment(ast, scope); break; - case "call"_: call(ast, scope); break; - case "ident"_: ident(ast, scope); break; - default: for (auto node: ast->nodes) { build_on_ast(node, scope); } break; - } - } - -private: - static void block(const shared_ptr ast, shared_ptr outer) { - // block <- const var procedure statement - auto scope = make_shared(outer); - const auto& nodes = ast->nodes; - constants(nodes[0], scope); - variables(nodes[1], scope); - procedures(nodes[2], scope); - build_on_ast(nodes[3], scope); - ast->scope = scope; - } - - static void constants(const shared_ptr ast, shared_ptr scope) { - // const <- ('CONST' __ ident '=' _ number(',' _ ident '=' _ number)* ';' _) ? - const auto& nodes = ast->nodes; - for (auto i = 0u; i < nodes.size(); i += 2) { - const auto& ident = nodes[i + 0]->token; - if (scope->has_symbol(ident)) { - throw_runtime_error(nodes[i], "'" + ident + "' is already defined..."); - } - auto number = stoi(nodes[i + 1]->token); - scope->constants.emplace(ident, number); - } - } - - static void variables(const shared_ptr ast, shared_ptr scope) { - // var <- ('VAR' __ ident(',' _ ident)* ';' _) ? 
- const auto& nodes = ast->nodes; - for (auto i = 0u; i < nodes.size(); i += 1) { - const auto& ident = nodes[i]->token; - if (scope->has_symbol(ident)) { - throw_runtime_error(nodes[i], "'" + ident + "' is already defined..."); - } - scope->variables.emplace(ident); - } - } - - static void procedures(const shared_ptr ast, shared_ptr scope) { - // procedure <- ('PROCEDURE' __ ident ';' _ block ';' _)* - const auto& nodes = ast->nodes; - for (auto i = 0u; i < nodes.size(); i += 2) { - const auto& ident = nodes[i + 0]->token; - auto block = nodes[i + 1]; - scope->procedures[ident] = block; - build_on_ast(block, scope); - } - } - - static void assignment(const shared_ptr ast, shared_ptr scope) { - // assignment <- ident ':=' _ expression - const auto& ident = ast->nodes[0]->token; - if (scope->has_constant(ident)) { - throw_runtime_error(ast->nodes[0], "cannot modify constant value '" + ident + "'..."); - } else if (!scope->has_variable(ident)) { - throw_runtime_error(ast->nodes[0], "undefined variable '" + ident + "'..."); - } - } - - static void call(const shared_ptr ast, shared_ptr scope) { - // call <- 'CALL' __ ident - const auto& ident = ast->nodes[0]->token; - if (!scope->has_procedure(ident)) { - throw_runtime_error(ast->nodes[0], "undefined procedure '" + ident + "'..."); - } - } - - static void ident(const shared_ptr ast, shared_ptr scope) { - const auto& ident = ast->token; - if (!scope->has_symbol(ident)) { - throw_runtime_error(ast, "undefined variable '" + ident + "'..."); - } - } -}; - -/* - * Environment - */ -struct Environment -{ - Environment(shared_ptr scope, shared_ptr outer) - : scope(scope), outer(outer) {} - - int get_value(const string& ident) const { - auto it = scope->constants.find(ident); - if (it != scope->constants.end()) { - return it->second; - } else if (scope->variables.count(ident)) { - return variables.at(ident); - } - return outer->get_value(ident); - } - - void set_variable(const string& ident, int val) { - if 
(scope->variables.count(ident)) { - variables[ident] = val; - } else { - outer->set_variable(ident, val); - } - } - - shared_ptr get_procedure(const string& ident) const { - auto it = scope->procedures.find(ident); - return it != scope->procedures.end() ? it->second : outer->get_procedure(ident); - } - -private: - shared_ptr scope; - shared_ptr outer; - map variables; -}; - -/* - * Interpreter - */ -struct Interpreter -{ - static void exec(const shared_ptr ast, shared_ptr env = nullptr) { - switch (ast->tag) { - case "block"_: exec_block(ast, env); break; - case "statement"_: exec_statement(ast, env); break; - case "assignment"_: exec_assignment(ast, env); break; - case "call"_: exec_call(ast, env); break; - case "statements"_: exec_statements(ast, env); break; - case "if"_: exec_if(ast, env); break; - case "while"_: exec_while(ast, env); break; - case "out"_: exec_out(ast, env); break; - case "in"_: exec_in(ast, env); break; - default: exec(ast->nodes[0], env); break; - } - } - -private: - static void exec_block(const shared_ptr ast, shared_ptr outer) { - // block <- const var procedure statement - exec(ast->nodes[3], make_shared(ast->scope, outer)); - } - - static void exec_statement(const shared_ptr ast, shared_ptr env) { - // statement <- (assignment / call / statements / if / while / out / in)? 
- if (!ast->nodes.empty()) { - exec(ast->nodes[0], env); - } - } - - static void exec_assignment(const shared_ptr ast, shared_ptr env) { - // assignment <- ident ':=' _ expression - env->set_variable(ast->nodes[0]->token, eval(ast->nodes[1], env)); - } - - static void exec_call(const shared_ptr ast, shared_ptr env) { - // call <- 'CALL' __ ident - exec_block(env->get_procedure(ast->nodes[0]->token), env); - } - - static void exec_statements(const shared_ptr ast, shared_ptr env) { - // statements <- 'BEGIN' __ statement (';' _ statement )* 'END' __ - for (auto stmt: ast->nodes) { - exec(stmt, env); - } - } - - static void exec_if(const shared_ptr ast, shared_ptr env) { - // if <- 'IF' __ condition 'THEN' __ statement - if (eval_condition(ast->nodes[0], env)) { - exec(ast->nodes[1], env); - } - } - - static void exec_while(const shared_ptr ast, shared_ptr env) { - // while <- 'WHILE' __ condition 'DO' __ statement - auto cond = ast->nodes[0]; - auto stmt = ast->nodes[1]; - while (eval_condition(cond, env)) { - exec(stmt, env); - } - } - - static void exec_out(const shared_ptr ast, shared_ptr env) { - // out <- ('out' __ / 'write' __ / '!' _) expression - cout << eval(ast->nodes[0], env) << endl; - } - - static void exec_in(const shared_ptr ast, shared_ptr env) { - // in <- ('in' __ / 'read' __ / '?' 
_) ident - int val; - cin >> val; - env->set_variable(ast->nodes[0]->token, val); - } - - static bool eval_condition(const shared_ptr ast, shared_ptr env) { - // condition <- odd / compare - const auto& node = ast->nodes[0]; - switch (node->tag) { - case "odd"_: return eval_odd(node, env); - case "compare"_: return eval_compare(node, env); - default: throw logic_error("invalid AstPL0 type"); - } - } - - static bool eval_odd(const shared_ptr ast, shared_ptr env) { - // odd <- 'ODD' __ expression - return eval_expression(ast->nodes[0], env) != 0; - } - - static bool eval_compare(const shared_ptr ast, shared_ptr env) { - // compare <- expression compare_op expression - const auto& nodes = ast->nodes; - auto lval = eval_expression(nodes[0], env); - auto op = peg::str2tag(nodes[1]->token.c_str()); - auto rval = eval_expression(nodes[2], env); - switch (op) { - case "="_: return lval == rval; - case "#"_: return lval != rval; - case "<="_: return lval <= rval; - case "<"_: return lval < rval; - case ">="_: return lval >= rval; - case ">"_: return lval > rval; - default: throw logic_error("invalid operator"); - } - } - - static int eval(const shared_ptr ast, shared_ptr env) { - switch (ast->tag) { - case "expression"_: return eval_expression(ast, env); - case "term"_: return eval_term(ast, env); - case "ident"_: return eval_ident(ast, env); - case "number"_: return eval_number(ast, env); - default: return eval(ast->nodes[0], env); - } - } - - static int eval_expression(const shared_ptr ast, shared_ptr env) { - // expression <- sign term (term_op term)* - const auto& nodes = ast->nodes; - auto sign = nodes[0]->token; - auto sign_val = (sign.empty() || sign == "+") ? 
1 : -1; - auto val = eval(nodes[1], env) * sign_val; - for (auto i = 2u; i < nodes.size(); i += 2) { - auto ope = nodes[i + 0]->token[0]; - auto rval = eval(nodes[i + 1], env); - switch (ope) { - case '+': val = val + rval; break; - case '-': val = val - rval; break; - } - } - return val; - } - - static int eval_term(const shared_ptr ast, shared_ptr env) { - // term <- factor (factor_op factor)* - const auto& nodes = ast->nodes; - auto val = eval(nodes[0], env); - for (auto i = 1u; i < nodes.size(); i += 2) { - auto ope = nodes[i + 0]->token[0]; - auto rval = eval(nodes[i + 1], env); - switch (ope) { - case '*': - val = val * rval; - break; - case '/': - if (rval == 0) { - throw_runtime_error(ast, "divide by 0 error"); - } - val = val / rval; - break; - } - } - return val; - } - - static int eval_ident(const shared_ptr ast, shared_ptr env) { - return env->get_value(ast->token); - } - - static int eval_number(const shared_ptr ast, shared_ptr env) { - return stol(ast->token); - } -}; - -/* - * Main - */ -int main(int argc, const char** argv) -{ - if (argc < 2) { - cout << "usage: pl0 PATH [--ast]" << endl; - return 1; - } - - // Read a source file into memory - auto path = argv[1]; - vector source; - - ifstream ifs(path, ios::in | ios::binary); - if (ifs.fail()) { - cerr << "can't open the source file." 
<< endl; - return -1; - } - source.resize(static_cast(ifs.seekg(0, ios::end).tellg())); - if (!source.empty()) { - ifs.seekg(0, ios::beg).read(&source[0], static_cast(source.size())); - } - - // Setup a PEG parser - parser parser(grammar); - parser.enable_ast(); - parser.log = [&](size_t ln, size_t col, const string& msg) { - cerr << format_error_message(path, ln, col, msg) << endl; - }; - - // Parse the source and make an AST - shared_ptr ast; - if (parser.parse_n(source.data(), source.size(), ast, path)) { - if (argc > 2 && string("--ast") == argv[2]) { - cout << ast_to_s(ast); - } - try { - SymbolTable::build_on_ast(ast); - Interpreter::exec(ast); - } catch (const runtime_error& e) { - cerr << e.what() << endl; - } - return 0; - } - - return -1; -} - -// vim: et ts=4 sw=4 cin cino={1s ff=unix - diff --git a/language/pl0/pl0.sln b/language/pl0/pl0.sln deleted file mode 100644 index 2f45413a..00000000 --- a/language/pl0/pl0.sln +++ /dev/null @@ -1,34 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 14 -VisualStudioVersion = 14.0.23107.0 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pl0", "pl0.vcxproj", "{6C5633BD-3CAE-498E-B0C6-ED90A1A99C47}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Win32 = Debug|Win32 - Release|Win32 = Release|Win32 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {F85B641A-7538-4809-8175-C528FF632CF6}.Debug|Win32.ActiveCfg = Debug|Win32 - {F85B641A-7538-4809-8175-C528FF632CF6}.Debug|Win32.Build.0 = Debug|Win32 - {F85B641A-7538-4809-8175-C528FF632CF6}.Release|Win32.ActiveCfg = Release|Win32 - {F85B641A-7538-4809-8175-C528FF632CF6}.Release|Win32.Build.0 = Release|Win32 - {1D09607B-E1C0-4D62-8AB4-9E2D2C2DC6E4}.Debug|Win32.ActiveCfg = Debug|Win32 - {1D09607B-E1C0-4D62-8AB4-9E2D2C2DC6E4}.Debug|Win32.Build.0 = Debug|Win32 - 
{1D09607B-E1C0-4D62-8AB4-9E2D2C2DC6E4}.Release|Win32.ActiveCfg = Release|Win32 - {1D09607B-E1C0-4D62-8AB4-9E2D2C2DC6E4}.Release|Win32.Build.0 = Release|Win32 - {E6146F73-3B4C-4D4C-BC55-148930954434}.Debug|Win32.ActiveCfg = Debug|Win32 - {E6146F73-3B4C-4D4C-BC55-148930954434}.Debug|Win32.Build.0 = Debug|Win32 - {E6146F73-3B4C-4D4C-BC55-148930954434}.Release|Win32.ActiveCfg = Release|Win32 - {E6146F73-3B4C-4D4C-BC55-148930954434}.Release|Win32.Build.0 = Release|Win32 - {6C5633BD-3CAE-498E-B0C6-ED90A1A99C47}.Debug|Win32.ActiveCfg = Debug|Win32 - {6C5633BD-3CAE-498E-B0C6-ED90A1A99C47}.Debug|Win32.Build.0 = Debug|Win32 - {6C5633BD-3CAE-498E-B0C6-ED90A1A99C47}.Release|Win32.ActiveCfg = Release|Win32 - {6C5633BD-3CAE-498E-B0C6-ED90A1A99C47}.Release|Win32.Build.0 = Release|Win32 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection -EndGlobal diff --git a/language/pl0/pl0.vcxproj b/language/pl0/pl0.vcxproj deleted file mode 100644 index 48be9c91..00000000 --- a/language/pl0/pl0.vcxproj +++ /dev/null @@ -1,92 +0,0 @@ - - - - - Debug - Win32 - - - Release - Win32 - - - - - - - - - - {6C5633BD-3CAE-498E-B0C6-ED90A1A99C47} - Win32Proj - sample - pl0 - - - - Application - true - Unicode - v140 - - - Application - false - true - Unicode - v140 - - - - - - - - - - - - - true - - - false - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - ../.. - - - Console - true - Ws2_32.lib;%(AdditionalDependencies) - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - ../.. - - - Console - true - true - true - Ws2_32.lib;%(AdditionalDependencies) - - - - - - \ No newline at end of file diff --git a/lint/CMakeLists.txt b/lint/CMakeLists.txt index 45f9d69a..fdf5bf8c 100644 --- a/lint/CMakeLists.txt +++ b/lint/CMakeLists.txt @@ -1,5 +1,6 @@ -cmake_minimum_required(VERSION 3.0) -include_directories(..) 
-add_definitions("-std=c++1y") +cmake_minimum_required(VERSION 3.14) +project(peglint) -add_executable(peglint peglint.cc server.cc) +add_executable(peglint peglint.cc) +target_include_directories(peglint PRIVATE ..) +target_link_libraries(peglint ${add_link_deps}) diff --git a/lint/README.md b/lint/README.md index 9f48c1c4..5523af4c 100644 --- a/lint/README.md +++ b/lint/README.md @@ -4,5 +4,130 @@ peglint The lint utility for PEG. ``` -usage: peglint [--ast] [--optimize_ast_nodes|--opt] [--server [PORT]] [--trace] [grammar file path] [source file path] +usage: grammar_file_path [source_file_path] + + options: + --ast: show AST tree + --packrat: enable packrat memoise + --opt, --opt-all: optimize all AST nodes except nodes selected with `no_ast_opt` instruction + --opt-only: optimize only AST nodes selected with `no_ast_opt` instruction + --source: source text + --trace: show concise trace messages + --profile: show profile report + --verbose: verbose output for trace and profile +``` + +### Build peglint + +``` +> cd lint +> mkdir build +> cd build +> cmake .. +> make +``` + +### Lint grammar + +``` +> cat a.peg +A <- 'hello' ^ 'world' + +> peglint a.peg +a.peg:1:16: syntax error +``` + +``` +> cat a.peg +A <- B + +> peglint a.peg +a.peg:1:6: 'B' is not defined. +``` + +``` +> cat a.peg +A <- B / C +B <- 'b' +C <- A + +> peglint a.peg +a.peg:1:10: 'C' is left recursive. +a.peg:3:6: 'A' is left recursive. 
+``` + +### Lint source text + +``` +> cat a.peg +Additive <- Multiplicative '+' Additive / Multiplicative +Multiplicative <- Primary '*' Multiplicative / Primary +Primary <- '(' Additive ')' / Number +Number <- < [0-9]+ > +%whitespace <- [ \t\r\n]* + +> peglint --source "1 + a * 3" a.peg +[commandline]:1:3: syntax error +``` + +### AST + +``` +> cat a.txt +1 + 2 * 3 + +> peglint --ast a.peg a.txt ++ Additive + + Multiplicative + + Primary + - Number (1) + + Additive + + Multiplicative + + Primary + - Number (2) + + Multiplicative + + Primary + - Number (3) +``` + +### AST optimization + +``` +> peglint --ast --opt --source "1 + 2 * 3" a.peg ++ Additive + - Multiplicative[Number] (1) + + Additive[Multiplicative] + - Primary[Number] (2) + - Multiplicative[Number] (3) +``` + +### Adjust AST optimization with `no_ast_opt` instruction + +``` +> cat a.peg +Additive <- Multiplicative '+' Additive / Multiplicative +Multiplicative <- Primary '*' Multiplicative / Primary +Primary <- '(' Additive ')' / Number { no_ast_opt } +Number <- < [0-9]+ > +%whitespace <- [ \t\r\n]* + +> peglint --ast --opt --source "1 + 2 * 3" a.peg ++ Additive/0 + + Multiplicative/1[Primary] + - Number (1) + + Additive/1[Multiplicative] + + Primary/1 + - Number (2) + + Multiplicative/1[Primary] + - Number (3) + +> peglint --ast --opt-only --source "1 + 2 * 3" a.peg ++ Additive/0 + + Multiplicative/1 + - Primary/1[Number] (1) + + Additive/1 + + Multiplicative/0 + - Primary/1[Number] (2) + + Multiplicative/1 + - Primary/1[Number] (3) ``` diff --git a/lint/httplib.h b/lint/httplib.h deleted file mode 100644 index ad47a219..00000000 --- a/lint/httplib.h +++ /dev/null @@ -1,959 +0,0 @@ -// -// httplib.h -// -// Copyright (c) 2012 Yuji Hirose. All rights reserved. 
-// The Boost Software License 1.0 -// - -#ifndef _CPPHTTPLIB_HTTPSLIB_H_ -#define _CPPHTTPLIB_HTTPSLIB_H_ - -#ifdef _MSC_VER -#define _CRT_SECURE_NO_WARNINGS -#define _CRT_NONSTDC_NO_DEPRECATE - -#ifndef SO_SYNCHRONOUS_NONALERT -#define SO_SYNCHRONOUS_NONALERT 0x20; -#endif -#ifndef SO_OPENTYPE -#define SO_OPENTYPE 0x7008 -#endif -#if (_MSC_VER < 1900) -#define snprintf _snprintf_s -#endif - -#define S_ISREG(m) (((m)&S_IFREG)==S_IFREG) -#define S_ISDIR(m) (((m)&S_IFDIR)==S_IFDIR) - -#include -#include -#include -#include - -#undef min -#undef max - -typedef SOCKET socket_t; -#else -#include -#include -#include -#include -#include -#include -#include - -typedef int socket_t; -#endif - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace httplib -{ - -typedef std::map Map; -typedef std::multimap MultiMap; -typedef std::smatch Match; - -struct Request { - std::string method; - std::string url; - MultiMap headers; - std::string body; - Map params; - Match matches; - - bool has_header(const char* key) const; - std::string get_header_value(const char* key) const; - void set_header(const char* key, const char* val); - - bool has_param(const char* key) const; -}; - -struct Response { - int status; - MultiMap headers; - std::string body; - - bool has_header(const char* key) const; - std::string get_header_value(const char* key) const; - void set_header(const char* key, const char* val); - - void set_redirect(const char* url); - void set_content(const char* s, size_t n, const char* content_type); - void set_content(const std::string& s, const char* content_type); - - Response() : status(-1) {} -}; - -class Server { -public: - typedef std::function Handler; - typedef std::function Logger; - - Server(); - - void get(const char* pattern, Handler handler); - void post(const char* pattern, Handler handler); - - bool set_base_dir(const char* path); - - void set_error_handler(Handler handler); - void set_logger(Logger logger); - - bool 
listen(const char* host, int port); - void stop(); - -private: - typedef std::vector> Handlers; - - void process_request(FILE* fp_read, FILE* fp_write); - bool read_request_line(FILE* fp, Request& req); - bool routing(Request& req, Response& res); - bool handle_file_request(Request& req, Response& res); - bool dispatch_request(Request& req, Response& res, Handlers& handlers); - - socket_t svr_sock_; - std::string base_dir_; - Handlers get_handlers_; - Handlers post_handlers_; - Handler error_handler_; - Logger logger_; -}; - -class Client { -public: - Client(const char* host, int port); - - std::shared_ptr get(const char* url); - std::shared_ptr head(const char* url); - std::shared_ptr post(const char* url, const std::string& body, const char* content_type); - std::shared_ptr post(const char* url, const Map& params); - - bool send(const Request& req, Response& res); - -private: - bool read_response_line(FILE* fp, Response& res); - - const std::string host_; - const int port_; -}; - -// Implementation -namespace detail { - -template -void split(const char* b, const char* e, char d, Fn fn) -{ - int i = 0; - int beg = 0; - - while (e ? 
(b + i != e) : (b[i] != '\0')) { - if (b[i] == d) { - fn(&b[beg], &b[i]); - beg = i + 1; - } - i++; - } - - if (i) { - fn(&b[beg], &b[i]); - } -} - -template -inline bool read_and_close_socket(socket_t sock, T callback) -{ - FILE* fp_read; - FILE* fp_write; -#ifdef _MSC_VER - int osfhandle = _open_osfhandle(sock, _O_RDONLY); - fp_read = _fdopen(osfhandle, "rb"); - fp_write = _fdopen(osfhandle, "wb"); -#else - fp_read = fdopen(sock, "rb"); - fp_write = fdopen(sock, "wb"); -#endif - - auto ret = callback(fp_read, fp_write); - -#ifdef _MSC_VER - sock = osfhandle; -#else - fclose(fp_read); - fclose(fp_write); -#endif - - return ret; -} - -inline int shutdown_socket(socket_t sock) -{ -#ifdef _MSC_VER - return shutdown(sock, SD_BOTH); -#else - return shutdown(sock, SHUT_RDWR); -#endif -} - -inline int close_socket(socket_t sock) -{ -#ifdef _MSC_VER - return closesocket(sock); -#else - return close(sock); -#endif -} - -template -socket_t create_socket(const char* host, int port, Fn fn) -{ -#ifdef _MSC_VER - int opt = SO_SYNCHRONOUS_NONALERT; - setsockopt(INVALID_SOCKET, SOL_SOCKET, SO_OPENTYPE, (char*)&opt, sizeof(opt)); -#endif - - // Get address info - struct addrinfo hints; - struct addrinfo *result; - - memset(&hints, 0, sizeof(struct addrinfo)); - hints.ai_family = AF_UNSPEC; - hints.ai_socktype = SOCK_STREAM; - hints.ai_flags = 0; - hints.ai_protocol = 0; - - auto service = std::to_string(port); - - if (getaddrinfo(host, service.c_str(), &hints, &result)) { - return -1; - } - - for (auto rp = result; rp; rp = rp->ai_next) { - // Create a socket - auto sock = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol); - if (sock == -1) { - continue; - } - - // Make 'reuse address' option available - int yes = 1; - setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char*)&yes, sizeof(yes)); - - // bind or connect - if (fn(sock, *rp)) { - freeaddrinfo(result); - return sock; - } - - close_socket(sock); - } - - freeaddrinfo(result); - return -1; -} - -inline socket_t 
create_server_socket(const char* host, int port) -{ - return create_socket(host, port, [](socket_t sock, struct addrinfo& ai) -> socket_t { - if (::bind(sock, ai.ai_addr, ai.ai_addrlen)) { - return false; - } - if (listen(sock, 5)) { // Listen through 5 channels - return false; - } - return true; - }); -} - -inline socket_t create_client_socket(const char* host, int port) -{ - return create_socket(host, port, [](socket_t sock, struct addrinfo& ai) -> socket_t { - if (connect(sock, ai.ai_addr, ai.ai_addrlen)) { - return false; - } - return true; - }); -} - -inline bool is_file(const std::string& s) -{ - struct stat st; - return stat(s.c_str(), &st) >= 0 && S_ISREG(st.st_mode); -} - -inline bool is_dir(const std::string& s) -{ - struct stat st; - return stat(s.c_str(), &st) >= 0 && S_ISDIR(st.st_mode); -} - -inline void read_file(const std::string& path, std::string& out) -{ - std::ifstream fs(path, std::ios_base::binary); - fs.seekg(0, std::ios_base::end); - auto size = fs.tellg(); - fs.seekg(0); - out.resize(size); - fs.read(&out[0], size); -} - -inline std::string get_file_extention(const std::string& path) -{ - std::smatch m; - auto pat = std::regex("\\.([a-zA-Z0-9]+)$"); - auto ret = std::regex_search(path, m, pat); - std::string content_type; - if (ret) { - return m[1].str(); - } - return std::string(); -} - -inline const char* get_content_type_from_file_extention(const std::string& ext) -{ - if (ext == "html") { - return "text/html"; - } - return "text/plain"; -} - -inline const char* status_message(int status) -{ - switch (status) { - case 200: return "OK"; - case 400: return "Bad Request"; - case 404: return "Not Found"; - default: - case 500: return "Internal Server Error"; - } -} - -inline const char* get_header_value(const MultiMap& map, const char* key, const char* def) -{ - auto it = map.find(key); - if (it != map.end()) { - return it->second.c_str(); - } - return def; -} - -inline int get_header_value_int(const MultiMap& map, const char* key, int def) 
-{ - auto it = map.find(key); - if (it != map.end()) { - return std::stoi(it->second); - } - return def; -} - -inline bool read_headers(FILE* fp, MultiMap& headers) -{ - static std::regex re("(.+?): (.+?)\r\n"); - - const auto BUFSIZ_HEADER = 2048; - char buf[BUFSIZ_HEADER]; - - for (;;) { - if (!fgets(buf, BUFSIZ_HEADER, fp)) { - return false; - } - if (!strcmp(buf, "\r\n")) { - break; - } - std::cmatch m; - if (std::regex_match(buf, m, re)) { - auto key = std::string(m[1]); - auto val = std::string(m[2]); - headers.insert(std::make_pair(key, val)); - } - } - - return true; -} - -template -bool read_content(T& x, FILE* fp) -{ - auto len = get_header_value_int(x.headers, "Content-Length", 0); - if (len) { - x.body.assign(len, 0); - if (!fread(&x.body[0], x.body.size(), 1, fp)) { - return false; - } - } - return true; -} - -template -inline void write_headers(FILE* fp, const T& res) -{ - fprintf(fp, "Connection: close\r\n"); - - for (const auto& x: res.headers) { - if (x.first != "Content-Type" && x.first != "Content-Length") { - fprintf(fp, "%s: %s\r\n", x.first.c_str(), x.second.c_str()); - } - } - - auto t = get_header_value(res.headers, "Content-Type", "text/plain"); - fprintf(fp, "Content-Type: %s\r\n", t); - fprintf(fp, "Content-Length: %ld\r\n", res.body.size()); - fprintf(fp, "\r\n"); -} - -inline void write_response(FILE* fp, const Request& req, const Response& res) -{ - fprintf(fp, "HTTP/1.0 %d %s\r\n", res.status, status_message(res.status)); - - write_headers(fp, res); - - if (!res.body.empty() && req.method != "HEAD") { - fprintf(fp, "%s", res.body.c_str()); - } -} - -inline std::string encode_url(http://github.com/const%20std::string&%20s) -{ - std::string result; - - for (auto i = 0; s[i]; i++) { - switch (s[i]) { - case ' ': result += "+"; break; - case '\'': result += "%27"; break; - case ',': result += "%2C"; break; - case ':': result += "%3A"; break; - case ';': result += "%3B"; break; - default: - if (s[i] < 0) { - result += '%'; - char 
hex[4]; - size_t len = snprintf(hex, sizeof(hex), "%02X", (unsigned char)s[i]); - assert(len == 2); - result.append(hex, len); - } else { - result += s[i]; - } - break; - } - } - - return result; -} - -inline bool is_hex(char c, int& v) -{ - if (0x20 <= c && isdigit(c)) { - v = c - '0'; - return true; - } else if ('A' <= c && c <= 'F') { - v = c - 'A' + 10; - return true; - } else if ('a' <= c && c <= 'f') { - v = c - 'a' + 10; - return true; - } - return false; -} - -inline int from_hex_to_i(const std::string& s, int i, int cnt, int& val) -{ - val = 0; - for (; s[i] && cnt; i++, cnt--) { - int v = 0; - if (is_hex(s[i], v)) { - val = val * 16 + v; - } else { - break; - } - } - return --i; -} - -inline size_t to_utf8(int code, char* buff) -{ - if (code < 0x0080) { - buff[0] = (code & 0x7F); - return 1; - } else if (code < 0x0800) { - buff[0] = (0xC0 | ((code >> 6) & 0x1F)); - buff[1] = (0x80 | (code & 0x3F)); - return 2; - } else if (code < 0xD800) { - buff[0] = (0xE0 | ((code >> 12) & 0xF)); - buff[1] = (0x80 | ((code >> 6) & 0x3F)); - buff[2] = (0x80 | (code & 0x3F)); - return 3; - } else if (code < 0xE000) { // D800 - DFFF is invalid... 
- return 0; - } else if (code < 0x10000) { - buff[0] = (0xE0 | ((code >> 12) & 0xF)); - buff[1] = (0x80 | ((code >> 6) & 0x3F)); - buff[2] = (0x80 | (code & 0x3F)); - return 3; - } else if (code < 0x110000) { - buff[0] = (0xF0 | ((code >> 18) & 0x7)); - buff[1] = (0x80 | ((code >> 12) & 0x3F)); - buff[2] = (0x80 | ((code >> 6) & 0x3F)); - buff[3] = (0x80 | (code & 0x3F)); - return 4; - } - - // NOTREACHED - return 0; -} - -inline std::string decode_url(http://github.com/const%20std::string&%20s) -{ - std::string result; - - for (int i = 0; s[i]; i++) { - if (s[i] == '%') { - i++; - assert(s[i]); - - if (s[i] == '%') { - result += s[i]; - } else if (s[i] == 'u') { - // Unicode - i++; - assert(s[i]); - - int val = 0; - i = from_hex_to_i(s, i, 4, val); - - char buff[4]; - size_t len = to_utf8(val, buff); - - if (len > 0) { - result.append(buff, len); - } - } else { - // HEX - int val = 0; - i = from_hex_to_i(s, i, 2, val); - result += val; - } - } else if (s[i] == '+') { - result += ' '; - } else { - result += s[i]; - } - } - - return result; -} - -inline void write_request(FILE* fp, const Request& req) -{ - auto url = encode_url(http://github.com/req.url); - fprintf(fp, "%s %s HTTP/1.0\r\n", req.method.c_str(), url.c_str()); - - write_headers(fp, req); - - if (!req.body.empty()) { - if (req.has_header("application/x-www-form-urlencoded")) { - fprintf(fp, "%s", encode_url(http://github.com/req.body).c_str()); - } else { - fprintf(fp, "%s", req.body.c_str()); - } - } -} - -inline void parse_query_text(const std::string& s, Map& params) -{ - split(&s[0], &s[s.size()], '&', [&](const char* b, const char* e) { - std::string key; - std::string val; - split(b, e, '=', [&](const char* b, const char* e) { - if (key.empty()) { - key.assign(b, e); - } else { - val.assign(b, e); - } - }); - params[key] = detail::decode_url(http://github.com/val); - }); -} - -#ifdef _MSC_VER -class WSInit { -public: - WSInit::WSInit() { - WSADATA wsaData; - 
WSAStartup(0x0002, &wsaData); - } - - WSInit::~WSInit() { - WSACleanup(); - } -}; - -static WSInit wsinit_; -#endif - -} // namespace detail - -// Request implementation -inline bool Request::has_header(const char* key) const -{ - return headers.find(key) != headers.end(); -} - -inline std::string Request::get_header_value(const char* key) const -{ - return detail::get_header_value(headers, key, ""); -} - -inline void Request::set_header(const char* key, const char* val) -{ - headers.insert(std::make_pair(key, val)); -} - -inline bool Request::has_param(const char* key) const -{ - return params.find(key) != params.end(); -} - -// Response implementation -inline bool Response::has_header(const char* key) const -{ - return headers.find(key) != headers.end(); -} - -inline std::string Response::get_header_value(const char* key) const -{ - return detail::get_header_value(headers, key, ""); -} - -inline void Response::set_header(const char* key, const char* val) -{ - headers.insert(std::make_pair(key, val)); -} - -inline void Response::set_redirect(const char* url) -{ - set_header("Location", url); - status = 302; -} - -inline void Response::set_content(const char* s, size_t n, const char* content_type) -{ - body.assign(s, n); - set_header("Content-Type", content_type); -} - -inline void Response::set_content(const std::string& s, const char* content_type) -{ - body = s; - set_header("Content-Type", content_type); -} - -// HTTP server implementation -inline Server::Server() - : svr_sock_(-1) -{ -} - -inline void Server::get(const char* pattern, Handler handler) -{ - get_handlers_.push_back(std::make_pair(std::regex(pattern), handler)); -} - -inline void Server::post(const char* pattern, Handler handler) -{ - post_handlers_.push_back(std::make_pair(std::regex(pattern), handler)); -} - -inline bool Server::set_base_dir(const char* path) -{ - if (detail::is_dir(path)) { - base_dir_ = path; - return true; - } - return false; -} - -inline void 
Server::set_error_handler(Handler handler) -{ - error_handler_ = handler; -} - -inline void Server::set_logger(Logger logger) -{ - logger_ = logger; -} - -inline bool Server::listen(const char* host, int port) -{ - svr_sock_ = detail::create_server_socket(host, port); - if (svr_sock_ == -1) { - return false; - } - - auto ret = true; - - for (;;) { - socket_t sock = accept(svr_sock_, NULL, NULL); - - if (sock == -1) { - if (svr_sock_ != -1) { - detail::close_socket(svr_sock_); - ret = false; - } else { - ; // The server socket was closed by user. - } - break; - } - - // TODO: should be async - detail::read_and_close_socket(sock, [this](FILE* fp_read, FILE* fp_write) { - process_request(fp_read, fp_write); - return true; - }); - } - - return ret; -} - -inline void Server::stop() -{ - detail::shutdown_socket(svr_sock_); - detail::close_socket(svr_sock_); - svr_sock_ = -1; -} - -inline bool Server::read_request_line(FILE* fp, Request& req) -{ - const auto BUFSIZ_REQUESTLINE = 2048; - char buf[BUFSIZ_REQUESTLINE]; - if (!fgets(buf, BUFSIZ_REQUESTLINE, fp)) { - return false; - } - - static std::regex re("(GET|HEAD|POST) ([^?]+)(?:\\?(.+?))? 
HTTP/1\\.[01]\r\n"); - - std::cmatch m; - if (std::regex_match(buf, m, re)) { - req.method = std::string(m[1]); - req.url = detail::decode_url(http://github.com/m[2]); - - // Parse query text - auto len = std::distance(m[3].first, m[3].second); - if (len > 0) { - detail::parse_query_text(m[3], req.params); - } - - return true; - } - - return false; -} - -inline bool Server::handle_file_request(Request& req, Response& res) -{ - if (!base_dir_.empty()) { - std::string path = base_dir_ + req.url; - - if (!path.empty() && path.back() == '/') { - path += "index.html"; - } - - if (detail::is_file(path)) { - detail::read_file(path, res.body); - res.set_header("Content-Type", - detail::get_content_type_from_file_extention( - detail::get_file_extention(path))); - res.status = 200; - return true; - } - } - - return false; -} - -inline bool Server::routing(Request& req, Response& res) -{ - if (req.method == "GET" && handle_file_request(req, res)) { - return true; - } - - if (req.method == "GET" || req.method == "HEAD") { - return dispatch_request(req, res, get_handlers_); - } else if (req.method == "POST") { - return dispatch_request(req, res, post_handlers_); - } - return false; -} - -inline bool Server::dispatch_request(Request& req, Response& res, Handlers& handlers) -{ - for (const auto& x: handlers) { - const auto& pattern = x.first; - const auto& handler = x.second; - - if (std::regex_match(req.url, req.matches, pattern)) { - handler(req, res); - return true; - } - } - return false; -} - -inline void Server::process_request(FILE* fp_read, FILE* fp_write) -{ - Request req; - Response res; - - if (!read_request_line(fp_read, req) || - !detail::read_headers(fp_read, req.headers)) { - return; - } - - if (req.method == "POST") { - if (!detail::read_content(req, fp_read)) { - return; - } - static std::string type = "application/x-www-form-urlencoded"; - if (!req.get_header_value("Content-Type").compare(0, type.size(), type)) { - detail::parse_query_text(req.body, 
req.params); - } - } - - if (routing(req, res)) { - if (res.status == -1) { - res.status = 200; - } - } else { - res.status = 404; - } - assert(res.status != -1); - - if (400 <= res.status && error_handler_) { - error_handler_(req, res); - } - - detail::write_response(fp_write, req, res); - fflush(fp_write); - - if (logger_) { - logger_(req, res); - } -} - -// HTTP client implementation -inline Client::Client(const char* host, int port) - : host_(host) - , port_(port) -{ -} - -inline bool Client::read_response_line(FILE* fp, Response& res) -{ - const auto BUFSIZ_RESPONSELINE = 2048; - char buf[BUFSIZ_RESPONSELINE]; - if (!fgets(buf, BUFSIZ_RESPONSELINE, fp)) { - return false; - } - - const static std::regex re("HTTP/1\\.[01] (\\d+?) .+\r\n"); - - std::cmatch m; - if (std::regex_match(buf, m, re)) { - res.status = std::stoi(std::string(m[1])); - } - - return true; -} - -inline bool Client::send(const Request& req, Response& res) -{ - auto sock = detail::create_client_socket(host_.c_str(), port_); - if (sock == -1) { - return false; - } - - return detail::read_and_close_socket(sock, [&](FILE* fp_read, FILE* fp_write) { - // Send request - detail::write_request(fp_write, req); - fflush(fp_write); - - // Receive response - if (!read_response_line(fp_read, res) || - !detail::read_headers(fp_read, res.headers)) { - return false; - } - if (req.method != "HEAD") { - if (!detail::read_content(res, fp_read)) { - return false; - } - } - - return true; - }); -} - -inline std::shared_ptr Client::get(const char* url) -{ - Request req; - req.method = "GET"; - req.url = url; - - auto res = std::make_shared(); - - return send(req, *res) ? res : nullptr; -} - -inline std::shared_ptr Client::head(const char* url) -{ - Request req; - req.method = "HEAD"; - req.url = url; - - auto res = std::make_shared(); - - return send(req, *res) ? 
res : nullptr; -} - -inline std::shared_ptr Client::post( - const char* url, const std::string& body, const char* content_type) -{ - Request req; - req.method = "POST"; - req.url = url; - req.set_header("Content-Type", content_type); - req.body = body; - - auto res = std::make_shared(); - - return send(req, *res) ? res : nullptr; -} - -inline std::shared_ptr Client::post( - const char* url, const Map& params) -{ - std::string query; - for (auto it = params.begin(); it != params.end(); ++it) { - if (it != params.begin()) { - query += "&"; - } - query += it->first; - query += "="; - query += it->second; - } - - return post(url, query, "application/x-www-form-urlencoded"); -} - -} // namespace httplib - -#endif - -// vim: et ts=4 sw=4 cin cino={1s ff=unix diff --git a/lint/peglint.cc b/lint/peglint.cc index f3c25a0f..0479883d 100644 --- a/lint/peglint.cc +++ b/lint/peglint.cc @@ -1,160 +1,155 @@ // // peglint.cc // -// Copyright (c) 2015 Yuji Hirose. All rights reserved. +// Copyright (c) 2022 Yuji Hirose. All rights reserved. 
// MIT License // -#include #include +#include +#include using namespace std; -int run_server(int port, const vector& syntax, const vector& source); +inline bool read_file(const char *path, vector &buff) { + ifstream ifs(path, ios::in | ios::binary); + if (ifs.fail()) { return false; } -bool read_file(const char* path, vector& buff) -{ - ifstream ifs(path, ios::in | ios::binary); - if (ifs.fail()) { - return false; - } + buff.resize(static_cast(ifs.seekg(0, ios::end).tellg())); + if (!buff.empty()) { + ifs.seekg(0, ios::beg).read(&buff[0], static_cast(buff.size())); + } + return true; +} - buff.resize(static_cast(ifs.seekg(0, ios::end).tellg())); - if (!buff.empty()) { - ifs.seekg(0, ios::beg).read(&buff[0], static_cast(buff.size())); - } - return true; +inline vector split(const string &s, char delim) { + vector elems; + stringstream ss(s); + string elem; + while (getline(ss, elem, delim)) { + elems.push_back(elem); + } + return elems; } -int main(int argc, const char** argv) -{ - auto opt_ast = false; - auto opt_optimize_ast_nodes = false; - auto opt_help = false; - auto opt_server = false; - int port = 1234; - auto opt_trace = false; - vector path_list; - - auto argi = 1; - while (argi < argc) { - auto arg = argv[argi++]; - if (string("--help") == arg) { - opt_help = true; - } else if (string("--ast") == arg) { - opt_ast = true; - } else if (string("--optimize_ast_nodes") == arg || string("--opt") == arg) { - opt_optimize_ast_nodes = true; - } else if (string("--server") == arg) { - opt_server = true; - if (argi < argc) { - port = std::stoi(argv[argi++]); - } - } else if (string("--trace") == arg) { - opt_trace = true; - } else { - path_list.push_back(arg); - } +int main(int argc, const char **argv) { + auto opt_packrat = false; + auto opt_ast = false; + auto opt_optimize = false; + auto opt_mode = true; + auto opt_help = false; + auto opt_source = false; + vector source; + auto opt_trace = false; + auto opt_verbose = false; + auto opt_profile = false; + vector 
path_list; + + auto argi = 1; + while (argi < argc) { + auto arg = argv[argi++]; + if (string("--help") == arg) { + opt_help = true; + } else if (string("--packrat") == arg) { + opt_packrat = true; + } else if (string("--ast") == arg) { + opt_ast = true; + } else if (string("--opt") == arg || string("--opt-all") == arg) { + opt_optimize = true; + opt_mode = true; + } else if (string("--opt-only") == arg) { + opt_optimize = true; + opt_mode = false; + } else if (string("--source") == arg) { + opt_source = true; + if (argi < argc) { + std::string text = argv[argi++]; + source.assign(text.begin(), text.end()); + } + } else if (string("--trace") == arg) { + opt_trace = true; + } else if (string("--profile") == arg) { + opt_profile = true; + } else if (string("--verbose") == arg) { + opt_verbose = true; + } else { + path_list.push_back(arg); } - - if ((path_list.empty() && !opt_server) || opt_help) { - cerr << "usage: peglint [--ast] [--optimize_ast_nodes|--opt] [--server [PORT]] [--trace] [grammar file path] [source file path]" << endl; - return 1; + } + + if (path_list.empty() || opt_help) { + cerr << R"(usage: grammar_file_path [source_file_path] + + options: + --source: source text + --packrat: enable packrat memoise + --ast: show AST tree + --opt, --opt-all: optimize all AST nodes except nodes selected with `no_ast_opt` instruction + --opt-only: optimize only AST nodes selected with `no_ast_opt` instruction + --trace: show concise trace messages + --profile: show profile report + --verbose: verbose output for trace and profile +)"; + + return 1; + } + + // Check PEG grammar + auto syntax_path = path_list[0]; + + vector syntax; + if (!read_file(syntax_path, syntax)) { + cerr << "can't open the grammar file." 
<< endl; + return -1; + } + + peg::parser parser; + + parser.set_logger([&](size_t ln, size_t col, const string &msg) { + cerr << syntax_path << ":" << ln << ":" << col << ": " << msg << endl; + }); + + if (!parser.load_grammar(syntax.data(), syntax.size())) { return -1; } + + if (path_list.size() < 2 && !opt_source) { return 0; } + + // Check source + std::string source_path = "[commandline]"; + if (path_list.size() >= 2) { + if (!read_file(path_list[1], source)) { + cerr << "can't open the code file." << endl; + return -1; } + source_path = path_list[1]; + } - // Sever mode - if (opt_server) { - vector syntax; - vector source; - - if (path_list.size() >= 1 && !read_file(path_list[0], syntax)) { - cerr << "can't open the grammar file." << endl; - return -1; - } - - if (path_list.size() >= 2 && !read_file(path_list[1], source)) { - cerr << "can't open the code file." << endl; - return -1; - } + parser.set_logger([&](size_t ln, size_t col, const string &msg) { + cerr << source_path << ":" << ln << ":" << col << ": " << msg << endl; + }); - return run_server(port, syntax, source); - } + if (opt_packrat) { parser.enable_packrat_parsing(); } - // Check PEG grammar - auto syntax_path = path_list[0]; + if (opt_trace) { enable_tracing(parser, std::cout); } - vector syntax; - if (!read_file(syntax_path, syntax)) { - cerr << "can't open the grammar file." 
<< endl; - return -1; - } + if (opt_profile) { enable_profiling(parser, std::cout); } - peg::parser parser; + parser.set_verbose_trace(opt_verbose); - parser.log = [&](auto ln, auto col, const auto& msg) { - cerr << syntax_path << ":" << ln << ":" << col << ": " << msg << endl; - }; + if (opt_ast) { + parser.enable_ast(); - if (!parser.load_grammar(syntax.data(), syntax.size())) { - return -1; - } + std::shared_ptr ast; + auto ret = parser.parse_n(source.data(), source.size(), ast); - if (path_list.size() < 2) { - return 0; + if (ast) { + if (opt_optimize) { ast = parser.optimize_ast(ast, opt_mode); } + std::cout << peg::ast_to_s(ast); } - // Check source - auto source_path = path_list[1]; + if (!ret) { return -1; } + } else { + if (!parser.parse_n(source.data(), source.size())) { return -1; } + } - vector source; - if (!read_file(source_path, source)) { - auto beg = source_path; - auto end = source_path + strlen(source_path); - source.assign(beg, end); - source_path = "[commendline]"; - } - - parser.log = [&](auto ln, auto col, const auto& msg) { - cerr << source_path << ":" << ln << ":" << col << ": " << msg << endl; - }; - - if (opt_trace) { - std::cout << "pos:lev\trule/ope" << std::endl; - std::cout << "-------\t--------" << std::endl; - size_t prev_pos = 0; - parser.enable_trace([&](auto name, auto s, auto n, auto& sv, auto& c, auto& dt) { - auto pos = s - c.s; - auto backtrack = (pos < prev_pos ? 
"*" : ""); - string indent; - auto level = c.nest_level; - while (level--) { - indent += " "; - } - std::cout - << pos << ":" << c.nest_level << backtrack << "\t" - << indent << name << std::endl; - prev_pos = pos; - }); - } - - if (opt_ast) { - parser.enable_ast(); - - std::shared_ptr ast; - if (!parser.parse_n(source.data(), source.size(), ast)) { - return -1; - } - - ast = peg::AstOptimizer(opt_optimize_ast_nodes).optimize(ast); - std::cout << peg::ast_to_s(ast); - - } else { - if (!parser.parse_n(source.data(), source.size())) { - return -1; - } - } - - return 0; + return 0; } - -// vim: et ts=4 sw=4 cin cino={1s ff=unix diff --git a/lint/peglint.sln b/lint/peglint.sln deleted file mode 100644 index ae9f084c..00000000 --- a/lint/peglint.sln +++ /dev/null @@ -1,26 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 2013 -VisualStudioVersion = 12.0.31101.0 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "peglint", "peglint.vcxproj", "{F85B641A-7538-4809-8175-C528FF632CF6}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Win32 = Debug|Win32 - Release|Win32 = Release|Win32 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {F85B641A-7538-4809-8175-C528FF632CF6}.Debug|Win32.ActiveCfg = Debug|Win32 - {F85B641A-7538-4809-8175-C528FF632CF6}.Debug|Win32.Build.0 = Debug|Win32 - {F85B641A-7538-4809-8175-C528FF632CF6}.Release|Win32.ActiveCfg = Release|Win32 - {F85B641A-7538-4809-8175-C528FF632CF6}.Release|Win32.Build.0 = Release|Win32 - {1D09607B-E1C0-4D62-8AB4-9E2D2C2DC6E4}.Debug|Win32.ActiveCfg = Debug|Win32 - {1D09607B-E1C0-4D62-8AB4-9E2D2C2DC6E4}.Debug|Win32.Build.0 = Debug|Win32 - {1D09607B-E1C0-4D62-8AB4-9E2D2C2DC6E4}.Release|Win32.ActiveCfg = Release|Win32 - {1D09607B-E1C0-4D62-8AB4-9E2D2C2DC6E4}.Release|Win32.Build.0 = Release|Win32 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - 
HideSolutionNode = FALSE - EndGlobalSection -EndGlobal diff --git a/lint/peglint.vcxproj b/lint/peglint.vcxproj deleted file mode 100644 index a5499e9f..00000000 --- a/lint/peglint.vcxproj +++ /dev/null @@ -1,93 +0,0 @@ - - - - - Debug - Win32 - - - Release - Win32 - - - - - - - - - - - - {F85B641A-7538-4809-8175-C528FF632CF6} - Win32Proj - peglint - - - - Application - true - Unicode - v140 - - - Application - false - true - Unicode - v140 - - - - - - - - - - - - - true - - - false - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - .. - - - Console - true - Ws2_32.lib;%(AdditionalDependencies) - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - .. - - - Console - true - true - true - Ws2_32.lib;%(AdditionalDependencies) - - - - - - \ No newline at end of file diff --git a/lint/server.cc b/lint/server.cc deleted file mode 100644 index 57b4dd60..00000000 --- a/lint/server.cc +++ /dev/null @@ -1,349 +0,0 @@ - -#include "httplib.h" -#include "peglib.h" -#include -#include - -using namespace httplib; -using namespace std; - -string indexHTML = R"( - - - -PEG Playground - - - - -
      -
      -
        -
      • Grammar:
      • -
      • Valid
      • -
      -
      {{syntax}}
      -
      -
      -
      -
        -
      • Code:
      • -
      • Valid
      • -
      -
      {{source}}
      -
      
      -        
      
      -        
      -
      -
      - - - - - - -)"; - -function makeJSONFormatter(string& json) -{ - auto init = make_shared(true); - - return [&json, init](size_t ln, size_t col, const string& msg) mutable { - if (!init) { - json += ","; - } - json += "{"; - json += R"("ln":)" + to_string(ln) + ","; - json += R"("col":)" + to_string(col) + ","; - json += R"("msg":")" + msg + R"(")"; - json += "}"; - *init = false; - }; -} - -bool parse_grammar(const string& text, peg::parser& peg, string& json) -{ - peg.log = makeJSONFormatter(json); - json += "["; - auto ret = peg.load_grammar(text.data(), text.size()); - json += "]"; - return ret; -} - -bool parse_code(const string& text, peg::parser& peg, string& json, shared_ptr& ast) -{ - peg.enable_ast(); - peg.log = makeJSONFormatter(json); - json += "["; - auto ret = peg.parse_n(text.data(), text.size(), ast); - json += "]"; - return ret; -} - -string replace_all(const string& str, const char* from, const char* to) -{ - string ret; - ret.reserve(str.length()); - - size_t from_len = 0; - while (from[from_len]) { - from_len++; - } - - size_t start_pos = 0, pos; - while ((pos = str.find(from, start_pos)) != string::npos) { - ret += str.substr(start_pos, pos - start_pos); - ret += to; - pos += from_len; - start_pos = pos; - } - ret += str.substr(start_pos); - return ret; -} - -int run_server(int port, const vector& syntax, const vector& source) -{ - Server svr; - - svr.get("/", [&](const Request& req, Response& res) { - indexHTML = replace_all(indexHTML, "{{syntax}}", string(syntax.data(), syntax.size()).c_str()); - indexHTML = replace_all(indexHTML, "{{source}}", string(source.data(), source.size()).c_str()); - - res.set_content(indexHTML, "text/html"); - }); - - svr.post("/parse", [](const Request& req, Response& res) { - const auto& grammarText = req.params.at("grammar"); - - string grammarResult; - string codeResult; - string astResult; - string astResultOptimized; - - peg::parser peg; - auto ret = parse_grammar(grammarText, peg, grammarResult); - - 
if (ret && peg) { - const auto& codeText = req.params.at("code"); - shared_ptr ast; - if (parse_code(codeText, peg, codeResult, ast)) { - astResult = peg::ast_to_s(ast); - astResult = replace_all(astResult, "\n", "\\n"); - - astResultOptimized = peg::ast_to_s(peg::AstOptimizer(true).optimize(ast)); - astResultOptimized = replace_all(astResultOptimized, "\n", "\\n"); - } - } - - string json; - json += "{"; - json += "\"grammar\":" + grammarResult; - if (!codeResult.empty()) { - json += ",\"code\":" + codeResult; - json += ",\"ast\":\"" + astResult + "\""; - json += ",\"astOptimized\":\"" + astResultOptimized + "\""; - } - json += "}"; - - res.set_content(json, "application/json"); - }); - - svr.set_error_handler([](const Request& req, Response& res) { - const char* fmt = "

      Error Status: %d

      "; - char buf[BUFSIZ]; - snprintf(buf, sizeof(buf), fmt, res.status); - res.set_content(buf, "text/html"); - }); - - cerr << "Server running at http://localhost:" << port << "/" << endl; - svr.listen("localhost", port); - - return 0; -} - -// vim: et ts=4 sw=4 cin cino={1s ff=unix diff --git a/peg.vim b/peg.vim index 223d5dfc..c549289b 100644 --- a/peg.vim +++ b/peg.vim @@ -1,7 +1,8 @@ -syn match pegBoth /=/ -syn match pegM2U /->/ -syn match pegU2M /<-/ +set commentstring=#\ %s + +syn match pegAssign /<-/ +syn match pegAssign2 /←/ syn match pegName /\v[a-zA-Z_][a-zA-Z0-9_]*/ @@ -13,9 +14,8 @@ syn region pegClass start=/\v\[/ skip=/\v\\./ end=/\v]/ "syn match pegOperator /\(*\|?\|+\|!\|\.\|\~\)/ -hi def link pegBoth Statement -hi def link pegM2U Statement -hi def link pegU2M Statement +hi def link pegAssign Statement +hi def link pegAssign2 Statement hi def link pegName Identifier diff --git a/peglib.h b/peglib.h index e741cb30..52b93541 100644 --- a/peglib.h +++ b/peglib.h @@ -1,15 +1,27 @@ // // peglib.h // -// Copyright (c) 2015 Yuji Hirose. All rights reserved. +// Copyright (c) 2022 Yuji Hirose. All rights reserved. 
// MIT License // -#ifndef _CPPPEGLIB_PEGLIB_H_ -#define _CPPPEGLIB_PEGLIB_H_ +#pragma once + +/* + * Configuration + */ + +#ifndef CPPPEGLIB_HEURISTIC_ERROR_TOKEN_MAX_CHAR_COUNT +#define CPPPEGLIB_HEURISTIC_ERROR_TOKEN_MAX_CHAR_COUNT 32 +#endif #include +#include #include +#include +#if __has_include() +#include +#endif #include #include #include @@ -19,2775 +31,4939 @@ #include #include #include +#include #include #include +#include #include -// guard for older versions of VC++ -#ifdef _MSC_VER -// VS2013 has no constexpr -#if (_MSC_VER == 1800) -#define PEGLIB_NO_CONSTEXPR_SUPPORT -#elif (_MSC_VER >= 1800) -// good to go -#else (_MSC_VER < 1800) -#error "Requires C+11 support" -#endif +#if !defined(__cplusplus) || __cplusplus < 201703L +#error "Requires complete C++17 support" #endif namespace peg { -extern void* enabler; - /*----------------------------------------------------------------------------- - * any + * scope_exit *---------------------------------------------------------------------------*/ -class any -{ -public: - any() : content_(nullptr) {} +// This is based on +// "http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4189". 
- any(const any& rhs) : content_(rhs.clone()) {} +template struct scope_exit { + explicit scope_exit(EF &&f) + : exit_function(std::move(f)), execute_on_destruction{true} {} - any(any&& rhs) : content_(rhs.content_) { - rhs.content_ = nullptr; - } + scope_exit(scope_exit &&rhs) + : exit_function(std::move(rhs.exit_function)), + execute_on_destruction{rhs.execute_on_destruction} { + rhs.release(); + } - template - any(const T& value) : content_(new holder(value)) {} + ~scope_exit() { + if (execute_on_destruction) { this->exit_function(); } + } - any& operator=(const any& rhs) { - if (this != &rhs) { - if (content_) { - delete content_; - } - content_ = rhs.clone(); - } - return *this; - } + void release() { this->execute_on_destruction = false; } - any& operator=(any&& rhs) { - if (this != &rhs) { - if (content_) { - delete content_; - } - content_ = rhs.content_; - rhs.content_ = nullptr; - } - return *this; - } +private: + scope_exit(const scope_exit &) = delete; + void operator=(const scope_exit &) = delete; + scope_exit &operator=(scope_exit &&) = delete; - ~any() { - delete content_; - } + EF exit_function; + bool execute_on_destruction; +}; - bool is_undefined() const { - return content_ == nullptr; - } +/*----------------------------------------------------------------------------- + * UTF8 functions + *---------------------------------------------------------------------------*/ - template < - typename T, - typename std::enable_if::value>::type*& = enabler - > - T& get() { - if (!content_) { - throw std::bad_cast(); - } - auto p = dynamic_cast*>(content_); - assert(p); - if (!p) { - throw std::bad_cast(); - } - return p->value_; - } - - template < - typename T, - typename std::enable_if::value>::type*& = enabler - > - T& get() { - return *this; - } - - template < - typename T, - typename std::enable_if::value>::type*& = enabler - > - const T& get() const { - assert(content_); - auto p = dynamic_cast*>(content_); - assert(p); - if (!p) { - throw 
std::bad_cast(); - } - return p->value_; - } +inline size_t codepoint_length(const char *s8, size_t l) { + if (l) { + auto b = static_cast(s8[0]); + if ((b & 0x80) == 0) { + return 1; + } else if ((b & 0xE0) == 0xC0 && l >= 2) { + return 2; + } else if ((b & 0xF0) == 0xE0 && l >= 3) { + return 3; + } else if ((b & 0xF8) == 0xF0 && l >= 4) { + return 4; + } + } + return 0; +} - template < - typename T, - typename std::enable_if::value>::type*& = enabler - > - const any& get() const { - return *this; - } +inline size_t codepoint_count(const char *s8, size_t l) { + size_t count = 0; + for (size_t i = 0; i < l; i += codepoint_length(s8 + i, l - i)) { + count++; + } + return count; +} -private: - struct placeholder { - virtual ~placeholder() {}; - virtual placeholder* clone() const = 0; - }; +inline size_t encode_codepoint(char32_t cp, char *buff) { + if (cp < 0x0080) { + buff[0] = static_cast(cp & 0x7F); + return 1; + } else if (cp < 0x0800) { + buff[0] = static_cast(0xC0 | ((cp >> 6) & 0x1F)); + buff[1] = static_cast(0x80 | (cp & 0x3F)); + return 2; + } else if (cp < 0xD800) { + buff[0] = static_cast(0xE0 | ((cp >> 12) & 0xF)); + buff[1] = static_cast(0x80 | ((cp >> 6) & 0x3F)); + buff[2] = static_cast(0x80 | (cp & 0x3F)); + return 3; + } else if (cp < 0xE000) { + // D800 - DFFF is invalid... 
+ return 0; + } else if (cp < 0x10000) { + buff[0] = static_cast(0xE0 | ((cp >> 12) & 0xF)); + buff[1] = static_cast(0x80 | ((cp >> 6) & 0x3F)); + buff[2] = static_cast(0x80 | (cp & 0x3F)); + return 3; + } else if (cp < 0x110000) { + buff[0] = static_cast(0xF0 | ((cp >> 18) & 0x7)); + buff[1] = static_cast(0x80 | ((cp >> 12) & 0x3F)); + buff[2] = static_cast(0x80 | ((cp >> 6) & 0x3F)); + buff[3] = static_cast(0x80 | (cp & 0x3F)); + return 4; + } + return 0; +} - template - struct holder : placeholder { - holder(const T& value) : value_(value) {} - placeholder* clone() const override { - return new holder(value_); - } - T value_; - }; +inline std::string encode_codepoint(char32_t cp) { + char buff[4]; + auto l = encode_codepoint(cp, buff); + return std::string(buff, l); +} - placeholder* clone() const { - return content_ ? content_->clone() : nullptr; +inline bool decode_codepoint(const char *s8, size_t l, size_t &bytes, + char32_t &cp) { + if (l) { + auto b = static_cast(s8[0]); + if ((b & 0x80) == 0) { + bytes = 1; + cp = b; + return true; + } else if ((b & 0xE0) == 0xC0) { + if (l >= 2) { + bytes = 2; + cp = ((static_cast(s8[0] & 0x1F)) << 6) | + (static_cast(s8[1] & 0x3F)); + return true; + } + } else if ((b & 0xF0) == 0xE0) { + if (l >= 3) { + bytes = 3; + cp = ((static_cast(s8[0] & 0x0F)) << 12) | + ((static_cast(s8[1] & 0x3F)) << 6) | + (static_cast(s8[2] & 0x3F)); + return true; + } + } else if ((b & 0xF8) == 0xF0) { + if (l >= 4) { + bytes = 4; + cp = ((static_cast(s8[0] & 0x07)) << 18) | + ((static_cast(s8[1] & 0x3F)) << 12) | + ((static_cast(s8[2] & 0x3F)) << 6) | + (static_cast(s8[3] & 0x3F)); + return true; + } } + } + return false; +} - placeholder* content_; -}; +inline size_t decode_codepoint(const char *s8, size_t l, char32_t &cp) { + size_t bytes; + if (decode_codepoint(s8, l, bytes, cp)) { return bytes; } + return 0; +} + +inline char32_t decode_codepoint(const char *s8, size_t l) { + char32_t cp = 0; + decode_codepoint(s8, l, cp); + return cp; +} 
+ +inline std::u32string decode(const char *s8, size_t l) { + std::u32string out; + size_t i = 0; + while (i < l) { + auto beg = i++; + while (i < l && (s8[i] & 0xc0) == 0x80) { + i++; + } + out += decode_codepoint(&s8[beg], (i - beg)); + } + return out; +} + +template const char *u8(const T *s) { + return reinterpret_cast(s); +} /*----------------------------------------------------------------------------- - * scope_exit + * escape_characters *---------------------------------------------------------------------------*/ -// This is based on "http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4189". +inline std::string escape_characters(const char *s, size_t n) { + std::string str; + for (size_t i = 0; i < n; i++) { + auto c = s[i]; + switch (c) { + case '\f': str += "\\f"; break; + case '\n': str += "\\n"; break; + case '\r': str += "\\r"; break; + case '\t': str += "\\t"; break; + case '\v': str += "\\v"; break; + default: str += c; break; + } + } + return str; +} -template -struct scope_exit -{ - explicit scope_exit(EF&& f) - : exit_function(std::move(f)) - , execute_on_destruction{true} {} +inline std::string escape_characters(std::string_view sv) { + return escape_characters(sv.data(), sv.size()); +} - scope_exit(scope_exit&& rhs) - : exit_function(std::move(rhs.exit_function)) - , execute_on_destruction{rhs.execute_on_destruction} { - rhs.release(); - } +/*----------------------------------------------------------------------------- + * resolve_escape_sequence + *---------------------------------------------------------------------------*/ - ~scope_exit() { - if (execute_on_destruction) { - this->exit_function(); - } - } +inline bool is_hex(char c, int &v) { + if ('0' <= c && c <= '9') { + v = c - '0'; + return true; + } else if ('a' <= c && c <= 'f') { + v = c - 'a' + 10; + return true; + } else if ('A' <= c && c <= 'F') { + v = c - 'A' + 10; + return true; + } + return false; +} - void release() { - this->execute_on_destruction = false; - } 
+inline bool is_digit(char c, int &v) { + if ('0' <= c && c <= '9') { + v = c - '0'; + return true; + } + return false; +} -private: - scope_exit(const scope_exit&) = delete; - void operator=(const scope_exit&) = delete; - scope_exit& operator=(scope_exit&&) = delete; +inline std::pair parse_hex_number(const char *s, size_t n, + size_t i) { + int ret = 0; + int val; + while (i < n && is_hex(s[i], val)) { + ret = static_cast(ret * 16 + val); + i++; + } + return std::pair(ret, i); +} - EF exit_function; - bool execute_on_destruction; -}; +inline std::pair parse_octal_number(const char *s, size_t n, + size_t i) { + int ret = 0; + int val; + while (i < n && is_digit(s[i], val)) { + ret = static_cast(ret * 8 + val); + i++; + } + return std::pair(ret, i); +} + +inline std::string resolve_escape_sequence(const char *s, size_t n) { + std::string r; + r.reserve(n); + + size_t i = 0; + while (i < n) { + auto ch = s[i]; + if (ch == '\\') { + i++; + assert(i < n); + + switch (s[i]) { + case 'f': + r += '\f'; + i++; + break; + case 'n': + r += '\n'; + i++; + break; + case 'r': + r += '\r'; + i++; + break; + case 't': + r += '\t'; + i++; + break; + case 'v': + r += '\v'; + i++; + break; + case '\'': + r += '\''; + i++; + break; + case '"': + r += '"'; + i++; + break; + case '[': + r += '['; + i++; + break; + case ']': + r += ']'; + i++; + break; + case '\\': + r += '\\'; + i++; + break; + case 'x': + case 'u': { + char32_t cp; + std::tie(cp, i) = parse_hex_number(s, n, i + 1); + r += encode_codepoint(cp); + break; + } + default: { + char32_t cp; + std::tie(cp, i) = parse_octal_number(s, n, i); + r += encode_codepoint(cp); + break; + } + } + } else { + r += ch; + i++; + } + } + return r; +} + +/*----------------------------------------------------------------------------- + * token_to_number_ - This function should be removed eventually + *---------------------------------------------------------------------------*/ -template -auto make_scope_exit(EF&& exit_function) -> 
scope_exit { - return scope_exit>(std::forward(exit_function)); +template T token_to_number_(std::string_view sv) { + T n = 0; +#if __has_include() + if constexpr (!std::is_floating_point::value) { + std::from_chars(sv.data(), sv.data() + sv.size(), n); +#else + if constexpr (false) { +#endif + } else { + auto s = std::string(sv); + std::istringstream ss(s); + ss >> n; + } + return n; } /*----------------------------------------------------------------------------- - * PEG + * Trie *---------------------------------------------------------------------------*/ -/* -* Semantic values -*/ -struct SemanticValues : protected std::vector -{ - const char* path; - const char* ss; - const char* c_str() const { return s_; } - size_t length() const { return n_; } - size_t choice() const { return choice_; } - - std::vector> tokens; - - SemanticValues() : s_(nullptr), n_(0), choice_(0) {} - - using std::vector::iterator; - using std::vector::const_iterator; - using std::vector::size; - using std::vector::empty; - using std::vector::assign; - using std::vector::begin; - using std::vector::end; - using std::vector::rbegin; - using std::vector::rend; - using std::vector::operator[]; - using std::vector::at; - using std::vector::resize; - using std::vector::front; - using std::vector::back; - using std::vector::push_back; - using std::vector::pop_back; - using std::vector::insert; - using std::vector::erase; - using std::vector::clear; - using std::vector::swap; - using std::vector::emplace; - using std::vector::emplace_back; - - std::string str() const { - return std::string(s_, n_); - } - - std::string token(size_t id = 0) const { - if (!tokens.empty()) { - assert(id < tokens.size()); - const auto& tok = tokens[id]; - return std::string(tok.first, tok.second); +class Trie { +public: + Trie(const std::vector &items, bool ignore_case) + : ignore_case_(ignore_case) { + size_t id = 0; + for (const auto &item : items) { + const auto &s = ignore_case ? 
to_lower(item) : item; + for (size_t len = 1; len <= item.size(); len++) { + auto last = len == item.size(); + std::string_view sv(s.data(), len); + auto it = dic_.find(sv); + if (it == dic_.end()) { + dic_.emplace(sv, Info{last, last, id}); + } else if (last) { + it->second.match = true; + } else { + it->second.done = false; } - return std::string(s_, n_); + } + id++; + } + } + + size_t match(const char *text, size_t text_len, size_t &id) const { + std::string lower_text; + if (ignore_case_) { + lower_text = to_lower(text); + text = lower_text.data(); + } + + size_t match_len = 0; + auto done = false; + size_t len = 1; + while (!done && len <= text_len) { + std::string_view sv(text, len); + auto it = dic_.find(sv); + if (it == dic_.end()) { + done = true; + } else { + if (it->second.match) { + match_len = len; + id = it->second.id; + } + if (it->second.done) { done = true; } + } + len += 1; } + return match_len; + } - template - auto transform(size_t beg = 0, size_t end = -1) const -> vector { - return this->transform(beg, end, [](const any& v) { return v.get(); }); - } + size_t size() const { return dic_.size(); } private: - friend class Context; - friend class PrioritizedChoice; - friend class Holder; - - const char* s_; - size_t n_; - size_t choice_; - - template - auto transform(F f) const -> vector::type> { - vector::type> r; - for (const auto& v: *this) { - r.emplace_back(f(v)); - } - return r; + std::string to_lower(std::string s) const { + for (char &c : s) { + c = std::tolower(c); } + return s; + } - template - auto transform(size_t beg, size_t end, F f) const -> vector::type> { - vector::type> r; - end = (std::min)(end, size()); - for (size_t i = beg; i < end; i++) { - r.emplace_back(f((*this)[i])); - } - return r; - } + struct Info { + bool done; + bool match; + size_t id; + }; + + // TODO: Use unordered_map when heterogeneous lookup is supported in C++20 + // std::unordered_map dic_; + std::map> dic_; + + bool ignore_case_; }; 
+/*----------------------------------------------------------------------------- + * PEG + *---------------------------------------------------------------------------*/ + /* - * Semantic action + * Line information utility function */ -template < - typename R, typename F, - typename std::enable_if::value>::type*& = enabler, - typename... Args> -any call(F fn, Args&&... args) { - fn(std::forward(args)...); - return any(); -} - -template < - typename R, typename F, - typename std::enable_if::type, any>::value>::type*& = enabler, - typename... Args> -any call(F fn, Args&&... args) { - return fn(std::forward(args)...); -} - -template < - typename R, typename F, - typename std::enable_if< - !std::is_void::value && - !std::is_same::type, any>::value>::type*& = enabler, - typename... Args> -any call(F fn, Args&&... args) { - return any(fn(std::forward(args)...)); -} +inline std::pair line_info(const char *start, const char *cur) { + auto p = start; + auto col_ptr = p; + auto no = 1; -class Action -{ -public: - Action() = default; + while (p < cur) { + if (*p == '\n') { + no++; + col_ptr = p + 1; + } + p++; + } - Action(const Action& rhs) : fn_(rhs.fn_) {} + auto col = codepoint_count(col_ptr, p - col_ptr) + 1; - template ::value && !std::is_same::value>::type*& = enabler> - Action(F fn) : fn_(make_adaptor(fn, &F::operator())) {} + return std::pair(no, col); +} - template ::value>::type*& = enabler> - Action(F fn) : fn_(make_adaptor(fn, fn)) {} +/* + * String tag + */ +inline constexpr unsigned int str2tag_core(const char *s, size_t l, + unsigned int h) { + return (l == 0) ? 
h + : str2tag_core(s + 1, l - 1, + (h * 33) ^ static_cast(*s)); +} - template ::value>::type*& = enabler> - Action(F fn) {} +inline constexpr unsigned int str2tag(std::string_view sv) { + return str2tag_core(sv.data(), sv.size(), 0); +} - template ::value && !std::is_same::value>::type*& = enabler> - void operator=(F fn) { - fn_ = make_adaptor(fn, &F::operator()); - } +namespace udl { - template ::value>::type*& = enabler> - void operator=(F fn) { - fn_ = make_adaptor(fn, fn); - } +inline constexpr unsigned int operator"" _(const char *s, size_t l) { + return str2tag_core(s, l, 0); +} - template ::value>::type*& = enabler> - void operator=(F fn) {} +} // namespace udl - operator bool() const { - return (bool)fn_; - } +/* + * Semantic values + */ +class Context; - any operator()(const SemanticValues& sv, any& dt) const { - return fn_(sv, dt); - } +struct SemanticValues : protected std::vector { + SemanticValues() = default; + SemanticValues(Context *c) : c_(c) {} + + // Input text + const char *path = nullptr; + const char *ss = nullptr; + + // Matched string + std::string_view sv() const { return sv_; } + + // Definition name + const std::string &name() const { return name_; } + + std::vector tags; + + // Line number and column at which the matched string is + std::pair line_info() const; + + // Choice count + size_t choice_count() const { return choice_count_; } + + // Choice number (0 based index) + size_t choice() const { return choice_; } + + // Tokens + std::vector tokens; + + std::string_view token(size_t id = 0) const { + if (tokens.empty()) { return sv_; } + assert(id < tokens.size()); + return tokens[id]; + } + + // Token conversion + std::string token_to_string(size_t id = 0) const { + return std::string(token(id)); + } + + template T token_to_number() const { + return token_to_number_(token()); + } + + // Transform the semantic value vector to another vector + template + std::vector transform(size_t beg = 0, + size_t end = static_cast(-1)) const { + 
std::vector r; + end = (std::min)(end, size()); + for (size_t i = beg; i < end; i++) { + r.emplace_back(std::any_cast((*this)[i])); + } + return r; + } + + void append(SemanticValues &chvs) { + sv_ = chvs.sv_; + for (auto &v : chvs) { + emplace_back(std::move(v)); + } + for (auto &tag : chvs.tags) { + tags.emplace_back(std::move(tag)); + } + for (auto &tok : chvs.tokens) { + tokens.emplace_back(std::move(tok)); + } + } + + using std::vector::iterator; + using std::vector::const_iterator; + using std::vector::size; + using std::vector::empty; + using std::vector::assign; + using std::vector::begin; + using std::vector::end; + using std::vector::rbegin; + using std::vector::rend; + using std::vector::operator[]; + using std::vector::at; + using std::vector::resize; + using std::vector::front; + using std::vector::back; + using std::vector::push_back; + using std::vector::pop_back; + using std::vector::insert; + using std::vector::erase; + using std::vector::clear; + using std::vector::swap; + using std::vector::emplace; + using std::vector::emplace_back; private: - template - struct TypeAdaptor { - TypeAdaptor(std::function fn) - : fn_(fn) {} - any operator()(const SemanticValues& sv, any& dt) { - return call(fn_, sv); - } - std::function fn_; - }; - - template - struct TypeAdaptor_c { - TypeAdaptor_c(std::function fn) - : fn_(fn) {} - any operator()(const SemanticValues& sv, any& dt) { - return call(fn_, sv, dt); - } - std::function fn_; - }; - - typedef std::function Fty; + friend class Context; + friend class Dictionary; + friend class Sequence; + friend class PrioritizedChoice; + friend class Repetition; + friend class Holder; + friend class PrecedenceClimbing; + + Context *c_ = nullptr; + std::string_view sv_; + size_t choice_count_ = 0; + size_t choice_ = 0; + std::string name_; +}; - template - Fty make_adaptor(F fn, R (F::*mf)(const SemanticValues& sv) const) { - return TypeAdaptor(fn); - } +/* + * Semantic action + */ +template std::any call(F fn, Args 
&&...args) { + using R = decltype(fn(std::forward(args)...)); + if constexpr (std::is_void::value) { + fn(std::forward(args)...); + return std::any(); + } else if constexpr (std::is_same::type, + std::any>::value) { + return fn(std::forward(args)...); + } else { + return std::any(fn(std::forward(args)...)); + } +} - template - Fty make_adaptor(F fn, R (F::*mf)(const SemanticValues& sv)) { - return TypeAdaptor(fn); - } +template +struct argument_count : argument_count {}; +template +struct argument_count + : std::integral_constant {}; +template +struct argument_count + : std::integral_constant {}; +template +struct argument_count + : std::integral_constant {}; + +class Action { +public: + Action() = default; + Action(Action &&rhs) = default; + template Action(F fn) : fn_(make_adaptor(fn)) {} + template void operator=(F fn) { fn_ = make_adaptor(fn); } + Action &operator=(const Action &rhs) = default; - template - Fty make_adaptor(F fn, R (*mf)(const SemanticValues& sv)) { - return TypeAdaptor(fn); - } + operator bool() const { return bool(fn_); } - template - Fty make_adaptor(F fn, R (F::*mf)(const SemanticValues& sv, any& dt) const) { - return TypeAdaptor_c(fn); - } + std::any operator()(SemanticValues &vs, std::any &dt) const { + return fn_(vs, dt); + } - template - Fty make_adaptor(F fn, R (F::*mf)(const SemanticValues& sv, any& dt)) { - return TypeAdaptor_c(fn); - } +private: + using Fty = std::function; - template - Fty make_adaptor(F fn, R(*mf)(const SemanticValues& sv, any& dt)) { - return TypeAdaptor_c(fn); + template Fty make_adaptor(F fn) { + if constexpr (argument_count::value == 1) { + return [fn](auto &vs, auto & /*dt*/) { return call(fn, vs); }; + } else { + return [fn](auto &vs, auto &dt) { return call(fn, vs, dt); }; } + } - Fty fn_; + Fty fn_; }; /* - * Semantic predicate + * Parse result helper */ -// Note: 'parse_error' exception class should be be used in sematic action handlers to reject the rule. 
-struct parse_error { - parse_error() = default; - parse_error(const char* s) : s_(s) {} - const char* what() const { return s_.empty() ? nullptr : s_.c_str(); } -private: - std::string s_; -}; +inline bool success(size_t len) { return len != static_cast(-1); } -/* - * Match action - */ -typedef std::function MatchAction; +inline bool fail(size_t len) { return len == static_cast(-1); } /* - * Result + * Log */ -inline bool success(size_t len) { - return len != -1; -} - -inline bool fail(size_t len) { - return len == -1; -} +using Log = std::function; /* - * Context + * ErrorInfo */ -class Ope; -class Context; class Definition; -typedef std::function Tracer; - -class Context -{ -public: - const char* path; - const char* s; - const size_t l; - - const char* error_pos; - const char* message_pos; - std::string message; // TODO: should be `int`. - - std::vector> value_stack; - size_t value_stack_size; - - size_t nest_level; - - bool in_token; - - std::shared_ptr whitespaceOpe; - bool in_whitespace; - - const size_t def_count; - const bool enablePackratParsing; - std::vector cache_register; - std::vector cache_success; - - std::map, std::tuple> cache_result; - - std::function tracer; - - Context( - const char* path, - const char* s, - size_t l, - size_t def_count, - std::shared_ptr whitespaceOpe, - bool enablePackratParsing, - Tracer tracer) - : path(path) - , s(s) - , l(l) - , error_pos(nullptr) - , message_pos(nullptr) - , value_stack_size(0) - , nest_level(0) - , in_token(false) - , whitespaceOpe(whitespaceOpe) - , in_whitespace(false) - , def_count(def_count) - , enablePackratParsing(enablePackratParsing) - , cache_register(enablePackratParsing ? def_count * (l + 1) : 0) - , cache_success(enablePackratParsing ? 
def_count * (l + 1) : 0) - , tracer(tracer) - { - } - - template - void packrat(const char* s, size_t def_id, size_t& len, any& val, T fn) { - if (!enablePackratParsing) { - fn(val); - return; - } +struct ErrorInfo { + const char *error_pos = nullptr; + std::vector> expected_tokens; + const char *message_pos = nullptr; + std::string message; + std::string label; + const char *last_output_pos = nullptr; + bool keep_previous_token = false; - auto col = s - this->s; - auto has_cache = cache_register[def_count * col + def_id]; + void clear() { + error_pos = nullptr; + expected_tokens.clear(); + message_pos = nullptr; + message.clear(); + } - if (has_cache) { - if (cache_success[def_count * col + def_id]) { - const auto& key = std::make_pair(s - this->s, def_id); - std::tie(len, val) = cache_result[key]; - return; - } else { - len = -1; - return; - } - } else { - fn(val); - cache_register[def_count * col + def_id] = true; - cache_success[def_count * col + def_id] = success(len); - if (success(len)) { - const auto& key = std::make_pair(s - this->s, def_id); - cache_result[key] = std::make_pair(len, val); - } - return; - } + void add(const char *error_literal, const Definition *error_rule) { + for (const auto &[t, r] : expected_tokens) { + if (t == error_literal && r == error_rule) { return; } } + expected_tokens.emplace_back(error_literal, error_rule); + } - SemanticValues& push() { - assert(value_stack_size <= value_stack.size()); - if (value_stack_size == value_stack.size()) { - value_stack.emplace_back(std::make_shared()); - } - auto& sv = *value_stack[value_stack_size++]; - if (!sv.empty()) { - sv.clear(); + void output_log(const Log &log, const char *s, size_t n); + +private: + int cast_char(char c) const { return static_cast(c); } + + std::string heuristic_error_token(const char *s, size_t n, + const char *pos) const { + auto len = n - std::distance(s, pos); + if (len) { + size_t i = 0; + auto c = cast_char(pos[i++]); + if (!std::ispunct(c) && !std::isspace(c)) { + 
while (i < len && !std::ispunct(cast_char(pos[i])) && + !std::isspace(cast_char(pos[i]))) { + i++; } - sv.path = path; - sv.ss = s; - sv.s_ = nullptr; - sv.n_ = 0; - sv.tokens.clear(); - return sv; - } + } - void pop() { - value_stack_size--; - } + size_t count = CPPPEGLIB_HEURISTIC_ERROR_TOKEN_MAX_CHAR_COUNT; + size_t j = 0; + while (count > 0 && j < i) { + j += codepoint_length(&pos[j], i - j); + count--; + } - void set_error_pos(const char* s) { - if (error_pos < s) error_pos = s; + return escape_characters(pos, j); } + return std::string(); + } - void trace(const char* name, const char* s, size_t n, SemanticValues& sv, any& dt) const { - if (tracer) tracer(name, s, n, sv, *this, dt); + std::string replace_all(std::string str, const std::string &from, + const std::string &to) const { + size_t pos = 0; + while ((pos = str.find(from, pos)) != std::string::npos) { + str.replace(pos, from.length(), to); + pos += to.length(); } + return str; + } }; /* - * Parser operators + * Context */ -class Ope -{ -public: - struct Visitor; +class Ope; - virtual ~Ope() {}; - virtual size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const = 0; - virtual void accept(Visitor& v) = 0; -}; +using TracerEnter = std::function; -class Sequence : public Ope -{ +using TracerLeave = std::function; + +using TracerStartOrEnd = std::function; + +class Context { public: - Sequence(const Sequence& rhs) : opes_(rhs.opes_) {} - -#if defined(_MSC_VER) && _MSC_VER < 1900 // Less than Visual Studio 2015 - // NOTE: Compiler Error C2797 on Visual Studio 2013 - // "The C++ compiler in Visual Studio does not implement list - // initialization inside either a member initializer list or a non-static - // data member initializer. Before Visual Studio 2013 Update 3, this was - // silently converted to a function call, which could lead to bad code - // generation. Visual Studio 2013 Update 3 reports this as an error." 
- template - Sequence(const Args& ...args) { - opes_ = std::vector>{ static_cast>(args)... }; - } -#else - template - Sequence(const Args& ...args) : opes_{ static_cast>(args)... } {} -#endif + const char *path; + const char *s; + const size_t l; - Sequence(const std::vector>& opes) : opes_(opes) {} - Sequence(std::vector>&& opes) : opes_(opes) {} + ErrorInfo error_info; + bool recovered = false; - size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { - c.trace("Sequence", s, n, sv, dt); - size_t i = 0; - for (const auto& ope : opes_) { - c.nest_level++; - auto se = make_scope_exit([&]() { c.nest_level--; }); - const auto& rule = *ope; - auto len = rule.parse(s + i, n - i, sv, c, dt); - if (fail(len)) { - return -1; - } - i += len; - } - return i; - } + std::vector> value_stack; + size_t value_stack_size = 0; - void accept(Visitor& v) override; + std::vector rule_stack; + std::vector>> args_stack; - std::vector> opes_; -}; + size_t in_token_boundary_count = 0; -class PrioritizedChoice : public Ope -{ -public: -#if defined(_MSC_VER) && _MSC_VER < 1900 // Less than Visual Studio 2015 - // NOTE: Compiler Error C2797 on Visual Studio 2013 - // "The C++ compiler in Visual Studio does not implement list - // initialization inside either a member initializer list or a non-static - // data member initializer. Before Visual Studio 2013 Update 3, this was - // silently converted to a function call, which could lead to bad code - // generation. Visual Studio 2013 Update 3 reports this as an error." - template - PrioritizedChoice(const Args& ...args) { - opes_ = std::vector>{ static_cast>(args)... }; - } -#else - template - PrioritizedChoice(const Args& ...args) : opes_{ static_cast>(args)... 
} {} -#endif + std::shared_ptr whitespaceOpe; + bool in_whitespace = false; - PrioritizedChoice(const std::vector>& opes) : opes_(opes) {} - PrioritizedChoice(std::vector>&& opes) : opes_(opes) {} - - size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { - c.trace("PrioritizedChoice", s, n, sv, dt); - size_t id = 0; - for (const auto& ope : opes_) { - c.nest_level++; - auto& chldsv = c.push(); - auto se = make_scope_exit([&]() { - c.nest_level--; - c.pop(); - }); - const auto& rule = *ope; - auto len = rule.parse(s, n, chldsv, c, dt); - if (success(len)) { - if (!chldsv.empty()) { - sv.insert(sv.end(), chldsv.begin(), chldsv.end()); - } - sv.s_ = chldsv.c_str(); - sv.n_ = chldsv.length(); - sv.choice_ = id; - sv.tokens.insert(sv.tokens.end(), chldsv.tokens.begin(), chldsv.tokens.end()); - return len; - } - id++; - } - return -1; - } + std::shared_ptr wordOpe; - void accept(Visitor& v) override; + std::vector> capture_scope_stack; + size_t capture_scope_stack_size = 0; - size_t size() const { return opes_.size(); } + std::vector cut_stack; - std::vector> opes_; -}; + const size_t def_count; + const bool enablePackratParsing; + std::vector cache_registered; + std::vector cache_success; -class ZeroOrMore : public Ope -{ -public: - ZeroOrMore(const std::shared_ptr& ope) : ope_(ope) {} + std::map, std::tuple> + cache_values; - size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { - c.trace("ZeroOrMore", s, n, sv, dt); - auto save_error_pos = c.error_pos; - size_t i = 0; - while (n - i > 0) { - c.nest_level++; - auto se = make_scope_exit([&]() { c.nest_level--; }); - auto save_sv_size = sv.size(); - auto save_tok_size = sv.tokens.size(); - const auto& rule = *ope_; - auto len = rule.parse(s + i, n - i, sv, c, dt); - if (fail(len)) { - if (sv.size() != save_sv_size) { - sv.erase(sv.begin() + save_sv_size); - } - if (sv.tokens.size() != save_tok_size) { - sv.tokens.erase(sv.tokens.begin() + 
save_tok_size); - } - c.error_pos = save_error_pos; - break; - } - i += len; - } - return i; - } + TracerEnter tracer_enter; + TracerLeave tracer_leave; + std::any trace_data; + const bool verbose_trace; - void accept(Visitor& v) override; + Log log; - std::shared_ptr ope_; -}; + Context(const char *path, const char *s, size_t l, size_t def_count, + std::shared_ptr whitespaceOpe, std::shared_ptr wordOpe, + bool enablePackratParsing, TracerEnter tracer_enter, + TracerLeave tracer_leave, std::any trace_data, bool verbose_trace, + Log log) + : path(path), s(s), l(l), whitespaceOpe(whitespaceOpe), wordOpe(wordOpe), + def_count(def_count), enablePackratParsing(enablePackratParsing), + cache_registered(enablePackratParsing ? def_count * (l + 1) : 0), + cache_success(enablePackratParsing ? def_count * (l + 1) : 0), + tracer_enter(tracer_enter), tracer_leave(tracer_leave), + trace_data(trace_data), verbose_trace(verbose_trace), log(log) { -class OneOrMore : public Ope -{ -public: - OneOrMore(const std::shared_ptr& ope) : ope_(ope) {} + push_args({}); + push_capture_scope(); + } - size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { - c.trace("OneOrMore", s, n, sv, dt); - auto len = 0; - { - c.nest_level++; - auto se = make_scope_exit([&]() { c.nest_level--; }); - const auto& rule = *ope_; - len = rule.parse(s, n, sv, c, dt); - if (fail(len)) { - return -1; - } - } - auto save_error_pos = c.error_pos; - auto i = len; - while (n - i > 0) { - c.nest_level++; - auto se = make_scope_exit([&]() { c.nest_level--; }); - auto save_sv_size = sv.size(); - auto save_tok_size = sv.tokens.size(); - const auto& rule = *ope_; - auto len = rule.parse(s + i, n - i, sv, c, dt); - if (fail(len)) { - if (sv.size() != save_sv_size) { - sv.erase(sv.begin() + save_sv_size); - } - if (sv.tokens.size() != save_tok_size) { - sv.tokens.erase(sv.tokens.begin() + save_tok_size); - } - c.error_pos = save_error_pos; - break; - } - i += len; - } - return i; - } + 
~Context() { + pop_capture_scope(); - void accept(Visitor& v) override; + assert(!value_stack_size); + assert(!capture_scope_stack_size); + assert(cut_stack.empty()); + } - std::shared_ptr ope_; -}; + Context(const Context &) = delete; + Context(Context &&) = delete; + Context operator=(const Context &) = delete; -class Option : public Ope -{ -public: - Option(const std::shared_ptr& ope) : ope_(ope) {} - - size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { - c.trace("Option", s, n, sv, dt); - auto save_error_pos = c.error_pos; - c.nest_level++; - auto save_sv_size = sv.size(); - auto save_tok_size = sv.tokens.size(); - auto se = make_scope_exit([&]() { c.nest_level--; }); - const auto& rule = *ope_; - auto len = rule.parse(s, n, sv, c, dt); - if (success(len)) { - return len; - } else { - if (sv.size() != save_sv_size) { - sv.erase(sv.begin() + save_sv_size); - } - if (sv.tokens.size() != save_tok_size) { - sv.tokens.erase(sv.tokens.begin() + save_tok_size); - } - c.error_pos = save_error_pos; - return 0; - } + template + void packrat(const char *a_s, size_t def_id, size_t &len, std::any &val, + T fn) { + if (!enablePackratParsing) { + fn(val); + return; } - void accept(Visitor& v) override; + auto col = a_s - s; + auto idx = def_count * static_cast(col) + def_id; - std::shared_ptr ope_; -}; + if (cache_registered[idx]) { + if (cache_success[idx]) { + auto key = std::pair(col, def_id); + std::tie(len, val) = cache_values[key]; + return; + } else { + len = static_cast(-1); + return; + } + } else { + fn(val); + cache_registered[idx] = true; + cache_success[idx] = success(len); + if (success(len)) { + auto key = std::pair(col, def_id); + cache_values[key] = std::pair(len, val); + } + return; + } + } + + SemanticValues &push() { + push_capture_scope(); + return push_semantic_values_scope(); + } + + void pop() { + pop_capture_scope(); + pop_semantic_values_scope(); + } + + // Semantic values + SemanticValues 
&push_semantic_values_scope() { + assert(value_stack_size <= value_stack.size()); + if (value_stack_size == value_stack.size()) { + value_stack.emplace_back(std::make_shared(this)); + } else { + auto &vs = *value_stack[value_stack_size]; + if (!vs.empty()) { + vs.clear(); + if (!vs.tags.empty()) { vs.tags.clear(); } + } + vs.sv_ = std::string_view(); + vs.choice_count_ = 0; + vs.choice_ = 0; + if (!vs.tokens.empty()) { vs.tokens.clear(); } + } + + auto &vs = *value_stack[value_stack_size++]; + vs.path = path; + vs.ss = s; + return vs; + } + + void pop_semantic_values_scope() { value_stack_size--; } + + // Arguments + void push_args(std::vector> &&args) { + args_stack.emplace_back(args); + } + + void pop_args() { args_stack.pop_back(); } + + const std::vector> &top_args() const { + return args_stack[args_stack.size() - 1]; + } + + // Capture scope + void push_capture_scope() { + assert(capture_scope_stack_size <= capture_scope_stack.size()); + if (capture_scope_stack_size == capture_scope_stack.size()) { + capture_scope_stack.emplace_back( + std::map()); + } else { + auto &cs = capture_scope_stack[capture_scope_stack_size]; + if (!cs.empty()) { cs.clear(); } + } + capture_scope_stack_size++; + } + + void pop_capture_scope() { capture_scope_stack_size--; } + + void shift_capture_values() { + assert(capture_scope_stack_size >= 2); + auto curr = &capture_scope_stack[capture_scope_stack_size - 1]; + auto prev = curr - 1; + for (const auto &[k, v] : *curr) { + (*prev)[k] = v; + } + } + + // Error + void set_error_pos(const char *a_s, const char *literal = nullptr); + + // Trace + void trace_enter(const Ope &ope, const char *a_s, size_t n, + const SemanticValues &vs, std::any &dt); + void trace_leave(const Ope &ope, const char *a_s, size_t n, + const SemanticValues &vs, std::any &dt, size_t len); + bool is_traceable(const Ope &ope) const; + + // Line info + std::pair line_info(const char *cur) const { + std::call_once(source_line_index_init_, [this]() { + for (size_t pos 
= 0; pos < l; pos++) { + if (s[pos] == '\n') { source_line_index.push_back(pos); } + } + source_line_index.push_back(l); + }); -class AndPredicate : public Ope -{ -public: - AndPredicate(const std::shared_ptr& ope) : ope_(ope) {} - - size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { - c.trace("AndPredicate", s, n, sv, dt); - c.nest_level++; - auto& chldsv = c.push(); - auto se = make_scope_exit([&]() { - c.nest_level--; - c.pop(); - }); - const auto& rule = *ope_; - auto len = rule.parse(s, n, chldsv, c, dt); - if (success(len)) { - return 0; - } else { - return -1; - } - } + auto pos = static_cast(std::distance(s, cur)); + + auto it = std::lower_bound( + source_line_index.begin(), source_line_index.end(), pos, + [](size_t element, size_t value) { return element < value; }); - void accept(Visitor& v) override; + auto id = static_cast(std::distance(source_line_index.begin(), it)); + auto off = pos - (id == 0 ? 0 : source_line_index[id - 1] + 1); + return std::pair(id + 1, off + 1); + } - std::shared_ptr ope_; + size_t next_trace_id = 0; + std::vector trace_ids; + bool ignore_trace_state = false; + mutable std::once_flag source_line_index_init_; + mutable std::vector source_line_index; }; -class NotPredicate : public Ope -{ +/* + * Parser operators + */ +class Ope { public: - NotPredicate(const std::shared_ptr& ope) : ope_(ope) {} - - size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { - c.trace("NotPredicate", s, n, sv, dt); - auto save_error_pos = c.error_pos; - c.nest_level++; - auto& chldsv = c.push(); - auto se = make_scope_exit([&]() { - c.nest_level--; - c.pop(); - }); - const auto& rule = *ope_; - auto len = rule.parse(s, n, chldsv, c, dt); - if (success(len)) { - c.set_error_pos(s); - return -1; - } else { - c.error_pos = save_error_pos; - return 0; - } - } - - void accept(Visitor& v) override; - - std::shared_ptr ope_; + struct Visitor; + + virtual ~Ope() = default; + 
size_t parse(const char *s, size_t n, SemanticValues &vs, Context &c, + std::any &dt) const; + virtual size_t parse_core(const char *s, size_t n, SemanticValues &vs, + Context &c, std::any &dt) const = 0; + virtual void accept(Visitor &v) = 0; }; -class LiteralString : public Ope -{ +class Sequence : public Ope { public: - LiteralString(const std::string& s) : lit_(s) {} - - size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override; + template + Sequence(const Args &...args) + : opes_{static_cast>(args)...} {} + Sequence(const std::vector> &opes) : opes_(opes) {} + Sequence(std::vector> &&opes) : opes_(opes) {} + + size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, + std::any &dt) const override { + auto &chvs = c.push_semantic_values_scope(); + auto se = scope_exit([&]() { c.pop_semantic_values_scope(); }); + size_t i = 0; + for (const auto &ope : opes_) { + auto len = ope->parse(s + i, n - i, chvs, c, dt); + if (fail(len)) { return len; } + i += len; + } + vs.append(chvs); + return i; + } - void accept(Visitor& v) override; + void accept(Visitor &v) override; - std::string lit_; + std::vector> opes_; }; -class CharacterClass : public Ope -{ +class PrioritizedChoice : public Ope { public: - CharacterClass(const std::string& chars) : chars_(chars) {} - - size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { - c.trace("CharacterClass", s, n, sv, dt); - // TODO: UTF8 support - if (n < 1) { - c.set_error_pos(s); - return -1; - } - auto ch = s[0]; - auto i = 0u; - while (i < chars_.size()) { - if (i + 2 < chars_.size() && chars_[i + 1] == '-') { - if (chars_[i] <= ch && ch <= chars_[i + 2]) { - return 1; - } - i += 3; - } else { - if (chars_[i] == ch) { - return 1; - } - i += 1; - } - } - c.set_error_pos(s); - return -1; - } + template + PrioritizedChoice(bool for_label, const Args &...args) + : opes_{static_cast>(args)...}, + for_label_(for_label) {} + 
PrioritizedChoice(const std::vector> &opes) + : opes_(opes) {} + PrioritizedChoice(std::vector> &&opes) : opes_(opes) {} + + size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, + std::any &dt) const override { + size_t len = static_cast(-1); + + if (!for_label_) { c.cut_stack.push_back(false); } + auto se = scope_exit([&]() { + if (!for_label_) { c.cut_stack.pop_back(); } + }); - void accept(Visitor& v) override; + size_t id = 0; + for (const auto &ope : opes_) { + if (!c.cut_stack.empty()) { c.cut_stack.back() = false; } - std::string chars_; -}; + auto &chvs = c.push(); + c.error_info.keep_previous_token = id > 0; + auto se = scope_exit([&]() { + c.pop(); + c.error_info.keep_previous_token = false; + }); -class Character : public Ope -{ -public: - Character(char ch) : ch_(ch) {} - - size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { - c.trace("Character", s, n, sv, dt); - // TODO: UTF8 support - if (n < 1 || s[0] != ch_) { - c.set_error_pos(s); - return -1; - } - return 1; + len = ope->parse(s, n, chvs, c, dt); + + if (success(len)) { + vs.append(chvs); + vs.choice_count_ = opes_.size(); + vs.choice_ = id; + c.shift_capture_values(); + break; + } else if (!c.cut_stack.empty() && c.cut_stack.back()) { + break; + } + + id++; } - void accept(Visitor& v) override; + return len; + } - char ch_; -}; + void accept(Visitor &v) override; -class AnyCharacter : public Ope -{ -public: - size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { - c.trace("AnyCharacter", s, n, sv, dt); - // TODO: UTF8 support - if (n < 1) { - c.set_error_pos(s); - return -1; - } - return 1; - } + size_t size() const { return opes_.size(); } - void accept(Visitor& v) override; + std::vector> opes_; + bool for_label_ = false; }; -class Capture : public Ope -{ +class Repetition : public Ope { public: - Capture(const std::shared_ptr& ope, MatchAction ma, size_t id, const std::string& name) - : 
ope_(ope), match_action_(ma), id_(id), name_(name) {} - - size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { - const auto& rule = *ope_; - auto len = rule.parse(s, n, sv, c, dt); - if (success(len) && match_action_) { - match_action_(s, len, id_, name_); - } + Repetition(const std::shared_ptr &ope, size_t min, size_t max) + : ope_(ope), min_(min), max_(max) {} + + size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, + std::any &dt) const override { + size_t count = 0; + size_t i = 0; + while (count < min_) { + auto &chvs = c.push(); + auto se = scope_exit([&]() { c.pop(); }); + + auto len = ope_->parse(s + i, n - i, chvs, c, dt); + + if (success(len)) { + vs.append(chvs); + c.shift_capture_values(); + } else { return len; + } + i += len; + count++; } - void accept(Visitor& v) override; + while (count < max_) { + auto &chvs = c.push(); + auto se = scope_exit([&]() { c.pop(); }); - std::shared_ptr ope_; + auto len = ope_->parse(s + i, n - i, chvs, c, dt); -private: - MatchAction match_action_; - size_t id_; - std::string name_; -}; + if (success(len)) { + vs.append(chvs); + c.shift_capture_values(); + } else { + break; + } + i += len; + count++; + } + return i; + } -class TokenBoundary : public Ope -{ -public: - TokenBoundary(const std::shared_ptr& ope) : ope_(ope) {} + void accept(Visitor &v) override; - size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override; + bool is_zom() const { + return min_ == 0 && max_ == std::numeric_limits::max(); + } - void accept(Visitor& v) override; + static std::shared_ptr zom(const std::shared_ptr &ope) { + return std::make_shared(ope, 0, + std::numeric_limits::max()); + } - std::shared_ptr ope_; + static std::shared_ptr oom(const std::shared_ptr &ope) { + return std::make_shared(ope, 1, + std::numeric_limits::max()); + } + + static std::shared_ptr opt(const std::shared_ptr &ope) { + return std::make_shared(ope, 0, 1); + } + + 
std::shared_ptr ope_; + size_t min_; + size_t max_; }; -class Ignore : public Ope -{ +class AndPredicate : public Ope { public: - Ignore(const std::shared_ptr& ope) : ope_(ope) {} + AndPredicate(const std::shared_ptr &ope) : ope_(ope) {} - size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { - const auto& rule = *ope_; - auto& chldsv = c.push(); - auto se = make_scope_exit([&]() { - c.pop(); - }); - return rule.parse(s, n, chldsv, c, dt); + size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/, + Context &c, std::any &dt) const override { + auto &chvs = c.push(); + auto se = scope_exit([&]() { c.pop(); }); + + auto len = ope_->parse(s, n, chvs, c, dt); + + if (success(len)) { + return 0; + } else { + return len; } + } - void accept(Visitor& v) override; + void accept(Visitor &v) override; - std::shared_ptr ope_; + std::shared_ptr ope_; }; -typedef std::function Parser; - -class User : public Ope -{ +class NotPredicate : public Ope { public: - User(Parser fn) : fn_(fn) {} + NotPredicate(const std::shared_ptr &ope) : ope_(ope) {} - size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { - c.trace("User", s, n, sv, dt); - assert(fn_); - return fn_(s, n, sv, dt); + size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/, + Context &c, std::any &dt) const override { + auto &chvs = c.push(); + auto se = scope_exit([&]() { c.pop(); }); + auto len = ope_->parse(s, n, chvs, c, dt); + if (success(len)) { + c.set_error_pos(s); + return static_cast(-1); + } else { + return 0; } + } - void accept(Visitor& v) override; + void accept(Visitor &v) override; - std::function fn_; + std::shared_ptr ope_; }; -class WeakHolder : public Ope -{ +class Dictionary : public Ope, public std::enable_shared_from_this { public: - WeakHolder(const std::shared_ptr& ope) : weak_(ope) {} + Dictionary(const std::vector &v, bool ignore_case) + : trie_(v, ignore_case) {} - size_t parse(const char* 
s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { - auto ope = weak_.lock(); - assert(ope); - const auto& rule = *ope; - return rule.parse(s, n, sv, c, dt); - } + size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, + std::any &dt) const override; - void accept(Visitor& v) override; + void accept(Visitor &v) override; - std::weak_ptr weak_; + Trie trie_; }; -class Holder : public Ope -{ +class LiteralString : public Ope, + public std::enable_shared_from_this { public: - Holder(Definition* outer) - : outer_(outer) {} - - size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override; + LiteralString(std::string &&s, bool ignore_case) + : lit_(s), ignore_case_(ignore_case), is_word_(false) {} - void accept(Visitor& v) override; + LiteralString(const std::string &s, bool ignore_case) + : lit_(s), ignore_case_(ignore_case), is_word_(false) {} - any reduce(const SemanticValues& sv, any& dt) const; + size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, + std::any &dt) const override; - std::shared_ptr ope_; - Definition* outer_; + void accept(Visitor &v) override; - friend class Definition; + std::string lit_; + bool ignore_case_; + mutable std::once_flag init_is_word_; + mutable bool is_word_; }; -class DefinitionReference : public Ope -{ +class CharacterClass : public Ope, + public std::enable_shared_from_this { public: - DefinitionReference( - const std::unordered_map& grammar, const std::string& name, const char* s) - : grammar_(grammar) - , name_(name) - , s_(s) {} - - size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override; - - void accept(Visitor& v) override; + CharacterClass(const std::string &s, bool negated, bool ignore_case) + : negated_(negated), ignore_case_(ignore_case) { + auto chars = decode(s.data(), s.length()); + auto i = 0u; + while (i < chars.size()) { + if (i + 2 < chars.size() && chars[i + 1] == '-') { + auto cp1 = 
chars[i]; + auto cp2 = chars[i + 2]; + ranges_.emplace_back(std::pair(cp1, cp2)); + i += 3; + } else { + auto cp = chars[i]; + ranges_.emplace_back(std::pair(cp, cp)); + i += 1; + } + } + assert(!ranges_.empty()); + } + + CharacterClass(const std::vector> &ranges, + bool negated, bool ignore_case) + : ranges_(ranges), negated_(negated), ignore_case_(ignore_case) { + assert(!ranges_.empty()); + } + + size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/, + Context &c, std::any & /*dt*/) const override { + if (n < 1) { + c.set_error_pos(s); + return static_cast(-1); + } + + char32_t cp = 0; + auto len = decode_codepoint(s, n, cp); + + for (const auto &range : ranges_) { + if (in_range(range, cp)) { + if (negated_) { + c.set_error_pos(s); + return static_cast(-1); + } else { + return len; + } + } + } - std::shared_ptr get_rule() const; + if (negated_) { + return len; + } else { + c.set_error_pos(s); + return static_cast(-1); + } + } - const std::unordered_map& grammar_; - const std::string name_; - const char* s_; + void accept(Visitor &v) override; private: - mutable std::once_flag init_; - mutable std::shared_ptr rule_; + bool in_range(const std::pair &range, char32_t cp) const { + if (ignore_case_) { + auto cpl = std::tolower(cp); + return std::tolower(range.first) <= cpl && + cpl <= std::tolower(range.second); + } else { + return range.first <= cp && cp <= range.second; + } + } + + std::vector> ranges_; + bool negated_; + bool ignore_case_; }; -class Whitespace : public Ope -{ +class Character : public Ope, public std::enable_shared_from_this { public: - Whitespace(const std::shared_ptr& ope) : ope_(ope) {} + Character(char32_t ch) : ch_(ch) {} - size_t parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const override { - if (c.in_whitespace) { - return 0; - } - c.in_whitespace = true; - auto se = make_scope_exit([&]() { c.in_whitespace = false; }); - const auto& rule = *ope_; - return rule.parse(s, n, sv, c, dt); + size_t 
parse_core(const char *s, size_t n, SemanticValues & /*vs*/, + Context &c, std::any & /*dt*/) const override { + if (n < 1) { + c.set_error_pos(s); + return static_cast(-1); } - void accept(Visitor& v) override; + char32_t cp = 0; + auto len = decode_codepoint(s, n, cp); - std::shared_ptr ope_; -}; + if (cp != ch_) { + c.set_error_pos(s); + return static_cast(-1); + } + return len; + } -/* - * Visitor - */ -struct Ope::Visitor -{ - virtual void visit(Sequence& ope) {} - virtual void visit(PrioritizedChoice& ope) {} - virtual void visit(ZeroOrMore& ope) {} - virtual void visit(OneOrMore& ope) {} - virtual void visit(Option& ope) {} - virtual void visit(AndPredicate& ope) {} - virtual void visit(NotPredicate& ope) {} - virtual void visit(LiteralString& ope) {} - virtual void visit(CharacterClass& ope) {} - virtual void visit(Character& ope) {} - virtual void visit(AnyCharacter& ope) {} - virtual void visit(Capture& ope) {} - virtual void visit(TokenBoundary& ope) {} - virtual void visit(Ignore& ope) {} - virtual void visit(User& ope) {} - virtual void visit(WeakHolder& ope) {} - virtual void visit(Holder& ope) {} - virtual void visit(DefinitionReference& ope) {} - virtual void visit(Whitespace& ope) {} + void accept(Visitor &v) override; + + char32_t ch_; }; -struct AssignIDToDefinition : public Ope::Visitor -{ - void visit(Sequence& ope) override { - for (auto op: ope.opes_) { - op->accept(*this); - } - } - void visit(PrioritizedChoice& ope) override { - for (auto op: ope.opes_) { - op->accept(*this); - } +class AnyCharacter : public Ope, + public std::enable_shared_from_this { +public: + size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/, + Context &c, std::any & /*dt*/) const override { + auto len = codepoint_length(s, n); + if (len < 1) { + c.set_error_pos(s); + return static_cast(-1); } - void visit(ZeroOrMore& ope) override { ope.ope_->accept(*this); } - void visit(OneOrMore& ope) override { ope.ope_->accept(*this); } - void visit(Option& ope) 
override { ope.ope_->accept(*this); } - void visit(AndPredicate& ope) override { ope.ope_->accept(*this); } - void visit(NotPredicate& ope) override { ope.ope_->accept(*this); } - void visit(Capture& ope) override { ope.ope_->accept(*this); } - void visit(TokenBoundary& ope) override { ope.ope_->accept(*this); } - void visit(Ignore& ope) override { ope.ope_->accept(*this); } - void visit(WeakHolder& ope) override { ope.weak_.lock()->accept(*this); } - void visit(Holder& ope) override; - void visit(DefinitionReference& ope) override { ope.get_rule()->accept(*this); } - - std::unordered_map ids; + return len; + } + + void accept(Visitor &v) override; }; -struct IsToken : public Ope::Visitor -{ - IsToken() : has_token_boundary(false), has_rule(false) {} +class CaptureScope : public Ope { +public: + CaptureScope(const std::shared_ptr &ope) : ope_(ope) {} - void visit(Sequence& ope) override { - for (auto op: ope.opes_) { - op->accept(*this); - } - } - void visit(PrioritizedChoice& ope) override { - for (auto op: ope.opes_) { - op->accept(*this); - } - } - void visit(ZeroOrMore& ope) override { ope.ope_->accept(*this); } - void visit(OneOrMore& ope) override { ope.ope_->accept(*this); } - void visit(Option& ope) override { ope.ope_->accept(*this); } - void visit(Capture& ope) override { ope.ope_->accept(*this); } - void visit(TokenBoundary& ope) override { has_token_boundary = true; } - void visit(Ignore& ope) override { ope.ope_->accept(*this); } - void visit(WeakHolder& ope) override { ope.weak_.lock()->accept(*this); } - void visit(DefinitionReference& ope) override { has_rule = true; } + size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, + std::any &dt) const override { + c.push_capture_scope(); + auto se = scope_exit([&]() { c.pop_capture_scope(); }); + return ope_->parse(s, n, vs, c, dt); + } - bool is_token() const { - return has_token_boundary || !has_rule; - } + void accept(Visitor &v) override; - bool has_token_boundary; - bool 
has_rule; + std::shared_ptr ope_; }; -static const char* WHITESPACE_DEFINITION_NAME = "%whitespace"; +class Capture : public Ope { +public: + using MatchAction = std::function; + + Capture(const std::shared_ptr &ope, MatchAction ma) + : ope_(ope), match_action_(ma) {} -/* - * Definition - */ -class Definition -{ + size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, + std::any &dt) const override { + auto len = ope_->parse(s, n, vs, c, dt); + if (success(len) && match_action_) { match_action_(s, len, c); } + return len; + } + + void accept(Visitor &v) override; + + std::shared_ptr ope_; + MatchAction match_action_; +}; + +class TokenBoundary : public Ope { public: - struct Result { - bool ret; - size_t len; - const char* error_pos; - const char* message_pos; - const std::string message; - }; + TokenBoundary(const std::shared_ptr &ope) : ope_(ope) {} - Definition() - : ignoreSemanticValue(false) - , enablePackratParsing(false) - , is_token(false) - , has_token_boundary(false) - , holder_(std::make_shared(this)) {} - - Definition(const Definition& rhs) - : name(rhs.name) - , ignoreSemanticValue(false) - , enablePackratParsing(false) - , is_token(false) - , has_token_boundary(false) - , holder_(rhs.holder_) - { - holder_->outer_ = this; - } + size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, + std::any &dt) const override; - Definition(Definition&& rhs) - : name(std::move(rhs.name)) - , ignoreSemanticValue(rhs.ignoreSemanticValue) - , whitespaceOpe(rhs.whitespaceOpe) - , enablePackratParsing(rhs.enablePackratParsing) - , is_token(rhs.is_token) - , has_token_boundary(rhs.has_token_boundary) - , holder_(std::move(rhs.holder_)) - { - holder_->outer_ = this; - } + void accept(Visitor &v) override; - Definition(const std::shared_ptr& ope) - : ignoreSemanticValue(false) - , enablePackratParsing(false) - , is_token(false) - , has_token_boundary(false) - , holder_(std::make_shared(this)) - { - *this <= ope; - } + std::shared_ptr 
ope_; +}; - operator std::shared_ptr() { - return std::make_shared(holder_); - } +class Ignore : public Ope { +public: + Ignore(const std::shared_ptr &ope) : ope_(ope) {} - Definition& operator<=(const std::shared_ptr& ope) { - IsToken isToken; - ope->accept(isToken); - is_token = isToken.is_token(); - has_token_boundary = isToken.has_token_boundary; + size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/, + Context &c, std::any &dt) const override { + auto &chvs = c.push_semantic_values_scope(); + auto se = scope_exit([&]() { c.pop_semantic_values_scope(); }); + return ope_->parse(s, n, chvs, c, dt); + } - holder_->ope_ = ope; + void accept(Visitor &v) override; - return *this; - } + std::shared_ptr ope_; +}; - Result parse(const char* s, size_t n, const char* path = nullptr) const { - SemanticValues sv; - any dt; - return parse_core(s, n, sv, dt, path); - } +using Parser = std::function; - Result parse(const char* s, const char* path = nullptr) const { - auto n = strlen(s); - return parse(s, n, path); - } +class User : public Ope { +public: + User(Parser fn) : fn_(fn) {} + size_t parse_core(const char *s, size_t n, SemanticValues &vs, + Context & /*c*/, std::any &dt) const override { + assert(fn_); + return fn_(s, n, vs, dt); + } + void accept(Visitor &v) override; + std::function + fn_; +}; - Result parse(const char* s, size_t n, any& dt, const char* path = nullptr) const { - SemanticValues sv; - return parse_core(s, n, sv, dt, path); - } +class WeakHolder : public Ope { +public: + WeakHolder(const std::shared_ptr &ope) : weak_(ope) {} - Result parse(const char* s, any& dt, const char* path = nullptr) const { - auto n = strlen(s); - return parse(s, n, dt, path); - } + size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, + std::any &dt) const override { + auto ope = weak_.lock(); + assert(ope); + return ope->parse(s, n, vs, c, dt); + } - template - Result parse_and_get_value(const char* s, size_t n, T& val, const char* path = 
nullptr) const { - SemanticValues sv; - any dt; - auto r = parse_core(s, n, sv, dt, path); - if (r.ret && !sv.empty() && !sv.front().is_undefined()) { - val = sv[0].get(); - } - return r; - } + void accept(Visitor &v) override; - template - Result parse_and_get_value(const char* s, T& val, const char* path = nullptr) const { - auto n = strlen(s); - return parse_and_get_value(s, n, val, path); - } + std::weak_ptr weak_; +}; - template - Result parse_and_get_value(const char* s, size_t n, any& dt, T& val, const char* path = nullptr) const { - SemanticValues sv; - auto r = parse_core(s, n, sv, dt, path); - if (r.ret && !sv.empty() && !sv.front().is_undefined()) { - val = sv[0].get(); - } - return r; - } +class Holder : public Ope { +public: + Holder(Definition *outer) : outer_(outer) {} - template - Result parse_and_get_value(const char* s, any& dt, T& val, const char* path = nullptr) const { - auto n = strlen(s); - return parse_and_get_value(s, n, dt, val, path); - } + size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, + std::any &dt) const override; - Definition& operator=(Action a) { - action = a; - return *this; - } + void accept(Visitor &v) override; - template - Definition& operator,(T fn) { - operator=(fn); - return *this; - } + std::any reduce(SemanticValues &vs, std::any &dt) const; - Definition& operator~() { - ignoreSemanticValue = true; - return *this; - } + const std::string &name() const; + const std::string &trace_name() const; - void accept(Ope::Visitor& v) { - holder_->accept(v); - } + std::shared_ptr ope_; + Definition *outer_; + mutable std::once_flag trace_name_init_; + mutable std::string trace_name_; - std::shared_ptr get_core_operator() { - return holder_->ope_; - } + friend class Definition; +}; - std::string name; - size_t id; - Action action; - std::function enter; - std::function leave; - std::function error_message; - bool ignoreSemanticValue; - std::shared_ptr whitespaceOpe; - bool enablePackratParsing; - bool 
is_token; - bool has_token_boundary; - Tracer tracer; +using Grammar = std::unordered_map; -private: - friend class DefinitionReference; +class Reference : public Ope, public std::enable_shared_from_this { +public: + Reference(const Grammar &grammar, const std::string &name, const char *s, + bool is_macro, const std::vector> &args) + : grammar_(grammar), name_(name), s_(s), is_macro_(is_macro), args_(args), + rule_(nullptr), iarg_(0) {} - Definition& operator=(const Definition& rhs); - Definition& operator=(Definition&& rhs); + size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, + std::any &dt) const override; - Result parse_core(const char* s, size_t n, SemanticValues& sv, any& dt, const char* path) const { - AssignIDToDefinition assignId; - holder_->accept(assignId); + void accept(Visitor &v) override; - std::shared_ptr ope = holder_; - if (whitespaceOpe) { - ope = std::make_shared(whitespaceOpe, ope); - } + std::shared_ptr get_core_operator() const; - Context cxt(path, s, n, assignId.ids.size(), whitespaceOpe, enablePackratParsing, tracer); - auto len = ope->parse(s, n, sv, cxt, dt); - return Result{ success(len), len, cxt.error_pos, cxt.message_pos, cxt.message }; - } + const Grammar &grammar_; + const std::string name_; + const char *s_; - std::shared_ptr holder_; + const bool is_macro_; + const std::vector> args_; + + Definition *rule_; + size_t iarg_; }; -/* - * Implementations - */ +class Whitespace : public Ope { +public: + Whitespace(const std::shared_ptr &ope) : ope_(ope) {} -inline size_t LiteralString::parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const { - c.trace("LiteralString", s, n, sv, dt); + size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, + std::any &dt) const override { + if (c.in_whitespace) { return 0; } + c.in_whitespace = true; + auto se = scope_exit([&]() { c.in_whitespace = false; }); + return ope_->parse(s, n, vs, c, dt); + } - auto i = 0u; - for (; i < 
lit_.size(); i++) { - if (i >= n || s[i] != lit_[i]) { - c.set_error_pos(s); - return -1; - } - } + void accept(Visitor &v) override; - // Skip whiltespace - if (!c.in_token) { - if (c.whitespaceOpe) { - auto len = c.whitespaceOpe->parse(s + i, n - i, sv, c, dt); - if (fail(len)) { - return -1; - } - i += len; - } - } + std::shared_ptr ope_; +}; - return i; -} +class BackReference : public Ope { +public: + BackReference(std::string &&name) : name_(name) {} -inline size_t TokenBoundary::parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const { - c.in_token = true; - auto se = make_scope_exit([&]() { c.in_token = false; }); - const auto& rule = *ope_; - auto len = rule.parse(s, n, sv, c, dt); - if (success(len)) { - sv.tokens.push_back(std::make_pair(s, len)); + BackReference(const std::string &name) : name_(name) {} - if (c.whitespaceOpe) { - auto l = c.whitespaceOpe->parse(s + len, n - len, sv, c, dt); - if (fail(l)) { - return -1; - } - len += l; - } - } - return len; -} + size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, + std::any &dt) const override; -inline size_t Holder::parse(const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const { - if (!ope_) { - throw std::logic_error("Uninitialized definition ope was used..."); - } + void accept(Visitor &v) override; - c.trace(outer_->name.c_str(), s, n, sv, dt); - c.nest_level++; - auto se = make_scope_exit([&]() { c.nest_level--; }); + std::string name_; +}; - size_t len; - any val; +class PrecedenceClimbing : public Ope { +public: + using BinOpeInfo = std::map>; - c.packrat(s, outer_->id, len, val, [&](any& val) { - auto& chldsv = c.push(); + PrecedenceClimbing(const std::shared_ptr &atom, + const std::shared_ptr &binop, const BinOpeInfo &info, + const Definition &rule) + : atom_(atom), binop_(binop), info_(info), rule_(rule) {} - if (outer_->enter) { - outer_->enter(dt); - } + size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, + 
std::any &dt) const override { + return parse_expression(s, n, vs, c, dt, 0); + } - auto se = make_scope_exit([&]() { - c.pop(); + void accept(Visitor &v) override; - if (outer_->leave) { - outer_->leave(dt); - } - }); + std::shared_ptr atom_; + std::shared_ptr binop_; + BinOpeInfo info_; + const Definition &rule_; - const auto& rule = *ope_; - len = rule.parse(s, n, chldsv, c, dt); - - // Invoke action - if (success(len)) { - chldsv.s_ = s; - chldsv.n_ = len; - - try { - val = reduce(chldsv, dt); - } catch (const parse_error& e) { - if (e.what()) { - if (c.message_pos < s) { - c.message_pos = s; - c.message = e.what(); - } - } - len = -1; - } - } - }); +private: + size_t parse_expression(const char *s, size_t n, SemanticValues &vs, + Context &c, std::any &dt, size_t min_prec) const; - if (success(len)) { - if (!outer_->ignoreSemanticValue) { - sv.emplace_back(val); - } - } else { - if (outer_->error_message) { - if (c.message_pos < s) { - c.message_pos = s; - c.message = outer_->error_message(); - } - } - } + Definition &get_reference_for_binop(Context &c) const; +}; - return len; -} +class Recovery : public Ope { +public: + Recovery(const std::shared_ptr &ope) : ope_(ope) {} -inline any Holder::reduce(const SemanticValues& sv, any& dt) const { - if (outer_->action) { - return outer_->action(sv, dt); - } else if (sv.empty()) { - return any(); - } else { - return sv.front(); - } -} + size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, + std::any &dt) const override; -inline size_t DefinitionReference::parse( - const char* s, size_t n, SemanticValues& sv, Context& c, any& dt) const { - const auto& rule = *get_rule(); - return rule.parse(s, n, sv, c, dt); -} + void accept(Visitor &v) override; -inline std::shared_ptr DefinitionReference::get_rule() const { - if (!rule_) { - std::call_once(init_, [this]() { - rule_ = grammar_.at(name_).holder_; - }); - } - assert(rule_); - return rule_; + std::shared_ptr ope_; }; -inline void 
Sequence::accept(Visitor& v) { v.visit(*this); } -inline void PrioritizedChoice::accept(Visitor& v) { v.visit(*this); } -inline void ZeroOrMore::accept(Visitor& v) { v.visit(*this); } -inline void OneOrMore::accept(Visitor& v) { v.visit(*this); } -inline void Option::accept(Visitor& v) { v.visit(*this); } -inline void AndPredicate::accept(Visitor& v) { v.visit(*this); } -inline void NotPredicate::accept(Visitor& v) { v.visit(*this); } -inline void LiteralString::accept(Visitor& v) { v.visit(*this); } -inline void CharacterClass::accept(Visitor& v) { v.visit(*this); } -inline void Character::accept(Visitor& v) { v.visit(*this); } -inline void AnyCharacter::accept(Visitor& v) { v.visit(*this); } -inline void Capture::accept(Visitor& v) { v.visit(*this); } -inline void TokenBoundary::accept(Visitor& v) { v.visit(*this); } -inline void Ignore::accept(Visitor& v) { v.visit(*this); } -inline void User::accept(Visitor& v) { v.visit(*this); } -inline void WeakHolder::accept(Visitor& v) { v.visit(*this); } -inline void Holder::accept(Visitor& v) { v.visit(*this); } -inline void DefinitionReference::accept(Visitor& v) { v.visit(*this); } -inline void Whitespace::accept(Visitor& v) { v.visit(*this); } - -inline void AssignIDToDefinition::visit(Holder& ope) { - auto p = (void*)ope.outer_; - if (ids.count(p)) { - return; - } - auto id = ids.size(); - ids[p] = id; - ope.outer_->id = id; - ope.ope_->accept(*this); -} +class Cut : public Ope, public std::enable_shared_from_this { +public: + size_t parse_core(const char * /*s*/, size_t /*n*/, SemanticValues & /*vs*/, + Context &c, std::any & /*dt*/) const override { + if (!c.cut_stack.empty()) { c.cut_stack.back() = true; } + return 0; + } + + void accept(Visitor &v) override; +}; /* * Factories */ -template -std::shared_ptr seq(Args&& ...args) { - return std::make_shared(static_cast>(args)...); +template std::shared_ptr seq(Args &&...args) { + return std::make_shared(static_cast>(args)...); } -template -std::shared_ptr cho(Args&& 
...args) { - return std::make_shared(static_cast>(args)...); +template std::shared_ptr cho(Args &&...args) { + return std::make_shared( + false, static_cast>(args)...); } -inline std::shared_ptr zom(const std::shared_ptr& ope) { - return std::make_shared(ope); +template std::shared_ptr cho4label_(Args &&...args) { + return std::make_shared( + true, static_cast>(args)...); } -inline std::shared_ptr oom(const std::shared_ptr& ope) { - return std::make_shared(ope); +inline std::shared_ptr zom(const std::shared_ptr &ope) { + return Repetition::zom(ope); } -inline std::shared_ptr opt(const std::shared_ptr& ope) { - return std::make_shared