Skip to content

Commit a07b37e

Browse files
authored
Improve performance for comment parsing (open-source-parsers#1052)
* Improve performance for comment parsing * Fix weird main.cpp issue * Readd newline * remove carriage return feed char * Remove unnecessary checks
1 parent aebc7fa commit a07b37e

File tree

2 files changed

+48
-25
lines changed

2 files changed

+48
-25
lines changed

src/lib_json/json_reader.cpp

Lines changed: 43 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#endif // if !defined(JSON_IS_AMALGAMATION)
1313
#include <cassert>
1414
#include <cstring>
15+
#include <iostream>
1516
#include <istream>
1617
#include <limits>
1718
#include <memory>
@@ -942,7 +943,7 @@ class OurReader {
942943
void skipSpaces();
943944
bool match(const Char* pattern, int patternLength);
944945
bool readComment();
945-
bool readCStyleComment();
946+
bool readCStyleComment(bool* containsNewLineResult);
946947
bool readCppStyleComment();
947948
bool readString();
948949
bool readStringSingleQuote();
@@ -977,18 +978,20 @@ class OurReader {
977978
static bool containsNewLine(Location begin, Location end);
978979

979980
using Nodes = std::stack<Value*>;
980-
Nodes nodes_;
981-
Errors errors_;
982-
String document_;
983-
Location begin_;
984-
Location end_;
985-
Location current_;
986-
Location lastValueEnd_;
987-
Value* lastValue_;
988-
String commentsBefore_;
981+
982+
Nodes nodes_{};
983+
Errors errors_{};
984+
String document_{};
985+
Location begin_ = nullptr;
986+
Location end_ = nullptr;
987+
Location current_ = nullptr;
988+
Location lastValueEnd_ = nullptr;
989+
Value* lastValue_ = nullptr;
990+
bool lastValueHasAComment_ = false;
991+
String commentsBefore_{};
989992

990993
OurFeatures const features_;
991-
bool collectComments_;
994+
bool collectComments_ = false;
992995
}; // OurReader
993996

994997
// complete copy of Read impl, for OurReader
@@ -1001,9 +1004,7 @@ bool OurReader::containsNewLine(OurReader::Location begin,
10011004
return false;
10021005
}
10031006

1004-
OurReader::OurReader(OurFeatures const& features)
1005-
: begin_(), end_(), current_(), lastValueEnd_(), lastValue_(),
1006-
features_(features), collectComments_() {}
1007+
OurReader::OurReader(OurFeatures const& features) : features_(features) {}
10071008

10081009
bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
10091010
bool collectComments) {
@@ -1134,6 +1135,7 @@ bool OurReader::readValue() {
11341135

11351136
if (collectComments_) {
11361137
lastValueEnd_ = current_;
1138+
lastValueHasAComment_ = false;
11371139
lastValue_ = &currentValue();
11381140
}
11391141

@@ -1280,21 +1282,32 @@ bool OurReader::match(const Char* pattern, int patternLength) {
12801282
}
12811283

12821284
bool OurReader::readComment() {
1283-
Location commentBegin = current_ - 1;
1284-
Char c = getNextChar();
1285+
const Location commentBegin = current_ - 1;
1286+
const Char c = getNextChar();
12851287
bool successful = false;
1286-
if (c == '*')
1287-
successful = readCStyleComment();
1288-
else if (c == '/')
1288+
bool cStyleWithEmbeddedNewline = false;
1289+
1290+
const bool isCStyleComment = (c == '*');
1291+
const bool isCppStyleComment = (c == '/');
1292+
if (isCStyleComment) {
1293+
successful = readCStyleComment(&cStyleWithEmbeddedNewline);
1294+
} else if (isCppStyleComment) {
12891295
successful = readCppStyleComment();
1296+
}
1297+
12901298
if (!successful)
12911299
return false;
12921300

12931301
if (collectComments_) {
12941302
CommentPlacement placement = commentBefore;
1295-
if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1296-
if (c != '*' || !containsNewLine(commentBegin, current_))
1297-
placement = commentAfterOnSameLine;
1303+
1304+
if (!lastValueHasAComment_) {
1305+
if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1306+
if (isCppStyleComment || !cStyleWithEmbeddedNewline) {
1307+
placement = commentAfterOnSameLine;
1308+
lastValueHasAComment_ = true;
1309+
}
1310+
}
12981311
}
12991312

13001313
addComment(commentBegin, current_, placement);
@@ -1334,12 +1347,18 @@ void OurReader::addComment(Location begin, Location end,
13341347
}
13351348
}
13361349

1337-
bool OurReader::readCStyleComment() {
1350+
bool OurReader::readCStyleComment(bool* containsNewLineResult) {
1351+
*containsNewLineResult = false;
1352+
13381353
while ((current_ + 1) < end_) {
13391354
Char c = getNextChar();
1340-
if (c == '*' && *current_ == '/')
1355+
if (c == '*' && *current_ == '/') {
13411356
break;
1357+
} else if (c == '\n') {
1358+
*containsNewLineResult = true;
1359+
}
13421360
}
1361+
13431362
return getNextChar() == '/';
13441363
}
13451364

src/test_lib_json/fuzz.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,10 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
2323
return 0;
2424
}
2525

26-
uint32_t hash_settings = *(const uint32_t*)data;
26+
const uint32_t hash_settings = static_cast<uint32_t>(data[0]) |
27+
(static_cast<uint32_t>(data[1]) << 8) |
28+
(static_cast<uint32_t>(data[2]) << 16) |
29+
(static_cast<uint32_t>(data[3]) << 24);
2730
data += sizeof(uint32_t);
2831
size -= sizeof(uint32_t);
2932

@@ -36,6 +39,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
3639
builder.settings_["failIfExtra_"] = hash_settings & (1 << 6);
3740
builder.settings_["rejectDupKeys_"] = hash_settings & (1 << 7);
3841
builder.settings_["allowSpecialFloats_"] = hash_settings & (1 << 8);
42+
builder.settings_["collectComments"] = hash_settings & (1 << 9);
3943

4044
std::unique_ptr<Json::CharReader> reader(builder.newCharReader());
4145

0 commit comments

Comments
 (0)