Skip to content

Commit 83946a2

Browse files
authored
Ignore the byte order mark at the head of UTF-8 text. (open-source-parsers#1149)
* Ignore the BOM at the beginning of UTF-8 text
1 parent 91f1553 commit 83946a2

File tree

2 files changed

+45
-0
lines changed

2 files changed

+45
-0
lines changed

src/lib_json/json_reader.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -871,6 +871,7 @@ class OurFeatures {
871871
bool failIfExtra_;
872872
bool rejectDupKeys_;
873873
bool allowSpecialFloats_;
874+
bool allowBom_;
874875
size_t stackLimit_;
875876
}; // OurFeatures
876877

@@ -939,6 +940,7 @@ class OurReader {
939940

940941
bool readToken(Token& token);
941942
void skipSpaces();
943+
void skipBom(bool allowBom);
942944
bool match(const Char* pattern, int patternLength);
943945
bool readComment();
944946
bool readCStyleComment(bool* containsNewLineResult);
@@ -1022,6 +1024,8 @@ bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
10221024
nodes_.pop();
10231025
nodes_.push(&root);
10241026

1027+
// skip byte order mark if it exists at the beginning of the UTF-8 text.
1028+
skipBom(features_.allowBom_);
10251029
bool successful = readValue();
10261030
nodes_.pop();
10271031
Token token;
@@ -1268,6 +1272,17 @@ void OurReader::skipSpaces() {
12681272
}
12691273
}
12701274

1275+
void OurReader::skipBom(bool allowBom) {
1276+
// If BOM is not allowed, then skip it.
1277+
// The default value is: false
1278+
if (!allowBom) {
1279+
if (strncmp(begin_, "\xEF\xBB\xBF", 3) == 0) {
1280+
begin_ += 3;
1281+
current_ = begin_;
1282+
}
1283+
}
1284+
}
1285+
12711286
bool OurReader::match(const Char* pattern, int patternLength) {
12721287
if (end_ - current_ < patternLength)
12731288
return false;
@@ -1885,6 +1900,7 @@ CharReader* CharReaderBuilder::newCharReader() const {
18851900
features.failIfExtra_ = settings_["failIfExtra"].asBool();
18861901
features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
18871902
features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
1903+
features.allowBom_ = settings_["allowBom"].asBool();
18881904
return new OurCharReader(collectComments, features);
18891905
}
18901906
static void getValidReaderKeys(std::set<String>* valid_keys) {
@@ -1900,6 +1916,7 @@ static void getValidReaderKeys(std::set<String>* valid_keys) {
19001916
valid_keys->insert("failIfExtra");
19011917
valid_keys->insert("rejectDupKeys");
19021918
valid_keys->insert("allowSpecialFloats");
1919+
valid_keys->insert("allowBom");
19031920
}
19041921
bool CharReaderBuilder::validate(Json::Value* invalid) const {
19051922
Json::Value my_invalid;
@@ -1934,6 +1951,7 @@ void CharReaderBuilder::strictMode(Json::Value* settings) {
19341951
(*settings)["failIfExtra"] = true;
19351952
(*settings)["rejectDupKeys"] = true;
19361953
(*settings)["allowSpecialFloats"] = false;
1954+
(*settings)["allowBom"] = false;
19371955
//! [CharReaderBuilderStrictMode]
19381956
}
19391957
// static
@@ -1950,6 +1968,7 @@ void CharReaderBuilder::setDefaults(Json::Value* settings) {
19501968
(*settings)["failIfExtra"] = false;
19511969
(*settings)["rejectDupKeys"] = false;
19521970
(*settings)["allowSpecialFloats"] = false;
1971+
(*settings)["allowBom"] = false;
19531972
//! [CharReaderBuilderDefaults]
19541973
}
19551974

src/test_lib_json/main.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3577,6 +3577,32 @@ JSONTEST_FIXTURE_LOCAL(BuilderTest, settings) {
35773577
}
35783578
}
35793579

3580+
// Fixture for the tests that exercise byte-order-mark handling when parsing
// UTF-8 input that starts with EF BB BF.
struct BomTest : JsonTest::TestCase {};
3581+
3582+
JSONTEST_FIXTURE_LOCAL(BomTest, skipBom) {
  // A small JSON object prefixed with the UTF-8 byte order mark.
  std::istringstream input("\xEF\xBB\xBF{\"key\" : \"value\"}");

  // A default-constructed builder, i.e. no settings are overridden.
  const Json::CharReaderBuilder defaultBuilder;

  JSONCPP_STRING errs;
  Json::Value root;
  const bool ok = parseFromStream(defaultBuilder, input, &root, &errs);

  // With default settings the leading BOM is skipped, so the document
  // parses cleanly and its contents are available.
  JSONTEST_ASSERT(ok);
  JSONTEST_ASSERT(errs.empty());
  JSONTEST_ASSERT_STRING_EQUAL(root["key"].asString(), "value");
}
3593+
JSONTEST_FIXTURE_LOCAL(BomTest, allowBom) {
  // Builder with "allowBom" switched on; every other setting keeps its default.
  Json::CharReaderBuilder builder;
  builder.settings_["allowBom"] = true;

  // A small JSON object prefixed with the UTF-8 byte order mark.
  std::istringstream input("\xEF\xBB\xBF{\"key\" : \"value\"}");

  JSONCPP_STRING errs;
  Json::Value root;
  const bool ok = parseFromStream(builder, input, &root, &errs);

  // With "allowBom" enabled the BOM is not skipped; the parse is expected
  // to fail and to report an error.
  JSONTEST_ASSERT(!ok);
  JSONTEST_ASSERT(!errs.empty());
}
3605+
35803606
struct IteratorTest : JsonTest::TestCase {};
35813607

35823608
JSONTEST_FIXTURE_LOCAL(IteratorTest, convert) {

0 commit comments

Comments
 (0)