From 03e38bc407663671af95483426d6d1b4a1d9a5bc Mon Sep 17 00:00:00 2001
From: Scott Wyatt
Date: Thu, 17 Apr 2025 13:11:58 -0400
Subject: [PATCH 1/6] feat(): adds ArrayIndex event

- Announcing a new array index allows streamers to partition values at
  size boundaries.
---
 src/lib.rs   |  1 +
 src/read.rs  | 11 +++++++++++
 src/write.rs |  3 +++
 3 files changed, 15 insertions(+)

diff --git a/src/lib.rs b/src/lib.rs
index f9dfd94..4e2ff9c 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -34,6 +34,7 @@ pub enum JsonEvent<'a> {
     Null,
     StartArray,
     EndArray,
+    ArrayIndex,
     StartObject,
     EndObject,
     ObjectKey(Cow<'a, str>),
diff --git a/src/read.rs b/src/read.rs
index 3fb5ab2..1b2ffdb 100644
--- a/src/read.rs
+++ b/src/read.rs
@@ -501,17 +501,26 @@ impl LowLevelJsonParser {
                 if token == JsonToken::ClosingSquareBracket {
                     return (Some(JsonEvent::EndArray), Some("Trailing commas are not allowed".into()));
                 }
+
                 if let Err(e) = self.push_state_stack(JsonState::ArrayCommaOrEnd) {
                     return (None, Some(e));
                 }
+
                 self.apply_new_token_for_value(token)
             }
+            Some(JsonState::ArrayIndex) => {
+                (Some(JsonEvent::ArrayIndex), None)
+            }
             Some(JsonState::ArrayCommaOrEnd) => match token {
                 JsonToken::Comma => {
                     (None, self.push_state_stack(JsonState::ArrayValue).err())
                 }
                 JsonToken::ClosingSquareBracket => (Some(JsonEvent::EndArray), None),
                 _ => {
+                    if let Err(e) = self.push_state_stack(JsonState::ArrayIndex) {
+                        return (None, Some(e));
+                    }
+
                     let _ = self.push_state_stack(JsonState::ArrayValue); // We already have an error
                     let (event, _) = self.apply_new_token(token);
                     (event, Some("Array values must be followed by a comma to add a new value or a square bracket to end the array".into()))
@@ -596,6 +605,7 @@ enum JsonState {
     ObjectColon,
     ObjectValue,
     ObjectCommaOrEnd,
+    ArrayIndex,
     ArrayValue,
     ArrayValueOrEnd,
     ArrayCommaOrEnd,
@@ -1132,6 +1142,7 @@ fn owned_event(event: JsonEvent<'_>) -> JsonEvent<'static> {
     JsonEvent::Number(n) => JsonEvent::Number(n.into_owned().into()),
     JsonEvent::Boolean(b) => JsonEvent::Boolean(b),
     JsonEvent::Null => JsonEvent::Null,
+    JsonEvent::ArrayIndex => JsonEvent::ArrayIndex,
     JsonEvent::StartArray => JsonEvent::StartArray,
     JsonEvent::EndArray => JsonEvent::EndArray,
     JsonEvent::StartObject => JsonEvent::StartObject,
diff --git a/src/write.rs b/src/write.rs
index 24bcd1f..a5d6f25 100644
--- a/src/write.rs
+++ b/src/write.rs
@@ -192,6 +192,9 @@ impl LowLevelJsonSerializer {
                 "Closing a not opened object",
             )),
         },
+        JsonEvent::ArrayIndex => {
+            Ok(())
+        },
         JsonEvent::ObjectKey(key) => {
             match self.state_stack.pop() {
                 Some(JsonState::OpenObject) => (),

From 14a632d2e3dfa5a0a4ff1bb2e36a587a5b5e6cb0 Mon Sep 17 00:00:00 2001
From: Scott Wyatt
Date: Thu, 17 Apr 2025 19:33:14 -0400
Subject: [PATCH 2/6] feat(): adds sync drain_next_value_as_string

- This allows the stream to buffer the next value into a string so that
  it can be passed to any other parser.
---
 README.md       |   2 +-
 src/read.rs     | 111 ++++++++++++++++++++++++++++++++++++++++++++++++
 tests/drains.rs |  49 +++++++++++++++++++++
 3 files changed, 161 insertions(+), 1 deletion(-)
 create mode 100644 tests/drains.rs

diff --git a/README.md b/README.md
index d6ae9a7..7daa7c3 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ JSON streaming parser and serializer
 
 JSON event parser is a simple streaming JSON parser and serializer implementation in Rust.
-It does not aims to be the fastest JSON parser possible but to be a simple implementation allowing to parse larger than +It does not aim to be the fastest JSON parser possible but to be a simple implementation allowing to parse larger than memory files. If you want fast and battle-tested code you might prefer to diff --git a/src/read.rs b/src/read.rs index 1b2ffdb..4777d28 100644 --- a/src/read.rs +++ b/src/read.rs @@ -114,6 +114,117 @@ impl ReaderJsonParser { pub fn read_next_event(&mut self) -> Result, JsonParseError> { self.parse_next() } + + + + /// Drains and returns the raw JSON string corresponding to the next full value (object, array, or scalar) + /// after the last emitted ObjectKey (or current position). + pub fn drain_next_value_as_string(&mut self) -> Result { + let mut nesting = 0; + let mut found_start = false; + + let mut offset = self.input_buffer_start; + let mut cursor = self.input_buffer_start; + + loop { + // SAFETY: shadow parsing avoids borrow checker issues + let LowLevelJsonParserResult { + event, + consumed_bytes, + } = self.parser.parse_next( + #[allow(unsafe_code)] + unsafe { + let input_buffer_ptr: *const [u8] = + &self.input_buffer[offset..self.input_buffer_end]; + &*input_buffer_ptr + }, + self.is_ending, + ); + + if consumed_bytes == 0 && self.is_ending { + return Err(io::Error::new( + io::ErrorKind::UnexpectedEof, + "Unexpected EOF while draining value", + ) + .into()); + } + + if let Some(event) = event { + let event = event?; + + match event { + JsonEvent::StartObject | JsonEvent::StartArray => { + if !found_start { + // Skip colon and whitespace before value + let mut scan = offset; + while scan < self.input_buffer_end { + let b = self.input_buffer[scan]; + if b == b':' || b.is_ascii_whitespace() { + scan += 1; + } else { + break; + } + } + cursor = scan; + found_start = true; + } + nesting += 1; + } + JsonEvent::EndObject | JsonEvent::EndArray => { + nesting -= 1; + if nesting == 0 { + let end = offset + consumed_bytes; + // self.parser = shadow_parser; + self.input_buffer_start = end; + return Ok(str::from_utf8(&self.input_buffer[cursor..end]) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))? + .to_string()); + } + } + JsonEvent::Eof => { + return Err(io::Error::new( + io::ErrorKind::UnexpectedEof, + "Unexpected EOF while draining value", + ) + .into()); + } + _ => { + if !found_start { + // scalar case + cursor = offset; + found_start = true; + } + if nesting == 0 { + let end = offset + consumed_bytes; + self.input_buffer_start = end; + return Ok(str::from_utf8(&self.input_buffer[cursor..end]) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))? + .to_string()); + } + } + } + } + + offset += consumed_bytes; + + if offset >= self.input_buffer_end { + // shift + refill + let remaining = self.input_buffer_end - self.input_buffer_start; + self.input_buffer.copy_within(self.input_buffer_start..self.input_buffer_end, 0); + self.input_buffer_start = 0; + self.input_buffer_end = remaining; + offset = self.input_buffer_end; + + if self.input_buffer.len() < self.max_buffer_size { + self.input_buffer.resize(self.input_buffer.len() + MIN_BUFFER_SIZE, 0); + } + + let read = self.read.read(&mut self.input_buffer[self.input_buffer_end..])?; + self.input_buffer_end += read; + self.is_ending = read == 0; + } + } + } } /// Parses a JSON file from an [`AsyncRead`] implementation. 
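For reference, a minimal sketch of how the new method is meant to be used, mirroring the test added below (the `json_event_parser` import path matches the crate's existing examples; the input literal and expected output are illustrative and assume the drain behaves as that test expects):

```rust
use json_event_parser::{JsonEvent, ReaderJsonParser};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let json = br#"{"skip": 123, "target": {"a": [1, 2]}, "after": false}"#;
    let mut parser = ReaderJsonParser::new(&json[..]);
    loop {
        match parser.parse_next()? {
            // Once the key of interest is reached, hand the raw value text
            // to any other parser instead of walking its events.
            JsonEvent::ObjectKey(key) if key == "target" => {
                let raw = parser.drain_next_value_as_string()?;
                assert_eq!(raw, r#"{"a": [1, 2]}"#);
                return Ok(());
            }
            JsonEvent::Eof => return Err("target key not found".into()),
            _ => {}
        }
    }
}
```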
diff --git a/tests/drains.rs b/tests/drains.rs new file mode 100644 index 0000000..6cb375d --- /dev/null +++ b/tests/drains.rs @@ -0,0 +1,49 @@ +#[cfg(test)] +mod tests { + use super::*; + use json_event_parser::{JsonEvent, ReaderJsonParser}; + + #[test] + fn test_drain_next_value_as_string() -> Result<(), Box> { + let json = br#" + { + "skip": 123, + "target": { + "nested": [1, 2, {"deep": true}], + "another": "value" + }, + "after": false + } + "#; + + let mut parser = ReaderJsonParser::new(&json[..]); + + while let Ok(event) = parser.parse_next() { + + match event { + JsonEvent::ObjectKey(key) => { + println!("KEY: {:?}", key); + if key == "target" { + let raw = parser.drain_next_value_as_string()?; + + println!("brk raw: {:?}", raw); + + let expected = r#"{ + "nested": [1, 2, {"deep": true}], + "another": "value" + }"#; + + assert_eq!(raw, expected.to_string()); + return Ok(()); + } + if key == "nested" || key == "another" { + panic!("nested or another key found"); + } + } + _ => {} + } + } + + panic!("target key not found") + } +} From eb0a5e83bf4dc7d8888ef4414b86a1cdf464a393 Mon Sep 17 00:00:00 2001 From: Scott Wyatt Date: Thu, 17 Apr 2025 21:58:15 -0400 Subject: [PATCH 3/6] fmt --- benches/parser.rs | 86 +- src/lib.rs | 48 +- src/read.rs | 1868 ++++++++++++++++++++++--------------------- src/write.rs | 424 +++++----- tests/drains.rs | 1 - tests/errors.rs | 74 +- tests/test_suite.rs | 178 +++-- 7 files changed, 1341 insertions(+), 1338 deletions(-) diff --git a/benches/parser.rs b/benches/parser.rs index 88c3645..25ab36d 100644 --- a/benches/parser.rs +++ b/benches/parser.rs @@ -3,57 +3,57 @@ use json_event_parser::{JsonEvent, ReaderJsonParser}; use std::fs::{self, read_dir}; fn bench_parse_json_benchmark(c: &mut Criterion) { - for dataset in ["canada", "citm_catalog", "twitter"] { - let data = fs::read(format!( - "{}/benches/json-benchmark/data/{dataset}.json", - env!("CARGO_MANIFEST_DIR") - )) - .unwrap(); - c.bench_function(dataset, |b| { - b.iter(|| { - let mut reader = ReaderJsonParser::new(data.as_slice()); - while reader.parse_next().unwrap() != JsonEvent::Eof { - // read more - } - }) - }); - } + for dataset in ["canada", "citm_catalog", "twitter"] { + let data = fs::read(format!( + "{}/benches/json-benchmark/data/{dataset}.json", + env!("CARGO_MANIFEST_DIR") + )) + .unwrap(); + c.bench_function(dataset, |b| { + b.iter(|| { + let mut reader = ReaderJsonParser::new(data.as_slice()); + while reader.parse_next().unwrap() != JsonEvent::Eof { + // read more + } + }) + }); + } } fn bench_parse_testsuite(c: &mut Criterion) { - let example = load_testsuite_example(); + let example = load_testsuite_example(); - c.bench_function("JSON test suite", |b| { - b.iter(|| { - let mut reader = ReaderJsonParser::new(example.as_slice()); - while reader.parse_next().unwrap() != JsonEvent::Eof { - // read more - } - }) - }); + c.bench_function("JSON test suite", |b| { + b.iter(|| { + let mut reader = ReaderJsonParser::new(example.as_slice()); + while reader.parse_next().unwrap() != JsonEvent::Eof { + // read more + } + }) + }); } fn load_testsuite_example() -> Vec { - let mut result = Vec::new(); - result.extend_from_slice(b"[\n"); - for file in read_dir(format!( - "{}/JSONTestSuite/test_parsing", - env!("CARGO_MANIFEST_DIR") - )) - .unwrap() - { - let file = file.unwrap(); - let file_name = file.file_name().to_str().unwrap().to_owned(); - if file_name.starts_with("y_") && file_name.ends_with(".json") { - if result.len() > 2 { - result.extend_from_slice(b",\n"); - } - result.push(b'\t'); - 
result.extend_from_slice(&fs::read(file.path()).unwrap()); - } + let mut result = Vec::new(); + result.extend_from_slice(b"[\n"); + for file in read_dir(format!( + "{}/JSONTestSuite/test_parsing", + env!("CARGO_MANIFEST_DIR") + )) + .unwrap() + { + let file = file.unwrap(); + let file_name = file.file_name().to_str().unwrap().to_owned(); + if file_name.starts_with("y_") && file_name.ends_with(".json") { + if result.len() > 2 { + result.extend_from_slice(b",\n"); + } + result.push(b'\t'); + result.extend_from_slice(&fs::read(file.path()).unwrap()); } - result.extend_from_slice(b"\n]"); - result + } + result.extend_from_slice(b"\n]"); + result } criterion_group!(parser, bench_parse_testsuite, bench_parse_json_benchmark); diff --git a/src/lib.rs b/src/lib.rs index 4e2ff9c..a15243c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,14 +1,14 @@ #![doc = include_str!("../README.md")] #![cfg_attr(docsrs, feature(doc_auto_cfg))] #![deny( - future_incompatible, - nonstandard_style, - rust_2018_idioms, - missing_copy_implementations, - trivial_casts, - trivial_numeric_casts, - unsafe_code, - unused_qualifications + future_incompatible, + nonstandard_style, + rust_2018_idioms, + missing_copy_implementations, + trivial_casts, + trivial_numeric_casts, + unsafe_code, + unused_qualifications )] mod read; @@ -16,29 +16,31 @@ mod write; #[cfg(feature = "async-tokio")] pub use crate::read::TokioAsyncReaderJsonParser; -pub use crate::read::{ +#[cfg(feature = "async-tokio")] +pub use crate::write::TokioAsyncWriterJsonSerializer; +pub use crate::{ + read::{ JsonParseError, JsonSyntaxError, LowLevelJsonParser, LowLevelJsonParserResult, ReaderJsonParser, SliceJsonParser, TextPosition, + }, + write::{LowLevelJsonSerializer, WriterJsonSerializer}, }; -#[cfg(feature = "async-tokio")] -pub use crate::write::TokioAsyncWriterJsonSerializer; -pub use crate::write::{LowLevelJsonSerializer, WriterJsonSerializer}; use std::borrow::Cow; /// Possible events during JSON parsing. 
#[derive(Eq, PartialEq, Debug, Clone, Hash)] pub enum JsonEvent<'a> { - String(Cow<'a, str>), - Number(Cow<'a, str>), - Boolean(bool), - Null, - StartArray, - EndArray, - ArrayIndex, - StartObject, - EndObject, - ObjectKey(Cow<'a, str>), - Eof, + String(Cow<'a, str>), + Number(Cow<'a, str>), + Boolean(bool), + Null, + StartArray, + EndArray, + ArrayIndex, + StartObject, + EndObject, + ObjectKey(Cow<'a, str>), + Eof, } #[cfg(feature = "async-tokio")] diff --git a/src/read.rs b/src/read.rs index 4777d28..d5f619a 100644 --- a/src/read.rs +++ b/src/read.rs @@ -1,10 +1,13 @@ use crate::JsonEvent; -use std::borrow::Cow; -use std::cmp::{max, min}; -use std::error::Error; -use std::io::{self, Read}; -use std::ops::Range; -use std::{fmt, str}; +use std::{ + borrow::Cow, + cmp::{max, min}, + error::Error, + fmt, + io::{self, Read}, + ops::Range, + str, +}; #[cfg(feature = "async-tokio")] use tokio::io::{AsyncRead, AsyncReadExt}; @@ -27,95 +30,96 @@ const MAX_BUFFER_SIZE: usize = 4096 * 4096; /// # std::io::Result::Ok(()) /// ``` pub struct ReaderJsonParser { - input_buffer: Vec, - input_buffer_start: usize, - input_buffer_end: usize, - max_buffer_size: usize, - is_ending: bool, - read: R, - parser: LowLevelJsonParser, + input_buffer: Vec, + input_buffer_start: usize, + input_buffer_end: usize, + max_buffer_size: usize, + is_ending: bool, + read: R, + parser: LowLevelJsonParser, } impl ReaderJsonParser { - pub const fn new(read: R) -> Self { - Self { - input_buffer: Vec::new(), - input_buffer_start: 0, - input_buffer_end: 0, - max_buffer_size: MAX_BUFFER_SIZE, - is_ending: false, - read, - parser: LowLevelJsonParser::new(), - } + pub const fn new(read: R) -> Self { + Self { + input_buffer: Vec::new(), + input_buffer_start: 0, + input_buffer_end: 0, + max_buffer_size: MAX_BUFFER_SIZE, + is_ending: false, + read, + parser: LowLevelJsonParser::new(), } + } - /// Sets the max size of the internal buffer in bytes - pub fn with_max_buffer_size(mut self, size: usize) -> Self { - self.max_buffer_size = size; - self - } + /// Sets the max size of the internal buffer in bytes + pub fn with_max_buffer_size(mut self, size: usize) -> Self { + self.max_buffer_size = size; + self + } - pub fn parse_next(&mut self) -> Result, JsonParseError> { - loop { - { - let LowLevelJsonParserResult { - event, - consumed_bytes, - } = self.parser.parse_next( - #[allow(unsafe_code)] - unsafe { - let input_buffer_ptr: *const [u8] = - &self.input_buffer[self.input_buffer_start..self.input_buffer_end]; - &*input_buffer_ptr - }, // SAFETY: Borrow checker workaround https://github.com/rust-lang/rust/issues/70255 - self.is_ending, - ); - self.input_buffer_start += consumed_bytes; - if let Some(event) = event { - return Ok(event?); - } - } - if self.input_buffer_start > 0 { - self.input_buffer - .copy_within(self.input_buffer_start..self.input_buffer_end, 0); - self.input_buffer_end -= self.input_buffer_start; - self.input_buffer_start = 0; - } - if self.input_buffer.len() == self.max_buffer_size { - return Err(io::Error::new( - io::ErrorKind::OutOfMemory, - format!( - "Reached the buffer maximal size of {}", - self.max_buffer_size - ), - ) - .into()); - } - let min_end = min( - self.input_buffer_end + MIN_BUFFER_SIZE, - self.max_buffer_size, - ); - if self.input_buffer.len() < min_end { - self.input_buffer.resize(min_end, 0); - } - if self.input_buffer.len() < self.input_buffer.capacity() { - // We keep extending to have as much space as available without reallocation - self.input_buffer.resize(self.input_buffer.capacity(), 0); - } - 
let read = self - .read - .read(&mut self.input_buffer[self.input_buffer_end..])?; - self.input_buffer_end += read; - self.is_ending = read == 0; + pub fn parse_next(&mut self) -> Result, JsonParseError> { + loop { + { + let LowLevelJsonParserResult { + event, + consumed_bytes, + } = self.parser.parse_next( + #[allow(unsafe_code)] + unsafe { + let input_buffer_ptr: *const [u8] = + &self.input_buffer[self.input_buffer_start..self.input_buffer_end]; + &*input_buffer_ptr + }, // SAFETY: Borrow checker workaround https://github.com/rust-lang/rust/issues/70255 + self.is_ending, + ); + self.input_buffer_start += consumed_bytes; + if let Some(event) = event { + return Ok(event?); } + } + if self.input_buffer_start > 0 { + self + .input_buffer + .copy_within(self.input_buffer_start..self.input_buffer_end, 0); + self.input_buffer_end -= self.input_buffer_start; + self.input_buffer_start = 0; + } + if self.input_buffer.len() == self.max_buffer_size { + return Err( + io::Error::new( + io::ErrorKind::OutOfMemory, + format!( + "Reached the buffer maximal size of {}", + self.max_buffer_size + ), + ) + .into(), + ); + } + let min_end = min( + self.input_buffer_end + MIN_BUFFER_SIZE, + self.max_buffer_size, + ); + if self.input_buffer.len() < min_end { + self.input_buffer.resize(min_end, 0); + } + if self.input_buffer.len() < self.input_buffer.capacity() { + // We keep extending to have as much space as available without reallocation + self.input_buffer.resize(self.input_buffer.capacity(), 0); + } + let read = self + .read + .read(&mut self.input_buffer[self.input_buffer_end..])?; + self.input_buffer_end += read; + self.is_ending = read == 0; } + } - #[deprecated(note = "Use parse_next() instead")] - pub fn read_next_event(&mut self) -> Result, JsonParseError> { - self.parse_next() - } - - + #[deprecated(note = "Use parse_next() instead")] + pub fn read_next_event(&mut self) -> Result, JsonParseError> { + self.parse_next() + } /// Drains and returns the raw JSON string corresponding to the next full value (object, array, or scalar) /// after the last emitted ObjectKey (or current position). @@ -134,19 +138,20 @@ impl ReaderJsonParser { } = self.parser.parse_next( #[allow(unsafe_code)] unsafe { - let input_buffer_ptr: *const [u8] = - &self.input_buffer[offset..self.input_buffer_end]; + let input_buffer_ptr: *const [u8] = &self.input_buffer[offset..self.input_buffer_end]; &*input_buffer_ptr }, self.is_ending, ); if consumed_bytes == 0 && self.is_ending { - return Err(io::Error::new( - io::ErrorKind::UnexpectedEof, - "Unexpected EOF while draining value", - ) - .into()); + return Err( + io::Error::new( + io::ErrorKind::UnexpectedEof, + "Unexpected EOF while draining value", + ) + .into(), + ); } if let Some(event) = event { @@ -176,17 +181,21 @@ impl ReaderJsonParser { let end = offset + consumed_bytes; // self.parser = shadow_parser; self.input_buffer_start = end; - return Ok(str::from_utf8(&self.input_buffer[cursor..end]) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))? - .to_string()); + return Ok( + str::from_utf8(&self.input_buffer[cursor..end]) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))? 
+ .to_string(), + ); } } JsonEvent::Eof => { - return Err(io::Error::new( - io::ErrorKind::UnexpectedEof, - "Unexpected EOF while draining value", - ) - .into()); + return Err( + io::Error::new( + io::ErrorKind::UnexpectedEof, + "Unexpected EOF while draining value", + ) + .into(), + ); } _ => { if !found_start { @@ -197,9 +206,11 @@ impl ReaderJsonParser { if nesting == 0 { let end = offset + consumed_bytes; self.input_buffer_start = end; - return Ok(str::from_utf8(&self.input_buffer[cursor..end]) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))? - .to_string()); + return Ok( + str::from_utf8(&self.input_buffer[cursor..end]) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))? + .to_string(), + ); } } } @@ -210,16 +221,22 @@ impl ReaderJsonParser { if offset >= self.input_buffer_end { // shift + refill let remaining = self.input_buffer_end - self.input_buffer_start; - self.input_buffer.copy_within(self.input_buffer_start..self.input_buffer_end, 0); + self + .input_buffer + .copy_within(self.input_buffer_start..self.input_buffer_end, 0); self.input_buffer_start = 0; self.input_buffer_end = remaining; offset = self.input_buffer_end; if self.input_buffer.len() < self.max_buffer_size { - self.input_buffer.resize(self.input_buffer.len() + MIN_BUFFER_SIZE, 0); + self + .input_buffer + .resize(self.input_buffer.len() + MIN_BUFFER_SIZE, 0); } - let read = self.read.read(&mut self.input_buffer[self.input_buffer_end..])?; + let read = self + .read + .read(&mut self.input_buffer[self.input_buffer_end..])?; self.input_buffer_end += read; self.is_ending = read == 0; } @@ -248,95 +265,98 @@ impl ReaderJsonParser { /// ``` #[cfg(feature = "async-tokio")] pub struct TokioAsyncReaderJsonParser { - input_buffer: Vec, - input_buffer_start: usize, - input_buffer_end: usize, - max_buffer_size: usize, - is_ending: bool, - read: R, - parser: LowLevelJsonParser, + input_buffer: Vec, + input_buffer_start: usize, + input_buffer_end: usize, + max_buffer_size: usize, + is_ending: bool, + read: R, + parser: LowLevelJsonParser, } #[cfg(feature = "async-tokio")] impl TokioAsyncReaderJsonParser { - pub const fn new(read: R) -> Self { - Self { - input_buffer: Vec::new(), - input_buffer_start: 0, - input_buffer_end: 0, - max_buffer_size: MAX_BUFFER_SIZE, - is_ending: false, - read, - parser: LowLevelJsonParser::new(), - } + pub const fn new(read: R) -> Self { + Self { + input_buffer: Vec::new(), + input_buffer_start: 0, + input_buffer_end: 0, + max_buffer_size: MAX_BUFFER_SIZE, + is_ending: false, + read, + parser: LowLevelJsonParser::new(), } + } - /// Sets the max size of the internal buffer in bytes - pub fn with_max_buffer_size(mut self, size: usize) -> Self { - self.max_buffer_size = size; - self - } + /// Sets the max size of the internal buffer in bytes + pub fn with_max_buffer_size(mut self, size: usize) -> Self { + self.max_buffer_size = size; + self + } - pub async fn parse_next(&mut self) -> Result, JsonParseError> { - loop { - { - let LowLevelJsonParserResult { - event, - consumed_bytes, - } = self.parser.parse_next( - #[allow(unsafe_code)] - unsafe { - let input_buffer_ptr: *const [u8] = - &self.input_buffer[self.input_buffer_start..self.input_buffer_end]; - &*input_buffer_ptr - }, // Borrow checker workaround https://github.com/rust-lang/rust/issues/70255 - self.is_ending, - ); - self.input_buffer_start += consumed_bytes; - if let Some(event) = event { - return Ok(event?); - } - } - if self.input_buffer_start > 0 { - self.input_buffer - 
.copy_within(self.input_buffer_start..self.input_buffer_end, 0); - self.input_buffer_end -= self.input_buffer_start; - self.input_buffer_start = 0; - } - if self.input_buffer.len() == self.max_buffer_size { - return Err(io::Error::new( - io::ErrorKind::OutOfMemory, - format!( - "Reached the buffer maximal size of {}", - self.max_buffer_size - ), - ) - .into()); - } - let min_end = min( - self.input_buffer_end + MIN_BUFFER_SIZE, - self.max_buffer_size, - ); - if self.input_buffer.len() < min_end { - self.input_buffer.resize(min_end, 0); - } - if self.input_buffer.len() < self.input_buffer.capacity() { - // We keep extending to have as much space as available without reallocation - self.input_buffer.resize(self.input_buffer.capacity(), 0); - } - let read = self - .read - .read(&mut self.input_buffer[self.input_buffer_end..]) - .await?; - self.input_buffer_end += read; - self.is_ending = read == 0; + pub async fn parse_next(&mut self) -> Result, JsonParseError> { + loop { + { + let LowLevelJsonParserResult { + event, + consumed_bytes, + } = self.parser.parse_next( + #[allow(unsafe_code)] + unsafe { + let input_buffer_ptr: *const [u8] = + &self.input_buffer[self.input_buffer_start..self.input_buffer_end]; + &*input_buffer_ptr + }, // Borrow checker workaround https://github.com/rust-lang/rust/issues/70255 + self.is_ending, + ); + self.input_buffer_start += consumed_bytes; + if let Some(event) = event { + return Ok(event?); } + } + if self.input_buffer_start > 0 { + self + .input_buffer + .copy_within(self.input_buffer_start..self.input_buffer_end, 0); + self.input_buffer_end -= self.input_buffer_start; + self.input_buffer_start = 0; + } + if self.input_buffer.len() == self.max_buffer_size { + return Err( + io::Error::new( + io::ErrorKind::OutOfMemory, + format!( + "Reached the buffer maximal size of {}", + self.max_buffer_size + ), + ) + .into(), + ); + } + let min_end = min( + self.input_buffer_end + MIN_BUFFER_SIZE, + self.max_buffer_size, + ); + if self.input_buffer.len() < min_end { + self.input_buffer.resize(min_end, 0); + } + if self.input_buffer.len() < self.input_buffer.capacity() { + // We keep extending to have as much space as available without reallocation + self.input_buffer.resize(self.input_buffer.capacity(), 0); + } + let read = self + .read + .read(&mut self.input_buffer[self.input_buffer_end..]) + .await?; + self.input_buffer_end += read; + self.is_ending = read == 0; } + } - #[deprecated(note = "Use parse_next() instead")] - pub async fn read_next_event(&mut self) -> Result, JsonParseError> { - self.parse_next().await - } + #[deprecated(note = "Use parse_next() instead")] + pub async fn read_next_event(&mut self) -> Result, JsonParseError> { + self.parse_next().await + } } /// Parses a JSON file from a `&[u8]`. 
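Both reader wrappers reformatted above drive `LowLevelJsonParser` with the same compact-grow-refill loop. A standalone sketch of that buffering step, simplified for illustration (the `refill` name and signature are not part of the crate, and the 4096 increment stands in for its `MIN_BUFFER_SIZE`):

```rust
use std::io::Read;

/// One refill step: compact consumed bytes to the front of the buffer,
/// grow it toward the configured maximum, then read more input.
/// Returns true once the underlying reader is exhausted (`is_ending`).
fn refill(
    read: &mut impl Read,
    buf: &mut Vec<u8>,
    start: &mut usize,
    end: &mut usize,
    max_size: usize,
) -> std::io::Result<bool> {
    // Compact: everything before `start` has already been consumed.
    buf.copy_within(*start..*end, 0);
    *end -= *start;
    *start = 0;
    // Grow, but never past the configured maximum buffer size.
    let target = (*end + 4096).min(max_size);
    if buf.len() < target {
        buf.resize(target, 0);
    }
    let n = read.read(&mut buf[*end..])?;
    *end += n;
    Ok(n == 0)
}
```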
@@ -353,35 +373,35 @@ impl TokioAsyncReaderJsonParser { /// # std::io::Result::Ok(()) /// ``` pub struct SliceJsonParser<'a> { - input_buffer: &'a [u8], - parser: LowLevelJsonParser, + input_buffer: &'a [u8], + parser: LowLevelJsonParser, } impl<'a> SliceJsonParser<'a> { - pub const fn new(buffer: &'a [u8]) -> Self { - Self { - input_buffer: buffer, - parser: LowLevelJsonParser::new(), - } + pub const fn new(buffer: &'a [u8]) -> Self { + Self { + input_buffer: buffer, + parser: LowLevelJsonParser::new(), } + } - pub fn parse_next(&mut self) -> Result, JsonSyntaxError> { - loop { - let LowLevelJsonParserResult { - event, - consumed_bytes, - } = self.parser.parse_next(self.input_buffer, true); - self.input_buffer = &self.input_buffer[consumed_bytes..]; - if let Some(event) = event { - return event; - } - } + pub fn parse_next(&mut self) -> Result, JsonSyntaxError> { + loop { + let LowLevelJsonParserResult { + event, + consumed_bytes, + } = self.parser.parse_next(self.input_buffer, true); + self.input_buffer = &self.input_buffer[consumed_bytes..]; + if let Some(event) = event { + return event; + } } + } - #[deprecated(note = "Use parse_next() instead")] - pub fn read_next_event(&mut self) -> Result, JsonSyntaxError> { - self.parse_next() - } + #[deprecated(note = "Use parse_next() instead")] + pub fn read_next_event(&mut self) -> Result, JsonSyntaxError> { + self.parse_next() + } } /// A low-level JSON parser acting on a provided buffer. @@ -439,119 +459,119 @@ impl<'a> SliceJsonParser<'a> { /// # std::io::Result::Ok(()) /// ``` pub struct LowLevelJsonParser { - lexer: JsonLexer, - state_stack: Vec, - max_state_stack_size: usize, - element_read: bool, - buffered_event: Option>, + lexer: JsonLexer, + state_stack: Vec, + max_state_stack_size: usize, + element_read: bool, + buffered_event: Option>, } impl LowLevelJsonParser { - pub const fn new() -> Self { - Self { - lexer: JsonLexer { - file_offset: 0, - file_line: 0, - file_start_of_last_line: 0, - file_start_of_last_token: 0, - is_start: true, - }, - state_stack: Vec::new(), - max_state_stack_size: MAX_STATE_STACK_SIZE, - element_read: false, - buffered_event: None, - } + pub const fn new() -> Self { + Self { + lexer: JsonLexer { + file_offset: 0, + file_line: 0, + file_start_of_last_line: 0, + file_start_of_last_token: 0, + is_start: true, + }, + state_stack: Vec::new(), + max_state_stack_size: MAX_STATE_STACK_SIZE, + element_read: false, + buffered_event: None, } + } - /// Maximal allowed number of nested object and array openings. Infinite by default. - pub fn with_max_stack_size(mut self, size: usize) -> Self { - self.max_state_stack_size = size; - self - } + /// Maximal allowed number of nested object and array openings. Infinite by default. + pub fn with_max_stack_size(mut self, size: usize) -> Self { + self.max_state_stack_size = size; + self + } - /// Reads a new event from the data in `input_buffer`. - /// - /// `is_ending` must be set to true if all the JSON data have been already consumed or are in `input_buffer`. - pub fn parse_next<'a>( - &mut self, - input_buffer: &'a [u8], - is_ending: bool, - ) -> LowLevelJsonParserResult<'a> { - if let Some(event) = self.buffered_event.take() { + /// Reads a new event from the data in `input_buffer`. + /// + /// `is_ending` must be set to true if all the JSON data have been already consumed or are in `input_buffer`. 
+ pub fn parse_next<'a>( + &mut self, + input_buffer: &'a [u8], + is_ending: bool, + ) -> LowLevelJsonParserResult<'a> { + if let Some(event) = self.buffered_event.take() { + return LowLevelJsonParserResult { + consumed_bytes: 0, + event: Some(Ok(event)), + }; + } + let start_file_offset = self.lexer.file_offset; + while let Some(token) = self.lexer.read_next_token( + &input_buffer[usize::try_from(self.lexer.file_offset - start_file_offset).unwrap()..], + is_ending, + ) { + let consumed_bytes = (self.lexer.file_offset - start_file_offset) + .try_into() + .unwrap(); + match token { + Ok(token) => { + let (event, error) = self.apply_new_token(token); + let error = error.map(|e| { + self.lexer.syntax_error( + self.lexer.file_start_of_last_token..self.lexer.file_offset, + e, + ) + }); + if let Some(error) = error { + self.buffered_event = event.map(owned_event); return LowLevelJsonParserResult { - consumed_bytes: 0, - event: Some(Ok(event)), + consumed_bytes, + event: Some(Err(error)), }; + } + if let Some(event) = event { + return LowLevelJsonParserResult { + consumed_bytes, + event: Some(Ok(event)), + }; + } } - let start_file_offset = self.lexer.file_offset; - while let Some(token) = self.lexer.read_next_token( - &input_buffer[usize::try_from(self.lexer.file_offset - start_file_offset).unwrap()..], - is_ending, - ) { - let consumed_bytes = (self.lexer.file_offset - start_file_offset) - .try_into() - .unwrap(); - match token { - Ok(token) => { - let (event, error) = self.apply_new_token(token); - let error = error.map(|e| { - self.lexer.syntax_error( - self.lexer.file_start_of_last_token..self.lexer.file_offset, - e, - ) - }); - if let Some(error) = error { - self.buffered_event = event.map(owned_event); - return LowLevelJsonParserResult { - consumed_bytes, - event: Some(Err(error)), - }; - } - if let Some(event) = event { - return LowLevelJsonParserResult { - consumed_bytes, - event: Some(Ok(event)), - }; - } - } - Err(error) => { - return LowLevelJsonParserResult { - consumed_bytes, - event: Some(Err(error)), - } - } - } - } - LowLevelJsonParserResult { - consumed_bytes: (self.lexer.file_offset - start_file_offset) - .try_into() - .unwrap(), - event: if is_ending { - self.buffered_event = Some(JsonEvent::Eof); - Some(Err(self.lexer.syntax_error( - self.lexer.file_offset..self.lexer.file_offset + 1, - "Unexpected end of file", - ))) - } else { - None - }, + Err(error) => { + return LowLevelJsonParserResult { + consumed_bytes, + event: Some(Err(error)), + } } + } } - - #[deprecated(note = "Use parse_next() instead")] - pub fn read_next_event<'a>( - &mut self, - input_buffer: &'a [u8], - is_ending: bool, - ) -> LowLevelJsonParserResult<'a> { - self.parse_next(input_buffer, is_ending) + LowLevelJsonParserResult { + consumed_bytes: (self.lexer.file_offset - start_file_offset) + .try_into() + .unwrap(), + event: if is_ending { + self.buffered_event = Some(JsonEvent::Eof); + Some(Err(self.lexer.syntax_error( + self.lexer.file_offset..self.lexer.file_offset + 1, + "Unexpected end of file", + ))) + } else { + None + }, } + } + + #[deprecated(note = "Use parse_next() instead")] + pub fn read_next_event<'a>( + &mut self, + input_buffer: &'a [u8], + is_ending: bool, + ) -> LowLevelJsonParserResult<'a> { + self.parse_next(input_buffer, is_ending) + } - fn apply_new_token<'a>( - &mut self, - token: JsonToken<'a>, - ) -> (Option>, Option) { - match self.state_stack.pop() { + fn apply_new_token<'a>( + &mut self, + token: JsonToken<'a>, + ) -> (Option>, Option) { + match self.state_stack.pop() { 
Some(JsonState::ObjectKeyOrEnd) => { if token == JsonToken::ClosingCurlyBracket { (Some(JsonEvent::EndObject), None) @@ -648,331 +668,325 @@ impl LowLevelJsonParser { self.apply_new_token_for_value(token) } } - } + } - fn apply_new_token_for_value<'a>( - &mut self, - token: JsonToken<'a>, - ) -> (Option>, Option) { - match token { - JsonToken::OpeningSquareBracket => ( - Some(JsonEvent::StartArray), - self.push_state_stack(JsonState::ArrayValueOrEnd).err(), - ), - JsonToken::ClosingSquareBracket => ( - None, - Some("Unexpected closing square bracket, no array to close".into()), - ), - JsonToken::OpeningCurlyBracket => ( - Some(JsonEvent::StartObject), - self.push_state_stack(JsonState::ObjectKeyOrEnd).err(), - ), - JsonToken::ClosingCurlyBracket => ( - None, - Some("Unexpected closing curly bracket, no array to close".into()), - ), - JsonToken::Comma => (None, Some("Unexpected comma, no values to separate".into())), - JsonToken::Colon => (None, Some("Unexpected colon, no key to follow".into())), - JsonToken::String(string) => (Some(JsonEvent::String(string)), None), - JsonToken::Number(number) => (Some(JsonEvent::Number(number)), None), - JsonToken::True => (Some(JsonEvent::Boolean(true)), None), - JsonToken::False => (Some(JsonEvent::Boolean(false)), None), - JsonToken::Null => (Some(JsonEvent::Null), None), - JsonToken::Eof => ( - Some(JsonEvent::Eof), - Some("Unexpected end of file, a value was expected".into()), - ), - } + fn apply_new_token_for_value<'a>( + &mut self, + token: JsonToken<'a>, + ) -> (Option>, Option) { + match token { + JsonToken::OpeningSquareBracket => ( + Some(JsonEvent::StartArray), + self.push_state_stack(JsonState::ArrayValueOrEnd).err(), + ), + JsonToken::ClosingSquareBracket => ( + None, + Some("Unexpected closing square bracket, no array to close".into()), + ), + JsonToken::OpeningCurlyBracket => ( + Some(JsonEvent::StartObject), + self.push_state_stack(JsonState::ObjectKeyOrEnd).err(), + ), + JsonToken::ClosingCurlyBracket => ( + None, + Some("Unexpected closing curly bracket, no array to close".into()), + ), + JsonToken::Comma => (None, Some("Unexpected comma, no values to separate".into())), + JsonToken::Colon => (None, Some("Unexpected colon, no key to follow".into())), + JsonToken::String(string) => (Some(JsonEvent::String(string)), None), + JsonToken::Number(number) => (Some(JsonEvent::Number(number)), None), + JsonToken::True => (Some(JsonEvent::Boolean(true)), None), + JsonToken::False => (Some(JsonEvent::Boolean(false)), None), + JsonToken::Null => (Some(JsonEvent::Null), None), + JsonToken::Eof => ( + Some(JsonEvent::Eof), + Some("Unexpected end of file, a value was expected".into()), + ), } + } - fn push_state_stack(&mut self, state: JsonState) -> Result<(), String> { - self.check_stack_size()?; - self.state_stack.push(state); - Ok(()) - } + fn push_state_stack(&mut self, state: JsonState) -> Result<(), String> { + self.check_stack_size()?; + self.state_stack.push(state); + Ok(()) + } - fn check_stack_size(&self) -> Result<(), String> { - if self.state_stack.len() > self.max_state_stack_size { - Err(format!( - "Max stack size of {} reached on an object opening", - self.max_state_stack_size - )) - } else { - Ok(()) - } + fn check_stack_size(&self) -> Result<(), String> { + if self.state_stack.len() > self.max_state_stack_size { + Err(format!( + "Max stack size of {} reached on an object opening", + self.max_state_stack_size + )) + } else { + Ok(()) } + } } impl Default for LowLevelJsonParser { - fn default() -> Self { - Self::new() - } + fn default() 
-> Self { + Self::new() + } } #[derive(Eq, PartialEq, Copy, Clone, Debug)] enum JsonState { - ObjectKey, - ObjectKeyOrEnd, - ObjectColon, - ObjectValue, - ObjectCommaOrEnd, - ArrayIndex, - ArrayValue, - ArrayValueOrEnd, - ArrayCommaOrEnd, + ObjectKey, + ObjectKeyOrEnd, + ObjectColon, + ObjectValue, + ObjectCommaOrEnd, + ArrayIndex, + ArrayValue, + ArrayValueOrEnd, + ArrayCommaOrEnd, } #[derive(Eq, PartialEq, Clone, Debug)] enum JsonToken<'a> { - OpeningSquareBracket, // [ - ClosingSquareBracket, // ] - OpeningCurlyBracket, // { - ClosingCurlyBracket, // } - Comma, // , - Colon, // : - String(Cow<'a, str>), // "..." - Number(Cow<'a, str>), // 1.2e3 - True, // true - False, // false - Null, // null - Eof, // EOF + OpeningSquareBracket, // [ + ClosingSquareBracket, // ] + OpeningCurlyBracket, // { + ClosingCurlyBracket, // } + Comma, // , + Colon, // : + String(Cow<'a, str>), // "..." + Number(Cow<'a, str>), // 1.2e3 + True, // true + False, // false + Null, // null + Eof, // EOF } struct JsonLexer { - file_offset: u64, - file_line: u64, - file_start_of_last_line: u64, - file_start_of_last_token: u64, - is_start: bool, + file_offset: u64, + file_line: u64, + file_start_of_last_line: u64, + file_start_of_last_token: u64, + is_start: bool, } impl JsonLexer { - fn read_next_token<'a>( - &mut self, - mut input_buffer: &'a [u8], - is_ending: bool, - ) -> Option, JsonSyntaxError>> { - // We remove BOM at the beginning - if self.is_start { - if input_buffer.len() < 3 && !is_ending { - return None; - } - self.is_start = false; - if input_buffer.starts_with(&[0xEF, 0xBB, 0xBF]) { - input_buffer = &input_buffer[3..]; - self.file_offset += 3; - } - } + fn read_next_token<'a>( + &mut self, + mut input_buffer: &'a [u8], + is_ending: bool, + ) -> Option, JsonSyntaxError>> { + // We remove BOM at the beginning + if self.is_start { + if input_buffer.len() < 3 && !is_ending { + return None; + } + self.is_start = false; + if input_buffer.starts_with(&[0xEF, 0xBB, 0xBF]) { + input_buffer = &input_buffer[3..]; + self.file_offset += 3; + } + } - // We skip whitespaces - let mut i = 0; - while let Some(c) = input_buffer.get(i) { - match *c { - b' ' | b'\t' => { - i += 1; - } - b'\n' => { - i += 1; - self.file_line += 1; - self.file_start_of_last_line = self.file_offset + u64::try_from(i).unwrap(); - } - b'\r' => { - i += 1; - if let Some(c) = input_buffer.get(i) { - if *c == b'\n' { - i += 1; // \r\n - } - } else if !is_ending { - // We need an extra byte to check if followed by \n - i -= 1; - self.file_offset += u64::try_from(i).unwrap(); - return None; - } - self.file_line += 1; - self.file_start_of_last_line = self.file_offset + u64::try_from(i).unwrap(); - } - _ => { - break; - } + // We skip whitespaces + let mut i = 0; + while let Some(c) = input_buffer.get(i) { + match *c { + b' ' | b'\t' => { + i += 1; + } + b'\n' => { + i += 1; + self.file_line += 1; + self.file_start_of_last_line = self.file_offset + u64::try_from(i).unwrap(); + } + b'\r' => { + i += 1; + if let Some(c) = input_buffer.get(i) { + if *c == b'\n' { + i += 1; // \r\n } + } else if !is_ending { + // We need an extra byte to check if followed by \n + i -= 1; + self.file_offset += u64::try_from(i).unwrap(); + return None; + } + self.file_line += 1; + self.file_start_of_last_line = self.file_offset + u64::try_from(i).unwrap(); } - self.file_offset += u64::try_from(i).unwrap(); - input_buffer = &input_buffer[i..]; - self.file_start_of_last_token = self.file_offset; - - if is_ending && input_buffer.is_empty() { - return Some(Ok(JsonToken::Eof)); + 
_ => { + break; } + } + } + self.file_offset += u64::try_from(i).unwrap(); + input_buffer = &input_buffer[i..]; + self.file_start_of_last_token = self.file_offset; - // we get the first character - match *input_buffer.first()? { - b'{' => { - self.file_offset += 1; - Some(Ok(JsonToken::OpeningCurlyBracket)) + if is_ending && input_buffer.is_empty() { + return Some(Ok(JsonToken::Eof)); + } + + // we get the first character + match *input_buffer.first()? { + b'{' => { + self.file_offset += 1; + Some(Ok(JsonToken::OpeningCurlyBracket)) + } + b'}' => { + self.file_offset += 1; + Some(Ok(JsonToken::ClosingCurlyBracket)) + } + b'[' => { + self.file_offset += 1; + Some(Ok(JsonToken::OpeningSquareBracket)) + } + b']' => { + self.file_offset += 1; + Some(Ok(JsonToken::ClosingSquareBracket)) + } + b',' => { + self.file_offset += 1; + Some(Ok(JsonToken::Comma)) + } + b':' => { + self.file_offset += 1; + Some(Ok(JsonToken::Colon)) + } + b'"' => self.read_string(input_buffer), + b't' => self.read_constant(input_buffer, is_ending, "true", JsonToken::True), + b'f' => self.read_constant(input_buffer, is_ending, "false", JsonToken::False), + b'n' => self.read_constant(input_buffer, is_ending, "null", JsonToken::Null), + b'-' | b'0'..=b'9' => self.read_number(input_buffer, is_ending), + c => { + self.file_offset += 1; + Some(Err(self.syntax_error( + self.file_offset - 1..self.file_offset, + if c < 128 { + format!("Unexpected char: '{}'", char::from(c)) + } else { + format!("Unexpected byte: \\x{c:X}") + }, + ))) + } + } + } + + fn read_string<'a>( + &mut self, + input_buffer: &'a [u8], + ) -> Option, JsonSyntaxError>> { + let mut error = None; + let mut string: Option<(String, usize)> = None; + let mut next_byte_offset = 1; + loop { + match *input_buffer.get(next_byte_offset)? { + b'"' => { + // end of string + let result = Some(if let Some(error) = error { + Err(error) + } else if let Some((mut string, read_until)) = string { + if read_until < next_byte_offset { + let (str, e) = self.decode_utf8( + &input_buffer[read_until..next_byte_offset], + self.file_offset + u64::try_from(read_until).unwrap(), + ); + error = error.or(e); + string.push_str(&str); } - b'}' => { - self.file_offset += 1; - Some(Ok(JsonToken::ClosingCurlyBracket)) + if let Some(error) = error { + Err(error) + } else { + Ok(JsonToken::String(Cow::Owned(string))) } - b'[' => { - self.file_offset += 1; - Some(Ok(JsonToken::OpeningSquareBracket)) + } else { + let (string, error) = + self.decode_utf8(&input_buffer[1..next_byte_offset], self.file_offset + 1); + if let Some(error) = error { + Err(error) + } else { + Ok(JsonToken::String(string)) } - b']' => { - self.file_offset += 1; - Some(Ok(JsonToken::ClosingSquareBracket)) + }); + self.file_offset += u64::try_from(next_byte_offset).unwrap() + 1; + return result; + } + b'\\' => { + // Escape sequences + if string.is_none() { + string = Some((String::new(), 1)) + } + let (string, read_until) = string.as_mut().unwrap(); + if *read_until < next_byte_offset { + let (str, e) = self.decode_utf8( + &input_buffer[*read_until..next_byte_offset], + self.file_offset + u64::try_from(*read_until).unwrap(), + ); + error = error.or(e); + string.push_str(&str); + } + next_byte_offset += 1; + match *input_buffer.get(next_byte_offset)? 
{ + b'"' => { + string.push('"'); + next_byte_offset += 1; } - b',' => { - self.file_offset += 1; - Some(Ok(JsonToken::Comma)) + b'\\' => { + string.push('\\'); + next_byte_offset += 1; } - b':' => { - self.file_offset += 1; - Some(Ok(JsonToken::Colon)) + b'/' => { + string.push('/'); + next_byte_offset += 1; } - b'"' => self.read_string(input_buffer), - b't' => self.read_constant(input_buffer, is_ending, "true", JsonToken::True), - b'f' => self.read_constant(input_buffer, is_ending, "false", JsonToken::False), - b'n' => self.read_constant(input_buffer, is_ending, "null", JsonToken::Null), - b'-' | b'0'..=b'9' => self.read_number(input_buffer, is_ending), - c => { - self.file_offset += 1; - Some(Err(self.syntax_error( - self.file_offset - 1..self.file_offset, - if c < 128 { - format!("Unexpected char: '{}'", char::from(c)) - } else { - format!("Unexpected byte: \\x{c:X}") - }, - ))) + b'b' => { + string.push('\u{8}'); + next_byte_offset += 1; } - } - } - - fn read_string<'a>( - &mut self, - input_buffer: &'a [u8], - ) -> Option, JsonSyntaxError>> { - let mut error = None; - let mut string: Option<(String, usize)> = None; - let mut next_byte_offset = 1; - loop { - match *input_buffer.get(next_byte_offset)? { - b'"' => { - // end of string - let result = Some(if let Some(error) = error { - Err(error) - } else if let Some((mut string, read_until)) = string { - if read_until < next_byte_offset { - let (str, e) = self.decode_utf8( - &input_buffer[read_until..next_byte_offset], - self.file_offset + u64::try_from(read_until).unwrap(), - ); - error = error.or(e); - string.push_str(&str); - } - if let Some(error) = error { - Err(error) - } else { - Ok(JsonToken::String(Cow::Owned(string))) - } - } else { - let (string, error) = self - .decode_utf8(&input_buffer[1..next_byte_offset], self.file_offset + 1); - if let Some(error) = error { - Err(error) - } else { - Ok(JsonToken::String(string)) - } - }); - self.file_offset += u64::try_from(next_byte_offset).unwrap() + 1; - return result; + b'f' => { + string.push('\u{C}'); + next_byte_offset += 1; + } + b'n' => { + string.push('\n'); + next_byte_offset += 1; + } + b'r' => { + string.push('\r'); + next_byte_offset += 1; + } + b't' => { + string.push('\t'); + next_byte_offset += 1; + } + b'u' => { + next_byte_offset += 1; + let val = input_buffer.get(next_byte_offset..next_byte_offset + 4)?; + next_byte_offset += 4; + let code_point = match read_hexa_char(val) { + Ok(cp) => cp, + Err(e) => { + error = error.or_else(|| { + let pos = self.file_offset + u64::try_from(next_byte_offset).unwrap(); + Some(self.syntax_error(pos - 4..pos, e)) + }); + char::REPLACEMENT_CHARACTER.into() } - b'\\' => { - // Escape sequences - if string.is_none() { - string = Some((String::new(), 1)) - } - let (string, read_until) = string.as_mut().unwrap(); - if *read_until < next_byte_offset { - let (str, e) = self.decode_utf8( - &input_buffer[*read_until..next_byte_offset], - self.file_offset + u64::try_from(*read_until).unwrap(), - ); - error = error.or(e); - string.push_str(&str); - } - next_byte_offset += 1; - match *input_buffer.get(next_byte_offset)? 
{ - b'"' => { - string.push('"'); - next_byte_offset += 1; - } - b'\\' => { - string.push('\\'); - next_byte_offset += 1; - } - b'/' => { - string.push('/'); - next_byte_offset += 1; - } - b'b' => { - string.push('\u{8}'); - next_byte_offset += 1; - } - b'f' => { - string.push('\u{C}'); - next_byte_offset += 1; - } - b'n' => { - string.push('\n'); - next_byte_offset += 1; - } - b'r' => { - string.push('\r'); - next_byte_offset += 1; - } - b't' => { - string.push('\t'); - next_byte_offset += 1; - } - b'u' => { - next_byte_offset += 1; - let val = input_buffer.get(next_byte_offset..next_byte_offset + 4)?; - next_byte_offset += 4; - let code_point = match read_hexa_char(val) { - Ok(cp) => cp, - Err(e) => { - error = error.or_else(|| { - let pos = self.file_offset - + u64::try_from(next_byte_offset).unwrap(); - Some(self.syntax_error(pos - 4..pos, e)) - }); - char::REPLACEMENT_CHARACTER.into() - } - }; - if let Some(c) = char::from_u32(code_point) { - string.push(c); - } else { - let high_surrogate = code_point; - if !(0xD800..=0xDBFF).contains(&high_surrogate) { - error = error.or_else(|| { - let pos = self.file_offset - + u64::try_from(next_byte_offset).unwrap(); - Some(self.syntax_error( - pos - 6..pos, - format!( - "\\u{:X} is not a valid high surrogate", - high_surrogate - ), - )) - }); - } - let val = - input_buffer.get(next_byte_offset..next_byte_offset + 6)?; - next_byte_offset += 6; - if !val.starts_with(b"\\u") { - error = error.or_else(|| { + }; + if let Some(c) = char::from_u32(code_point) { + string.push(c); + } else { + let high_surrogate = code_point; + if !(0xD800..=0xDBFF).contains(&high_surrogate) { + error = error.or_else(|| { + let pos = self.file_offset + u64::try_from(next_byte_offset).unwrap(); + Some(self.syntax_error( + pos - 6..pos, + format!("\\u{:X} is not a valid high surrogate", high_surrogate), + )) + }); + } + let val = input_buffer.get(next_byte_offset..next_byte_offset + 6)?; + next_byte_offset += 6; + if !val.starts_with(b"\\u") { + error = error.or_else(|| { let pos = self.file_offset + u64::try_from(next_byte_offset).unwrap(); Some(self.syntax_error( pos - 6..pos, @@ -982,302 +996,294 @@ impl JsonLexer { ) )) }); - } - let low_surrogate = match read_hexa_char(&val[2..]) { - Ok(cp) => cp, - Err(e) => { - error = error.or_else(|| { - let pos = self.file_offset - + u64::try_from(next_byte_offset).unwrap(); - Some(self.syntax_error(pos - 6..pos, e)) - }); - char::REPLACEMENT_CHARACTER.into() - } - }; - if !(0xDC00..=0xDFFF).contains(&low_surrogate) { - error = error.or_else(|| { - let pos = self.file_offset - + u64::try_from(next_byte_offset).unwrap(); - Some(self.syntax_error( - pos - 6..pos, - format!( - "\\u{:X} is not a valid low surrogate", - low_surrogate - ), - )) - }); - } - let code_point = 0x10000 - + ((high_surrogate & 0x03FF) << 10) - + (low_surrogate & 0x03FF); - if let Some(c) = char::from_u32(code_point) { - string.push(c) - } else { - string.push(char::REPLACEMENT_CHARACTER); - error = error.or_else(|| { - let pos = self.file_offset - + u64::try_from(next_byte_offset).unwrap(); - Some(self.syntax_error( - pos - 12..pos, - format!( - "\\u{:X}\\u{:X} is an invalid surrogate pair", - high_surrogate, low_surrogate - ), - )) - }); - } - } - } - c => { - next_byte_offset += 1; - error = error.or_else(|| { - let pos = - self.file_offset + u64::try_from(next_byte_offset).unwrap(); - Some(self.syntax_error( - pos - 2..pos, - format!("'\\{}' is not a valid escape sequence", char::from(c)), - )) - }); - string.push(char::REPLACEMENT_CHARACTER); - } - } - 
*read_until = next_byte_offset; } - c @ (0..=0x1F) => { + let low_surrogate = match read_hexa_char(&val[2..]) { + Ok(cp) => cp, + Err(e) => { error = error.or_else(|| { - let pos = self.file_offset + u64::try_from(next_byte_offset).unwrap(); - Some(self.syntax_error( - pos..pos + 1, - format!("'{}' is not allowed in JSON strings", char::from(c)), - )) + let pos = self.file_offset + u64::try_from(next_byte_offset).unwrap(); + Some(self.syntax_error(pos - 6..pos, e)) }); - next_byte_offset += 1; + char::REPLACEMENT_CHARACTER.into() + } + }; + if !(0xDC00..=0xDFFF).contains(&low_surrogate) { + error = error.or_else(|| { + let pos = self.file_offset + u64::try_from(next_byte_offset).unwrap(); + Some(self.syntax_error( + pos - 6..pos, + format!("\\u{:X} is not a valid low surrogate", low_surrogate), + )) + }); } - _ => { - next_byte_offset += 1; + let code_point = + 0x10000 + ((high_surrogate & 0x03FF) << 10) + (low_surrogate & 0x03FF); + if let Some(c) = char::from_u32(code_point) { + string.push(c) + } else { + string.push(char::REPLACEMENT_CHARACTER); + error = error.or_else(|| { + let pos = self.file_offset + u64::try_from(next_byte_offset).unwrap(); + Some(self.syntax_error( + pos - 12..pos, + format!( + "\\u{:X}\\u{:X} is an invalid surrogate pair", + high_surrogate, low_surrogate + ), + )) + }); } + } } + c => { + next_byte_offset += 1; + error = error.or_else(|| { + let pos = self.file_offset + u64::try_from(next_byte_offset).unwrap(); + Some(self.syntax_error( + pos - 2..pos, + format!("'\\{}' is not a valid escape sequence", char::from(c)), + )) + }); + string.push(char::REPLACEMENT_CHARACTER); + } + } + *read_until = next_byte_offset; } - } - - fn read_constant( - &mut self, - input_buffer: &[u8], - is_ending: bool, - expected: &str, - value: JsonToken<'static>, - ) -> Option, JsonSyntaxError>> { - if input_buffer.get(..expected.len())? == expected.as_bytes() { - self.file_offset += u64::try_from(expected.len()).unwrap(); - return Some(Ok(value)); + c @ (0..=0x1F) => { + error = error.or_else(|| { + let pos = self.file_offset + u64::try_from(next_byte_offset).unwrap(); + Some(self.syntax_error( + pos..pos + 1, + format!("'{}' is not allowed in JSON strings", char::from(c)), + )) + }); + next_byte_offset += 1; } - let ascii_chars = input_buffer - .iter() - .take_while(|c| c.is_ascii_alphabetic()) - .count(); - if ascii_chars == input_buffer.len() && !is_ending { - return None; // We might read a bigger token + _ => { + next_byte_offset += 1; } - let read = max(1, ascii_chars); // We want to consume at least a byte - let start_offset = self.file_offset; - self.file_offset += u64::try_from(read).unwrap(); - Some(Err(self.syntax_error( - start_offset..self.file_offset, - format!("{} expected", expected), - ))) + } } + } - fn read_number<'a>( - &mut self, - input_buffer: &'a [u8], - is_ending: bool, - ) -> Option, JsonSyntaxError>> { - let mut next_byte_offset = 0; - if *input_buffer.get(next_byte_offset)? == b'-' { - next_byte_offset += 1; - } - // integer starting with first bytes - match *input_buffer.get(next_byte_offset)? 
{ - b'0' => { - next_byte_offset += 1; - } - b'1'..=b'9' => { - next_byte_offset += 1; - next_byte_offset += read_digits(&input_buffer[next_byte_offset..], is_ending)?; - } - c => { - next_byte_offset += 1; - self.file_offset += u64::try_from(next_byte_offset).unwrap(); - return Some(Err(self.syntax_error( - self.file_offset - 1..self.file_offset, - format!("A number is not allowed to start with '{}'", char::from(c)), - ))); - } - } + fn read_constant( + &mut self, + input_buffer: &[u8], + is_ending: bool, + expected: &str, + value: JsonToken<'static>, + ) -> Option, JsonSyntaxError>> { + if input_buffer.get(..expected.len())? == expected.as_bytes() { + self.file_offset += u64::try_from(expected.len()).unwrap(); + return Some(Ok(value)); + } + let ascii_chars = input_buffer + .iter() + .take_while(|c| c.is_ascii_alphabetic()) + .count(); + if ascii_chars == input_buffer.len() && !is_ending { + return None; // We might read a bigger token + } + let read = max(1, ascii_chars); // We want to consume at least a byte + let start_offset = self.file_offset; + self.file_offset += u64::try_from(read).unwrap(); + Some(Err(self.syntax_error( + start_offset..self.file_offset, + format!("{} expected", expected), + ))) + } - // Dot - if input_buffer.get(next_byte_offset).map_or_else( - || if is_ending { Some(None) } else { None }, - |c| Some(Some(*c)), - )? == Some(b'.') - { - next_byte_offset += 1; - let c = *input_buffer.get(next_byte_offset)?; - next_byte_offset += 1; - if !c.is_ascii_digit() { - self.file_offset += u64::try_from(next_byte_offset).unwrap(); - return Some(Err(self.syntax_error( - self.file_offset - 1..self.file_offset, - format!( - "A number fractional part must start with a digit and not '{}'", - char::from(c) - ), - ))); - } - next_byte_offset += read_digits(&input_buffer[next_byte_offset..], is_ending)?; - } + fn read_number<'a>( + &mut self, + input_buffer: &'a [u8], + is_ending: bool, + ) -> Option, JsonSyntaxError>> { + let mut next_byte_offset = 0; + if *input_buffer.get(next_byte_offset)? == b'-' { + next_byte_offset += 1; + } + // integer starting with first bytes + match *input_buffer.get(next_byte_offset)? { + b'0' => { + next_byte_offset += 1; + } + b'1'..=b'9' => { + next_byte_offset += 1; + next_byte_offset += read_digits(&input_buffer[next_byte_offset..], is_ending)?; + } + c => { + next_byte_offset += 1; + self.file_offset += u64::try_from(next_byte_offset).unwrap(); + return Some(Err(self.syntax_error( + self.file_offset - 1..self.file_offset, + format!("A number is not allowed to start with '{}'", char::from(c)), + ))); + } + } - // Exp - let c = input_buffer.get(next_byte_offset).map_or_else( - || if is_ending { Some(None) } else { None }, - |c| Some(Some(*c)), - )?; - if c == Some(b'e') || c == Some(b'E') { - next_byte_offset += 1; - match *input_buffer.get(next_byte_offset)? 
{ - b'-' | b'+' => { - next_byte_offset += 1; - let c = *input_buffer.get(next_byte_offset)?; - next_byte_offset += 1; - if !c.is_ascii_digit() { - self.file_offset += u64::try_from(next_byte_offset).unwrap(); - return Some(Err(self.syntax_error( - self.file_offset - 1..self.file_offset, - format!( - "A number exponential part must contain at least a digit, '{}' found", - char::from(c) - ), - ))); - } - } - b'0'..=b'9' => { - next_byte_offset += 1; - } - c => { - next_byte_offset += 1; - self.file_offset += u64::try_from(next_byte_offset).unwrap(); - return Some(Err(self.syntax_error( - self.file_offset - 1..self.file_offset, - format!( - "A number exponential part must start with +, - or a digit, '{}' found", - char::from(c) - ), - ))); - } - } - next_byte_offset += read_digits(&input_buffer[next_byte_offset..], is_ending)?; - } + // Dot + if input_buffer.get(next_byte_offset).map_or_else( + || if is_ending { Some(None) } else { None }, + |c| Some(Some(*c)), + )? == Some(b'.') + { + next_byte_offset += 1; + let c = *input_buffer.get(next_byte_offset)?; + next_byte_offset += 1; + if !c.is_ascii_digit() { self.file_offset += u64::try_from(next_byte_offset).unwrap(); - Some(Ok(JsonToken::Number(Cow::Borrowed( - str::from_utf8(&input_buffer[..next_byte_offset]).unwrap(), - )))) + return Some(Err(self.syntax_error( + self.file_offset - 1..self.file_offset, + format!( + "A number fractional part must start with a digit and not '{}'", + char::from(c) + ), + ))); + } + next_byte_offset += read_digits(&input_buffer[next_byte_offset..], is_ending)?; } - fn decode_utf8<'a>( - &self, - input_buffer: &'a [u8], - start_position: u64, - ) -> (Cow<'a, str>, Option) { - match str::from_utf8(input_buffer) { - Ok(str) => (Cow::Borrowed(str), None), - Err(e) => ( - String::from_utf8_lossy(input_buffer), - Some({ - let pos = start_position + u64::try_from(e.valid_up_to()).unwrap(); - self.syntax_error(pos..pos + 1, format!("Invalid UTF-8: {e}")) - }), + // Exp + let c = input_buffer.get(next_byte_offset).map_or_else( + || if is_ending { Some(None) } else { None }, + |c| Some(Some(*c)), + )?; + if c == Some(b'e') || c == Some(b'E') { + next_byte_offset += 1; + match *input_buffer.get(next_byte_offset)? 
{ + b'-' | b'+' => { + next_byte_offset += 1; + let c = *input_buffer.get(next_byte_offset)?; + next_byte_offset += 1; + if !c.is_ascii_digit() { + self.file_offset += u64::try_from(next_byte_offset).unwrap(); + return Some(Err(self.syntax_error( + self.file_offset - 1..self.file_offset, + format!( + "A number exponential part must contain at least a digit, '{}' found", + char::from(c) + ), + ))); + } + } + b'0'..=b'9' => { + next_byte_offset += 1; + } + c => { + next_byte_offset += 1; + self.file_offset += u64::try_from(next_byte_offset).unwrap(); + return Some(Err(self.syntax_error( + self.file_offset - 1..self.file_offset, + format!( + "A number exponential part must start with +, - or a digit, '{}' found", + char::from(c) ), + ))); } + } + next_byte_offset += read_digits(&input_buffer[next_byte_offset..], is_ending)?; } + self.file_offset += u64::try_from(next_byte_offset).unwrap(); + Some(Ok(JsonToken::Number(Cow::Borrowed( + str::from_utf8(&input_buffer[..next_byte_offset]).unwrap(), + )))) + } - fn syntax_error(&self, file_offset: Range, message: impl Into) -> JsonSyntaxError { - let start_file_offset = max(file_offset.start, self.file_start_of_last_line); - JsonSyntaxError { - location: TextPosition { - line: self.file_line, - column: start_file_offset - self.file_start_of_last_line, // TODO: unicode - offset: start_file_offset, - }..TextPosition { - line: self.file_line, - column: file_offset.end - self.file_start_of_last_line, // TODO: unicode - offset: file_offset.end, - }, - message: message.into(), - } + fn decode_utf8<'a>( + &self, + input_buffer: &'a [u8], + start_position: u64, + ) -> (Cow<'a, str>, Option) { + match str::from_utf8(input_buffer) { + Ok(str) => (Cow::Borrowed(str), None), + Err(e) => ( + String::from_utf8_lossy(input_buffer), + Some({ + let pos = start_position + u64::try_from(e.valid_up_to()).unwrap(); + self.syntax_error(pos..pos + 1, format!("Invalid UTF-8: {e}")) + }), + ), + } + } + + fn syntax_error(&self, file_offset: Range, message: impl Into) -> JsonSyntaxError { + let start_file_offset = max(file_offset.start, self.file_start_of_last_line); + JsonSyntaxError { + location: TextPosition { + line: self.file_line, + column: start_file_offset - self.file_start_of_last_line, // TODO: unicode + offset: start_file_offset, + }..TextPosition { + line: self.file_line, + column: file_offset.end - self.file_start_of_last_line, // TODO: unicode + offset: file_offset.end, + }, + message: message.into(), } + } } fn read_hexa_char(input: &[u8]) -> Result { - let mut value = 0; - for c in input.iter().copied() { - value = value * 16 - + match c { - b'0'..=b'9' => u32::from(c) - u32::from(b'0'), - b'a'..=b'f' => u32::from(c) - u32::from(b'a') + 10, - b'A'..=b'F' => u32::from(c) - u32::from(b'A') + 10, - _ => { - return Err(format!( - "Unexpected character in a unicode escape: '{}'", - char::from(c) - )) - } - } - } - Ok(value) + let mut value = 0; + for c in input.iter().copied() { + value = value * 16 + + match c { + b'0'..=b'9' => u32::from(c) - u32::from(b'0'), + b'a'..=b'f' => u32::from(c) - u32::from(b'a') + 10, + b'A'..=b'F' => u32::from(c) - u32::from(b'A') + 10, + _ => { + return Err(format!( + "Unexpected character in a unicode escape: '{}'", + char::from(c) + )) + } + } + } + Ok(value) } fn read_digits(input_buffer: &[u8], is_ending: bool) -> Option { - let count = input_buffer - .iter() - .take_while(|c| c.is_ascii_digit()) - .count(); - if count == input_buffer.len() && !is_ending { - return None; - } - Some(count) + let count = input_buffer + .iter() + 
.take_while(|c| c.is_ascii_digit()) + .count(); + if count == input_buffer.len() && !is_ending { + return None; + } + Some(count) } fn owned_event(event: JsonEvent<'_>) -> JsonEvent<'static> { - match event { - JsonEvent::String(s) => JsonEvent::String(s.into_owned().into()), - JsonEvent::Number(n) => JsonEvent::Number(n.into_owned().into()), - JsonEvent::Boolean(b) => JsonEvent::Boolean(b), - JsonEvent::Null => JsonEvent::Null, - JsonEvent::ArrayIndex => JsonEvent::ArrayIndex, - JsonEvent::StartArray => JsonEvent::StartArray, - JsonEvent::EndArray => JsonEvent::EndArray, - JsonEvent::StartObject => JsonEvent::StartObject, - JsonEvent::EndObject => JsonEvent::EndObject, - JsonEvent::ObjectKey(k) => JsonEvent::ObjectKey(k.into_owned().into()), - JsonEvent::Eof => JsonEvent::Eof, - } + match event { + JsonEvent::String(s) => JsonEvent::String(s.into_owned().into()), + JsonEvent::Number(n) => JsonEvent::Number(n.into_owned().into()), + JsonEvent::Boolean(b) => JsonEvent::Boolean(b), + JsonEvent::Null => JsonEvent::Null, + JsonEvent::ArrayIndex => JsonEvent::ArrayIndex, + JsonEvent::StartArray => JsonEvent::StartArray, + JsonEvent::EndArray => JsonEvent::EndArray, + JsonEvent::StartObject => JsonEvent::StartObject, + JsonEvent::EndObject => JsonEvent::EndObject, + JsonEvent::ObjectKey(k) => JsonEvent::ObjectKey(k.into_owned().into()), + JsonEvent::Eof => JsonEvent::Eof, + } } /// Result of [`LowLevelJsonParser::parse_next`]. #[derive(Debug)] pub struct LowLevelJsonParserResult<'a> { - /// How many bytes have been read from `input_buffer` and should be removed from it. - pub consumed_bytes: usize, - /// A possible new event - pub event: Option, JsonSyntaxError>>, + /// How many bytes have been read from `input_buffer` and should be removed from it. + pub consumed_bytes: usize, + /// A possible new event + pub event: Option, JsonSyntaxError>>, } /// A position in a text i.e. a `line` number starting from 0, a `column` number starting from 0 (in number of code points) and a global file `offset` starting from 0 (in number of bytes). #[derive(Eq, PartialEq, Debug, Clone, Copy)] pub struct TextPosition { - pub line: u64, - pub column: u64, - pub offset: u64, + pub line: u64, + pub column: u64, + pub offset: u64, } /// An error in the syntax of the parsed file. @@ -1285,65 +1291,65 @@ pub struct TextPosition { /// It is composed of a message and a byte range in the input. #[derive(Debug)] pub struct JsonSyntaxError { - location: Range, - message: String, + location: Range, + message: String, } impl JsonSyntaxError { - /// The location of the error inside of the file. - #[inline] - pub fn location(&self) -> Range { - self.location.clone() - } + /// The location of the error inside of the file. + #[inline] + pub fn location(&self) -> Range { + self.location.clone() + } - /// The error message. - #[inline] - pub fn message(&self) -> &str { - &self.message - } + /// The error message. 
+ #[inline] + pub fn message(&self) -> &str { + &self.message + } } impl fmt::Display for JsonSyntaxError { - #[inline] - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if self.location.start.offset + 1 >= self.location.end.offset { - write!( - f, - "Parser error at line {} column {}: {}", - self.location.start.line + 1, - self.location.start.column + 1, - self.message - ) - } else if self.location.start.line == self.location.end.line { - write!( - f, - "Parser error at line {} between columns {} and column {}: {}", - self.location.start.line + 1, - self.location.start.column + 1, - self.location.end.column + 1, - self.message - ) - } else { - write!( - f, - "Parser error between line {} column {} and line {} column {}: {}", - self.location.start.line + 1, - self.location.start.column + 1, - self.location.end.line + 1, - self.location.end.column + 1, - self.message - ) - } + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.location.start.offset + 1 >= self.location.end.offset { + write!( + f, + "Parser error at line {} column {}: {}", + self.location.start.line + 1, + self.location.start.column + 1, + self.message + ) + } else if self.location.start.line == self.location.end.line { + write!( + f, + "Parser error at line {} between columns {} and column {}: {}", + self.location.start.line + 1, + self.location.start.column + 1, + self.location.end.column + 1, + self.message + ) + } else { + write!( + f, + "Parser error between line {} column {} and line {} column {}: {}", + self.location.start.line + 1, + self.location.start.column + 1, + self.location.end.line + 1, + self.location.end.column + 1, + self.message + ) } + } } impl Error for JsonSyntaxError {} impl From for io::Error { - #[inline] - fn from(error: JsonSyntaxError) -> Self { - io::Error::new(io::ErrorKind::InvalidData, error) - } + #[inline] + fn from(error: JsonSyntaxError) -> Self { + io::Error::new(io::ErrorKind::InvalidData, error) + } } /// A parsing error. @@ -1351,52 +1357,52 @@ impl From for io::Error { /// It is the union of [`JsonSyntaxError`] and [`std::io::Error`]. #[derive(Debug)] pub enum JsonParseError { - /// I/O error during parsing (file not found...). - Io(io::Error), - /// An error in the file syntax. - Syntax(JsonSyntaxError), + /// I/O error during parsing (file not found...). + Io(io::Error), + /// An error in the file syntax. 
+ Syntax(JsonSyntaxError), } impl fmt::Display for JsonParseError { - #[inline] - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Io(e) => e.fmt(f), - Self::Syntax(e) => e.fmt(f), - } + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Io(e) => e.fmt(f), + Self::Syntax(e) => e.fmt(f), } + } } impl Error for JsonParseError { - #[inline] - fn source(&self) -> Option<&(dyn Error + 'static)> { - Some(match self { - Self::Io(e) => e, - Self::Syntax(e) => e, - }) - } + #[inline] + fn source(&self) -> Option<&(dyn Error + 'static)> { + Some(match self { + Self::Io(e) => e, + Self::Syntax(e) => e, + }) + } } impl From for JsonParseError { - #[inline] - fn from(error: JsonSyntaxError) -> Self { - Self::Syntax(error) - } + #[inline] + fn from(error: JsonSyntaxError) -> Self { + Self::Syntax(error) + } } impl From for JsonParseError { - #[inline] - fn from(error: io::Error) -> Self { - Self::Io(error) - } + #[inline] + fn from(error: io::Error) -> Self { + Self::Io(error) + } } impl From for io::Error { - #[inline] - fn from(error: JsonParseError) -> Self { - match error { - JsonParseError::Syntax(e) => e.into(), - JsonParseError::Io(e) => e, - } + #[inline] + fn from(error: JsonParseError) -> Self { + match error { + JsonParseError::Syntax(e) => e.into(), + JsonParseError::Io(e) => e, } + } } diff --git a/src/write.rs b/src/write.rs index a5d6f25..bd2263a 100644 --- a/src/write.rs +++ b/src/write.rs @@ -18,31 +18,31 @@ use tokio::io::{AsyncWrite, AsyncWriteExt}; /// # std::io::Result::Ok(()) /// ``` pub struct WriterJsonSerializer { - write: W, - writer: LowLevelJsonSerializer, + write: W, + writer: LowLevelJsonSerializer, } impl WriterJsonSerializer { - pub const fn new(write: W) -> Self { - Self { - write, - writer: LowLevelJsonSerializer::new(), - } + pub const fn new(write: W) -> Self { + Self { + write, + writer: LowLevelJsonSerializer::new(), } + } - pub fn serialize_event(&mut self, event: JsonEvent<'_>) -> Result<()> { - self.writer.serialize_event(event, &mut self.write) - } + pub fn serialize_event(&mut self, event: JsonEvent<'_>) -> Result<()> { + self.writer.serialize_event(event, &mut self.write) + } - #[deprecated(note = "Use serialize_event() instead")] - pub fn write_event(&mut self, event: JsonEvent<'_>) -> Result<()> { - self.serialize_event(event) - } + #[deprecated(note = "Use serialize_event() instead")] + pub fn write_event(&mut self, event: JsonEvent<'_>) -> Result<()> { + self.serialize_event(event) + } - pub fn finish(self) -> Result { - self.writer.validate_eof()?; - Ok(self.write) - } + pub fn finish(self) -> Result { + self.writer.validate_eof()?; + Ok(self.write) + } } /// A JSON streaming writer writing to an [`AsyncWrite`] implementation. 
@@ -67,37 +67,37 @@ impl WriterJsonSerializer { /// ``` #[cfg(feature = "async-tokio")] pub struct TokioAsyncWriterJsonSerializer { - write: W, - writer: LowLevelJsonSerializer, - buffer: Vec, + write: W, + writer: LowLevelJsonSerializer, + buffer: Vec, } #[cfg(feature = "async-tokio")] impl TokioAsyncWriterJsonSerializer { - pub const fn new(write: W) -> Self { - Self { - write, - writer: LowLevelJsonSerializer::new(), - buffer: Vec::new(), - } + pub const fn new(write: W) -> Self { + Self { + write, + writer: LowLevelJsonSerializer::new(), + buffer: Vec::new(), } + } - pub async fn serialize_event(&mut self, event: JsonEvent<'_>) -> Result<()> { - self.writer.serialize_event(event, &mut self.buffer)?; - self.write.write_all(&self.buffer).await?; - self.buffer.clear(); - Ok(()) - } + pub async fn serialize_event(&mut self, event: JsonEvent<'_>) -> Result<()> { + self.writer.serialize_event(event, &mut self.buffer)?; + self.write.write_all(&self.buffer).await?; + self.buffer.clear(); + Ok(()) + } - #[deprecated(note = "Use serialize_event() instead")] - pub async fn write_event(&mut self, event: JsonEvent<'_>) -> Result<()> { - self.serialize_event(event).await - } + #[deprecated(note = "Use serialize_event() instead")] + pub async fn write_event(&mut self, event: JsonEvent<'_>) -> Result<()> { + self.serialize_event(event).await + } - pub fn finish(self) -> Result { - self.writer.validate_eof()?; - Ok(self.write) - } + pub fn finish(self) -> Result { + self.writer.validate_eof()?; + Ok(self.write) + } } /// A low-level JSON streaming writer writing to a [`Write`] implementation. @@ -120,199 +120,193 @@ impl TokioAsyncWriterJsonSerializer { #[derive(Default)] pub struct LowLevelJsonSerializer { - state_stack: Vec, - element_written: bool, + state_stack: Vec, + element_written: bool, } impl LowLevelJsonSerializer { - pub const fn new() -> Self { - Self { - state_stack: Vec::new(), - element_written: false, - } + pub const fn new() -> Self { + Self { + state_stack: Vec::new(), + element_written: false, } + } - pub fn serialize_event(&mut self, event: JsonEvent<'_>, mut write: impl Write) -> Result<()> { - match event { - JsonEvent::String(s) => { - self.before_value(&mut write)?; - write_escaped_json_string(&s, write) - } - JsonEvent::Number(number) => { - self.before_value(&mut write)?; - write.write_all(number.as_bytes()) - } - JsonEvent::Boolean(b) => { - self.before_value(&mut write)?; - write.write_all(if b { b"true" } else { b"false" }) - } - JsonEvent::Null => { - self.before_value(&mut write)?; - write.write_all(b"null") - } - JsonEvent::StartArray => { - self.before_value(&mut write)?; - self.state_stack.push(JsonState::OpenArray); - write.write_all(b"[") - } - JsonEvent::EndArray => match self.state_stack.pop() { - Some(JsonState::OpenArray) | Some(JsonState::ContinuationArray) => { - write.write_all(b"]") - } - Some(s) => { - self.state_stack.push(s); - Err(Error::new( - ErrorKind::InvalidInput, - "Closing a not opened array", - )) - } - None => Err(Error::new( - ErrorKind::InvalidInput, - "Closing a not opened array", - )), - }, - JsonEvent::StartObject => { - self.before_value(&mut write)?; - self.state_stack.push(JsonState::OpenObject); - write.write_all(b"{") - } - JsonEvent::EndObject => match self.state_stack.pop() { - Some(JsonState::OpenObject) | Some(JsonState::ContinuationObject) => { - write.write_all(b"}") - } - Some(s) => { - self.state_stack.push(s); - Err(Error::new( - ErrorKind::InvalidInput, - "Closing a not opened object", - )) - } - None => Err(Error::new( - 
ErrorKind::InvalidInput,
- "Closing a not opened object",
- )),
- },
- JsonEvent::ArrayIndex => {
- Ok(())
- },
- JsonEvent::ObjectKey(key) => {
- match self.state_stack.pop() {
- Some(JsonState::OpenObject) => (),
- Some(JsonState::ContinuationObject) => write.write_all(b",")?,
- _ => {
- return Err(Error::new(
- ErrorKind::InvalidInput,
- "Trying to write an object key in a non-object",
- ))
- }
- }
- self.state_stack.push(JsonState::ContinuationObject);
- self.state_stack.push(JsonState::ObjectValue);
- write_escaped_json_string(&key, &mut write)?;
- write.write_all(b":")
- }
- JsonEvent::Eof => Err(Error::new(
- ErrorKind::InvalidInput,
- "EOF is not allowed in JSON writer",
- )),
+ pub fn serialize_event(&mut self, event: JsonEvent<'_>, mut write: impl Write) -> Result<()> {
+ match event {
+ JsonEvent::String(s) => {
+ self.before_value(&mut write)?;
+ write_escaped_json_string(&s, write)
+ }
+ JsonEvent::Number(number) => {
+ self.before_value(&mut write)?;
+ write.write_all(number.as_bytes())
+ }
+ JsonEvent::Boolean(b) => {
+ self.before_value(&mut write)?;
+ write.write_all(if b { b"true" } else { b"false" })
+ }
+ JsonEvent::Null => {
+ self.before_value(&mut write)?;
+ write.write_all(b"null")
+ }
+ JsonEvent::StartArray => {
+ self.before_value(&mut write)?;
+ self.state_stack.push(JsonState::OpenArray);
+ write.write_all(b"[")
+ }
+ JsonEvent::EndArray => match self.state_stack.pop() {
+ Some(JsonState::OpenArray) | Some(JsonState::ContinuationArray) => write.write_all(b"]"),
+ Some(s) => {
+ self.state_stack.push(s);
+ Err(Error::new(
+ ErrorKind::InvalidInput,
+ "Closing a not opened array",
+ ))
+ }
+ None => Err(Error::new(
+ ErrorKind::InvalidInput,
+ "Closing a not opened array",
+ )),
+ },
+ JsonEvent::StartObject => {
+ self.before_value(&mut write)?;
+ self.state_stack.push(JsonState::OpenObject);
+ write.write_all(b"{")
+ }
+ JsonEvent::EndObject => match self.state_stack.pop() {
+ Some(JsonState::OpenObject) | Some(JsonState::ContinuationObject) => write.write_all(b"}"),
+ Some(s) => {
+ self.state_stack.push(s);
+ Err(Error::new(
+ ErrorKind::InvalidInput,
+ "Closing a not opened object",
+ ))
+ }
+ None => Err(Error::new(
+ ErrorKind::InvalidInput,
+ "Closing a not opened object",
+ )),
+ },
+ JsonEvent::ArrayIndex => Ok(()),
+ JsonEvent::ObjectKey(key) => {
+ match self.state_stack.pop() {
+ Some(JsonState::OpenObject) => (),
+ Some(JsonState::ContinuationObject) => write.write_all(b",")?,
+ _ => {
+ return Err(Error::new(
+ ErrorKind::InvalidInput,
+ "Trying to write an object key in a non-object",
+ ))
+ }
+ }
+ self.state_stack.push(JsonState::ContinuationObject);
+ self.state_stack.push(JsonState::ObjectValue);
+ write_escaped_json_string(&key, &mut write)?;
+ write.write_all(b":")
+ }
+ JsonEvent::Eof => Err(Error::new(
+ ErrorKind::InvalidInput,
+ "EOF is not allowed in JSON writer",
+ )),
}
+ }

- #[deprecated(note = "Use serialize_event() instead")]
- pub fn write_event(&mut self, event: JsonEvent<'_>, write: impl Write) -> Result<()> {
- self.serialize_event(event, write)
- }
+ #[deprecated(note = "Use serialize_event() instead")]
+ pub fn write_event(&mut self, event: JsonEvent<'_>, write: impl Write) -> Result<()> {
+ self.serialize_event(event, write)
+ }

- fn before_value(&mut self, mut write: impl Write) -> Result<()> {
- match self.state_stack.pop() {
- Some(JsonState::OpenArray) => {
- self.state_stack.push(JsonState::ContinuationArray);
- Ok(())
- }
- Some(JsonState::ContinuationArray) => {
- self.state_stack.push(JsonState::ContinuationArray);
- 
write.write_all(b",")?; - Ok(()) - } - Some(last_state @ JsonState::OpenObject) - | Some(last_state @ JsonState::ContinuationObject) => { - self.state_stack.push(last_state); - Err(Error::new( - ErrorKind::InvalidInput, - "Object key expected, string found", - )) - } - Some(JsonState::ObjectValue) => Ok(()), - None => { - if self.element_written { - Err(Error::new( - ErrorKind::InvalidInput, - "A root JSON value has already been written", - )) - } else { - self.element_written = true; - Ok(()) - } - } + fn before_value(&mut self, mut write: impl Write) -> Result<()> { + match self.state_stack.pop() { + Some(JsonState::OpenArray) => { + self.state_stack.push(JsonState::ContinuationArray); + Ok(()) + } + Some(JsonState::ContinuationArray) => { + self.state_stack.push(JsonState::ContinuationArray); + write.write_all(b",")?; + Ok(()) + } + Some(last_state @ JsonState::OpenObject) + | Some(last_state @ JsonState::ContinuationObject) => { + self.state_stack.push(last_state); + Err(Error::new( + ErrorKind::InvalidInput, + "Object key expected, string found", + )) + } + Some(JsonState::ObjectValue) => Ok(()), + None => { + if self.element_written { + Err(Error::new( + ErrorKind::InvalidInput, + "A root JSON value has already been written", + )) + } else { + self.element_written = true; + Ok(()) } + } } + } - fn validate_eof(&self) -> Result<()> { - if !self.state_stack.is_empty() { - return Err(Error::new( - ErrorKind::InvalidInput, - "The written JSON is not balanced: an object or an array has not been closed", - )); - } - if !self.element_written { - return Err(Error::new( - ErrorKind::InvalidInput, - "A JSON file can't be empty", - )); - } - Ok(()) + fn validate_eof(&self) -> Result<()> { + if !self.state_stack.is_empty() { + return Err(Error::new( + ErrorKind::InvalidInput, + "The written JSON is not balanced: an object or an array has not been closed", + )); } + if !self.element_written { + return Err(Error::new( + ErrorKind::InvalidInput, + "A JSON file can't be empty", + )); + } + Ok(()) + } } enum JsonState { - OpenArray, - ContinuationArray, - OpenObject, - ContinuationObject, - ObjectValue, + OpenArray, + ContinuationArray, + OpenObject, + ContinuationObject, + ObjectValue, } fn write_escaped_json_string(s: &str, mut write: impl Write) -> Result<()> { - write.write_all(b"\"")?; - let mut buffer = [b'\\', b'u', 0, 0, 0, 0]; - for c in s.chars() { - match c { - '\\' => write.write_all(b"\\\\"), - '"' => write.write_all(b"\\\""), + write.write_all(b"\"")?; + let mut buffer = [b'\\', b'u', 0, 0, 0, 0]; + for c in s.chars() { + match c { + '\\' => write.write_all(b"\\\\"), + '"' => write.write_all(b"\\\""), + c => { + if c < char::from(32) { + match c { + '\u{08}' => write.write_all(b"\\b"), + '\u{0C}' => write.write_all(b"\\f"), + '\n' => write.write_all(b"\\n"), + '\r' => write.write_all(b"\\r"), + '\t' => write.write_all(b"\\t"), c => { - if c < char::from(32) { - match c { - '\u{08}' => write.write_all(b"\\b"), - '\u{0C}' => write.write_all(b"\\f"), - '\n' => write.write_all(b"\\n"), - '\r' => write.write_all(b"\\r"), - '\t' => write.write_all(b"\\t"), - c => { - let mut c = c as u8; - for i in (2..6).rev() { - let ch = c % 16; - buffer[i] = if ch < 10 { b'0' + ch } else { b'A' + ch - 10 }; - c /= 16; - } - write.write_all(&buffer) - } - } - } else { - write.write_all(c.encode_utf8(&mut buffer[2..]).as_bytes()) - } + let mut c = c as u8; + for i in (2..6).rev() { + let ch = c % 16; + buffer[i] = if ch < 10 { b'0' + ch } else { b'A' + ch - 10 }; + c /= 16; + } + write.write_all(&buffer) } 
- }?; - } - write.write_all(b"\"")?; - Ok(()) + } + } else { + write.write_all(c.encode_utf8(&mut buffer[2..]).as_bytes()) + } + } + }?; + } + write.write_all(b"\"")?; + Ok(()) } diff --git a/tests/drains.rs b/tests/drains.rs index 6cb375d..7ea7c91 100644 --- a/tests/drains.rs +++ b/tests/drains.rs @@ -19,7 +19,6 @@ mod tests { let mut parser = ReaderJsonParser::new(&json[..]); while let Ok(event) = parser.parse_next() { - match event { JsonEvent::ObjectKey(key) => { println!("KEY: {:?}", key); diff --git a/tests/errors.rs b/tests/errors.rs index 9ddc3e6..c987a18 100644 --- a/tests/errors.rs +++ b/tests/errors.rs @@ -2,40 +2,40 @@ use json_event_parser::{JsonEvent, SliceJsonParser, WriterJsonSerializer}; #[test] fn test_recovery() { - let entries = [ - (b"[nonono]".as_slice(), "[]"), - (b"[a]", "[]"), - (b"[1,]", "[1]"), - (b"{\"foo\":1,}", "{\"foo\":1}"), - (b"{\"foo\" 1}", "{\"foo\":1}"), - (b"[1 2]", "[1,2]"), - (b"[\"\x00\"]", "[]"), - (b"[\"\\uD888\\u1234\"]", "[]"), - ]; + let entries = [ + (b"[nonono]".as_slice(), "[]"), + (b"[a]", "[]"), + (b"[1,]", "[1]"), + (b"{\"foo\":1,}", "{\"foo\":1}"), + (b"{\"foo\" 1}", "{\"foo\":1}"), + (b"[1 2]", "[1,2]"), + (b"[\"\x00\"]", "[]"), + (b"[\"\\uD888\\u1234\"]", "[]"), + ]; - for (input, expected_output) in entries { - let mut reader = SliceJsonParser::new(input); - let mut writer = WriterJsonSerializer::new(Vec::new()); - loop { - match reader.parse_next() { - Ok(JsonEvent::Eof) => break, - Ok(event) => writer.serialize_event(event).unwrap(), - Err(_) => (), - } - } - let actual_output = String::from_utf8(writer.finish().unwrap()).unwrap(); - assert_eq!( - expected_output, - actual_output, - "on {}", - String::from_utf8_lossy(input) - ); + for (input, expected_output) in entries { + let mut reader = SliceJsonParser::new(input); + let mut writer = WriterJsonSerializer::new(Vec::new()); + loop { + match reader.parse_next() { + Ok(JsonEvent::Eof) => break, + Ok(event) => writer.serialize_event(event).unwrap(), + Err(_) => (), + } } + let actual_output = String::from_utf8(writer.finish().unwrap()).unwrap(); + assert_eq!( + expected_output, + actual_output, + "on {}", + String::from_utf8_lossy(input) + ); + } } #[test] fn test_error_messages() { - let entries = [ + let entries = [ ( b"".as_slice(), "Parser error at line 1 column 1: Unexpected end of file, a value was expected", @@ -57,13 +57,13 @@ fn test_error_messages() { "Parser error at line 1 between columns 2 and column 8: \\uDCFF is not a valid high surrogate", ) ]; - for (json, error) in entries { - assert_eq!( - SliceJsonParser::new(json) - .parse_next() - .unwrap_err() - .to_string(), - error - ); - } + for (json, error) in entries { + assert_eq!( + SliceJsonParser::new(json) + .parse_next() + .unwrap_err() + .to_string(), + error + ); + } } diff --git a/tests/test_suite.rs b/tests/test_suite.rs index 8380649..c36d72f 100644 --- a/tests/test_suite.rs +++ b/tests/test_suite.rs @@ -1,115 +1,117 @@ use json_event_parser::{JsonEvent, ReaderJsonParser, SliceJsonParser, WriterJsonSerializer}; -use std::fs::{read_dir, File}; -use std::io::{Read, Result}; -use std::{fs, str}; +use std::{ + fs, + fs::{read_dir, File}, + io::{Read, Result}, + str, +}; const OTHER_VALID_TESTS: [&str; 12] = [ - "i_number_double_huge_neg_exp.json", - "i_number_huge_exp.json", - "i_number_neg_int_huge_exp.json", - "i_number_pos_double_huge_exp.json", - "i_number_real_neg_overflow.json", - "i_number_real_pos_overflow.json", - "i_number_real_underflow.json", - "i_number_too_big_neg_int.json", - 
"i_number_too_big_pos_int.json", - "i_number_very_big_negative_int.json", - "i_structure_500_nested_arrays.json", - "i_structure_UTF-8_BOM_empty_object.json", + "i_number_double_huge_neg_exp.json", + "i_number_huge_exp.json", + "i_number_neg_int_huge_exp.json", + "i_number_pos_double_huge_exp.json", + "i_number_real_neg_overflow.json", + "i_number_real_pos_overflow.json", + "i_number_real_underflow.json", + "i_number_too_big_neg_int.json", + "i_number_too_big_pos_int.json", + "i_number_very_big_negative_int.json", + "i_structure_500_nested_arrays.json", + "i_structure_UTF-8_BOM_empty_object.json", ]; const OTHER_INVALID_TESTS: [&str; 23] = [ - "i_object_key_lone_2nd_surrogate.json", - "i_string_1st_surrogate_but_2nd_missing.json", - "i_string_1st_valid_surrogate_2nd_invalid.json", - "i_string_incomplete_surrogate_and_escape_valid.json", - "i_string_incomplete_surrogate_pair.json", - "i_string_incomplete_surrogates_escape_valid.json", - "i_string_invalid_lonely_surrogate.json", - "i_string_invalid_surrogate.json", - "i_string_invalid_utf-8.json", - "i_string_inverted_surrogates_U+1D11E.json", - "i_string_iso_latin_1.json", - "i_string_lone_second_surrogate.json", - "i_string_lone_utf8_continuation_byte.json", - "i_string_not_in_unicode_range.json", - "i_string_overlong_sequence_2_bytes.json", - "i_string_overlong_sequence_6_bytes.json", - "i_string_overlong_sequence_6_bytes_null.json", - "i_string_truncated-utf-8.json", - "i_string_UTF8_surrogate_U+D800.json", - "i_string_utf16BE_no_BOM.json", - "i_string_utf16LE_no_BOM.json", - "i_string_UTF-8_invalid_sequence.json", - "i_string_UTF-16LE_with_BOM.json", + "i_object_key_lone_2nd_surrogate.json", + "i_string_1st_surrogate_but_2nd_missing.json", + "i_string_1st_valid_surrogate_2nd_invalid.json", + "i_string_incomplete_surrogate_and_escape_valid.json", + "i_string_incomplete_surrogate_pair.json", + "i_string_incomplete_surrogates_escape_valid.json", + "i_string_invalid_lonely_surrogate.json", + "i_string_invalid_surrogate.json", + "i_string_invalid_utf-8.json", + "i_string_inverted_surrogates_U+1D11E.json", + "i_string_iso_latin_1.json", + "i_string_lone_second_surrogate.json", + "i_string_lone_utf8_continuation_byte.json", + "i_string_not_in_unicode_range.json", + "i_string_overlong_sequence_2_bytes.json", + "i_string_overlong_sequence_6_bytes.json", + "i_string_overlong_sequence_6_bytes_null.json", + "i_string_truncated-utf-8.json", + "i_string_UTF8_surrogate_U+D800.json", + "i_string_utf16BE_no_BOM.json", + "i_string_utf16LE_no_BOM.json", + "i_string_UTF-8_invalid_sequence.json", + "i_string_UTF-16LE_with_BOM.json", ]; #[test] fn test_testsuite_parsing() -> Result<()> { - for file in read_dir(format!( - "{}/JSONTestSuite/test_parsing", - env!("CARGO_MANIFEST_DIR") - ))? { - let file = file?; - let file_name = file.file_name().to_str().unwrap().to_owned(); - if !file_name.ends_with(".json") { - continue; - } - let result = parse_read_result(File::open(file.path())?); - if file_name.starts_with("y_") || OTHER_VALID_TESTS.contains(&file_name.as_ref()) { - match result { - Ok(serialization) => { - let serialization_str = str::from_utf8(&serialization).unwrap(); - match parse_buffer_result(&serialization) { - Ok(other_serialization) => { - let other_serialization_str = - str::from_utf8(&other_serialization).unwrap(); - assert_eq!( + for file in read_dir(format!( + "{}/JSONTestSuite/test_parsing", + env!("CARGO_MANIFEST_DIR") + ))? 
{ + let file = file?; + let file_name = file.file_name().to_str().unwrap().to_owned(); + if !file_name.ends_with(".json") { + continue; + } + let result = parse_read_result(File::open(file.path())?); + if file_name.starts_with("y_") || OTHER_VALID_TESTS.contains(&file_name.as_ref()) { + match result { + Ok(serialization) => { + let serialization_str = str::from_utf8(&serialization).unwrap(); + match parse_buffer_result(&serialization) { + Ok(other_serialization) => { + let other_serialization_str = str::from_utf8(&other_serialization).unwrap(); + assert_eq!( serialization_str, other_serialization_str, "Roundtrip {other_serialization_str} serialization of {serialization_str} is not identical (test {file_name})", ) - } - Err(error) => { - panic!("Parsing of {serialization_str} failed with error {error}") - } - } - } - Err(error) => panic!( - "Parsing of {file_name} failed with error {error} on {}", - fs::read_to_string(file.path())? - ), } - } else if file_name.starts_with("n_") || OTHER_INVALID_TESTS.contains(&file_name.as_ref()) { - if let Ok(json) = result { - panic!( - "Parsing of {file_name} wrongly succeeded with json {}", - str::from_utf8(&json).unwrap() - ) + Err(error) => { + panic!("Parsing of {serialization_str} failed with error {error}") } + } } + Err(error) => panic!( + "Parsing of {file_name} failed with error {error} on {}", + fs::read_to_string(file.path())? + ), + } + } else if file_name.starts_with("n_") || OTHER_INVALID_TESTS.contains(&file_name.as_ref()) { + if let Ok(json) = result { + panic!( + "Parsing of {file_name} wrongly succeeded with json {}", + str::from_utf8(&json).unwrap() + ) + } } - Ok(()) + } + Ok(()) } fn parse_buffer_result(read: &[u8]) -> Result> { - let mut reader = SliceJsonParser::new(read); - let mut writer = WriterJsonSerializer::new(Vec::new()); - loop { - match reader.parse_next()? { - JsonEvent::Eof => return writer.finish(), - e => writer.serialize_event(e)?, - } + let mut reader = SliceJsonParser::new(read); + let mut writer = WriterJsonSerializer::new(Vec::new()); + loop { + match reader.parse_next()? { + JsonEvent::Eof => return writer.finish(), + e => writer.serialize_event(e)?, } + } } fn parse_read_result(read: impl Read) -> Result> { - let mut reader = ReaderJsonParser::new(read); - let mut writer = WriterJsonSerializer::new(Vec::new()); - loop { - match reader.parse_next()? { - JsonEvent::Eof => return writer.finish(), - e => writer.serialize_event(e)?, - } + let mut reader = ReaderJsonParser::new(read); + let mut writer = WriterJsonSerializer::new(Vec::new()); + loop { + match reader.parse_next()? 
{
+ JsonEvent::Eof => return writer.finish(),
+ e => writer.serialize_event(e)?,
}
+ }
}

From d9fc719e09e3d0a04a6991dff63d716d170fa6d7 Mon Sep 17 00:00:00 2001
From: Scott Wyatt
Date: Fri, 18 Apr 2025 18:08:48 -0400
Subject: [PATCH 4/6] fix(): fix the cursor position during buffer refills

Track the offset and cursor as absolute byte positions and rebase them
whenever the input buffer is shifted and refilled.

---
 src/read.rs | 61 ++++++++++++++++++-----------------------------------
 1 file changed, 20 insertions(+), 41 deletions(-)

diff --git a/src/read.rs b/src/read.rs
index d5f619a..fb997cb 100644
--- a/src/read.rs
+++ b/src/read.rs
@@ -128,7 +128,7 @@ impl ReaderJsonParser {
 let mut found_start = false;

 let mut offset = self.input_buffer_start;
- let mut cursor = self.input_buffer_start;
+ let mut cursor = self.input_buffer_start; // absolute byte offset

 loop {
 // SAFETY: shadow parsing avoids borrow checker issues
@@ -145,13 +145,7 @@ impl ReaderJsonParser {
 );

 if consumed_bytes == 0 && self.is_ending {
- return Err(
- io::Error::new(
- io::ErrorKind::UnexpectedEof,
- "Unexpected EOF while draining value",
- )
- .into(),
- );
+ return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "Unexpected EOF while draining value").into());
 }

 if let Some(event) = event {
@@ -179,38 +173,26 @@ impl ReaderJsonParser {
 nesting -= 1;
 if nesting == 0 {
 let end = offset + consumed_bytes;
- // self.parser = shadow_parser;
 self.input_buffer_start = end;
- return Ok(
- str::from_utf8(&self.input_buffer[cursor..end])
- .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?
- .to_string(),
- );
+ return Ok(str::from_utf8(&self.input_buffer[cursor..end])
+ .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?
+ .to_string());
 }
 }
 JsonEvent::Eof => {
- return Err(
- io::Error::new(
- io::ErrorKind::UnexpectedEof,
- "Unexpected EOF while draining value",
- )
- .into(),
- );
+ return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "Unexpected EOF while draining value").into());
 }
 _ => {
 if !found_start {
- // scalar case
- cursor = offset;
+ cursor = offset; // start of scalar value
 found_start = true;
 }
 if nesting == 0 {
 let end = offset + consumed_bytes;
 self.input_buffer_start = end;
- return Ok(
- str::from_utf8(&self.input_buffer[cursor..end])
- .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?
- .to_string(),
- );
+ return Ok(str::from_utf8(&self.input_buffer[cursor..end])
+ .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?
+ .to_string()); } } } @@ -220,23 +202,20 @@ impl ReaderJsonParser { if offset >= self.input_buffer_end { // shift + refill - let remaining = self.input_buffer_end - self.input_buffer_start; - self - .input_buffer - .copy_within(self.input_buffer_start..self.input_buffer_end, 0); + let shift = self.input_buffer_start; + self.input_buffer.copy_within(shift..self.input_buffer_end, 0); + self.input_buffer_end -= shift; self.input_buffer_start = 0; - self.input_buffer_end = remaining; - offset = self.input_buffer_end; + + // adjust all absolute pointers + offset -= shift; + cursor -= shift; if self.input_buffer.len() < self.max_buffer_size { - self - .input_buffer - .resize(self.input_buffer.len() + MIN_BUFFER_SIZE, 0); + self.input_buffer.resize(self.input_buffer.len() + MIN_BUFFER_SIZE, 0); } - let read = self - .read - .read(&mut self.input_buffer[self.input_buffer_end..])?; + let read = self.read.read(&mut self.input_buffer[self.input_buffer_end..])?; self.input_buffer_end += read; self.is_ending = read == 0; } @@ -250,7 +229,7 @@ impl ReaderJsonParser { /// use json_event_parser::{JsonEvent, TokioAsyncReaderJsonParser}; /// /// # #[tokio::main(flavor = "current_thread")] -/// # async fn main() -> ::std::io::Result<()> { +/// # async fn main() -> std::io::Result<()> { /// let mut reader = TokioAsyncReaderJsonParser::new(b"{\"foo\": 1}".as_slice()); /// assert_eq!(reader.parse_next().await?, JsonEvent::StartObject); /// assert_eq!( From 95e2c9954e36645df1c345cc45d8367a4129f970 Mon Sep 17 00:00:00 2001 From: Scott Wyatt Date: Mon, 21 Apr 2025 22:25:42 -0400 Subject: [PATCH 5/6] fmt --- src/read.rs | 44 +++++++++++++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/src/read.rs b/src/read.rs index fb997cb..d73a817 100644 --- a/src/read.rs +++ b/src/read.rs @@ -145,7 +145,13 @@ impl ReaderJsonParser { ); if consumed_bytes == 0 && self.is_ending { - return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "Unexpected EOF while draining value").into()); + return Err( + io::Error::new( + io::ErrorKind::UnexpectedEof, + "Unexpected EOF while draining value", + ) + .into(), + ); } if let Some(event) = event { @@ -174,13 +180,21 @@ impl ReaderJsonParser { if nesting == 0 { let end = offset + consumed_bytes; self.input_buffer_start = end; - return Ok(str::from_utf8(&self.input_buffer[cursor..end]) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))? - .to_string()); + return Ok( + str::from_utf8(&self.input_buffer[cursor..end]) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))? + .to_string(), + ); } } JsonEvent::Eof => { - return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "Unexpected EOF while draining value").into()); + return Err( + io::Error::new( + io::ErrorKind::UnexpectedEof, + "Unexpected EOF while draining value", + ) + .into(), + ); } _ => { if !found_start { @@ -190,9 +204,11 @@ impl ReaderJsonParser { if nesting == 0 { let end = offset + consumed_bytes; self.input_buffer_start = end; - return Ok(str::from_utf8(&self.input_buffer[cursor..end]) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))? - .to_string()); + return Ok( + str::from_utf8(&self.input_buffer[cursor..end]) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))? 
+ .to_string(), + ); } } } @@ -203,7 +219,9 @@ impl ReaderJsonParser { if offset >= self.input_buffer_end { // shift + refill let shift = self.input_buffer_start; - self.input_buffer.copy_within(shift..self.input_buffer_end, 0); + self + .input_buffer + .copy_within(shift..self.input_buffer_end, 0); self.input_buffer_end -= shift; self.input_buffer_start = 0; @@ -212,10 +230,14 @@ impl ReaderJsonParser { cursor -= shift; if self.input_buffer.len() < self.max_buffer_size { - self.input_buffer.resize(self.input_buffer.len() + MIN_BUFFER_SIZE, 0); + self + .input_buffer + .resize(self.input_buffer.len() + MIN_BUFFER_SIZE, 0); } - let read = self.read.read(&mut self.input_buffer[self.input_buffer_end..])?; + let read = self + .read + .read(&mut self.input_buffer[self.input_buffer_end..])?; self.input_buffer_end += read; self.is_ending = read == 0; } From dce4a50ed1d04543657c2c16f7a17663de206c88 Mon Sep 17 00:00:00 2001 From: Scott Wyatt Date: Wed, 23 Apr 2025 20:42:10 -0400 Subject: [PATCH 6/6] cargo fix --- tests/drains.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/drains.rs b/tests/drains.rs index 7ea7c91..206966f 100644 --- a/tests/drains.rs +++ b/tests/drains.rs @@ -1,6 +1,6 @@ #[cfg(test)] mod tests { - use super::*; + use json_event_parser::{JsonEvent, ReaderJsonParser}; #[test]
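
A usage sketch for the drain API added in this series. The example below is not
part of the patches: it assumes the `ReaderJsonParser::drain_next_value_as_string`
signature from PATCH 2/6 (with the cursor fix from PATCH 4/6), and the "payload"
key is purely illustrative. Note that the event returned by `parse_next` borrows
the parser, so the example decides first and drains afterwards:

    use json_event_parser::{JsonEvent, ReaderJsonParser};

    fn main() -> std::io::Result<()> {
        let json = br#"{"id": 1, "payload": {"a": [1, 2, 3]}}"#;
        let mut parser = ReaderJsonParser::new(&json[..]);
        loop {
            // The event borrows the parser's buffer, so extract what we
            // need from it before calling another &mut method.
            let is_payload = match parser.parse_next()? {
                JsonEvent::ObjectKey(key) => key == "payload",
                JsonEvent::Eof => break,
                _ => false,
            };
            if is_payload {
                // Buffer the whole next value as a raw JSON string instead
                // of walking through its events. The drained slice starts
                // at the value itself: the colon and leading whitespace
                // are skipped.
                let raw = parser.drain_next_value_as_string()?;
                assert_eq!(raw, r#"{"a": [1, 2, 3]}"#);
            }
        }
        Ok(())
    }

The returned string can then be handed to any other JSON parser, which is the
hand-off the PATCH 2/6 message describes.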
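The rebasing step that PATCH 4/6 fixes, shown in isolation. This is a minimal
standalone sketch rather than code from the crate: `shift` is the number of
already-consumed bytes dropped from the front of the buffer, and every absolute
position must move down by the same amount so that it keeps naming the same byte.

    /// Sketch of the shift-and-refill bookkeeping: drop consumed bytes,
    /// then rebase every absolute offset by the same shift.
    fn shift_buffer(
        buffer: &mut Vec<u8>,
        start: &mut usize,  // first unconsumed byte
        end: &mut usize,    // one past the last valid byte
        offset: &mut usize, // current parse position (absolute)
        cursor: &mut usize, // start of the drained value (absolute)
    ) {
        let shift = *start;
        buffer.copy_within(shift..*end, 0);
        *end -= shift;
        *start = 0;
        // Positions are absolute, so they move together with the data.
        *offset -= shift;
        *cursor -= shift;
    }

    fn main() {
        let mut buf = b"xxx[1,2]".to_vec();
        let (mut start, mut end, mut offset, mut cursor) = (3, 8, 7, 3);
        shift_buffer(&mut buf, &mut start, &mut end, &mut offset, &mut cursor);
        assert_eq!(&buf[..end], b"[1,2]".as_slice());
        assert_eq!((start, offset, cursor), (0, 4, 0));
    }

The PATCH 2/6 version instead reset `offset` to the end of the shifted buffer and
left `cursor` untouched, which corrupted the span being drained; rebasing both by
`shift` preserves it.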