diff --git a/build.sbt b/build.sbt index 77002b9f..968e5133 100644 --- a/build.sbt +++ b/build.sbt @@ -5,7 +5,7 @@ lazy val readme = scalatex.ScalatexReadme( source = "Readme", autoResources = List("out.js", "JProfiler.png") ).settings( - (resources in Compile) += baseDirectory.value/".."/"out"/"demo"/"fullOpt"/"dest"/"out.js", + (resources in Compile) += baseDirectory.value/".."/"out"/"demo"/"fullOpt.dest"/"out.js", scalaVersion := "2.12.10" ) diff --git a/fastparse/src-2/fastparse/internal/MacroImpls.scala b/fastparse/src-2/fastparse/internal/MacroImpls.scala index 447e6c1c..953e8753 100644 --- a/fastparse/src-2/fastparse/internal/MacroImpls.scala +++ b/fastparse/src-2/fastparse/internal/MacroImpls.scala @@ -29,7 +29,7 @@ object MacroImpls { else if (f.splice(ctx1.successValue.asInstanceOf[T])) ctx1.asInstanceOf[ParsingRun[T]] else ctx1.freshFailure().asInstanceOf[ParsingRun[T]] - if (ctx1.verboseFailures) ctx1.aggregateTerminal(startIndex, () => "filter") + if (ctx1.verboseFailures) ctx1.reportTerminalMsg(startIndex, () => "filter") res } } @@ -52,11 +52,9 @@ object MacroImpls { ctx1.instrument.afterParse(name.splice.value, ctx0.index, ctx0.isSuccess) } if (ctx0.verboseFailures) { - ctx0.aggregateMsg( - startIndex, - Msgs(new Lazy(() => name.splice.value) :: Nil), - ctx0.failureGroupAggregate, - startIndex < ctx0.traceIndex + ctx0.reportAggregateMsg( + () => name.splice.value, + forceAggregate = startIndex < ctx0.traceIndex ) if (!ctx0.isSuccess){ ctx0.failureStack = (name.splice.value -> startIndex) :: ctx0.failureStack @@ -89,7 +87,7 @@ object MacroImpls { }else{ ctx1.freshFailure().asInstanceOf[ParsingRun[Unit]] } - if (ctx1.verboseFailures) ctx1.aggregateTerminal(index, () => literalized.splice) + if (ctx1.verboseFailures) ctx1.reportTerminalMsg(index, () => literalized.splice) res } @@ -119,7 +117,7 @@ object MacroImpls { ctx1.freshFailure().asInstanceOf[ParsingRun[Unit]] } if (ctx1.verboseFailures) { - ctx1.aggregateTerminal(index, () => 
literalized.splice) + ctx1.reportTerminalMsg(index, () => literalized.splice) } res @@ -134,7 +132,7 @@ object MacroImpls { val res = if (Util.startsWith(ctx1.input, s1, index)) ctx1.freshSuccessUnit(index + s1.length) else ctx1.freshFailure().asInstanceOf[ParsingRun[Unit]] - if (ctx1.verboseFailures) ctx1.aggregateTerminal(index, () => Util.literalize(s1)) + if (ctx1.verboseFailures) ctx1.reportTerminalMsg(index, () => Util.literalize(s1)) res } } @@ -232,7 +230,7 @@ object MacroImpls { lhs0.splice val lhsMsg = ctx5.shortParserMsg - val lhsAggregate = ctx5.failureGroupAggregate + val lhsAggregate = ctx5.aggregateParserMsgs if (ctx5.isSuccess) { ctx5.cut |= oldCut ctx5.asInstanceOf[ParsingRun[V]] @@ -242,7 +240,7 @@ object MacroImpls { val verboseFailures = ctx5.verboseFailures ctx5.index = startPos - if (verboseFailures) ctx5.aggregateMsg(startPos, lhsMsg, lhsAggregate) + if (verboseFailures) ctx5.reportAggregateMsg(lhsMsg) ctx5.cut = false other.splice @@ -251,7 +249,7 @@ object MacroImpls { val endCut = rhsCut | oldCut if (!ctx5.isSuccess && !rhsCut) ctx5.freshFailure(startPos) ctx5.cut = endCut - if (verboseFailures) ctx5.aggregateMsg(startPos, rhsMsg ::: lhsMsg, ctx5.failureGroupAggregate ::: lhsAggregate) + if (verboseFailures) ctx5.reportAggregateMsg(rhsMsg ::: lhsMsg, ctx5.aggregateParserMsgs ::: lhsAggregate) ctx5.asInstanceOf[ParsingRun[V]] } } @@ -350,7 +348,7 @@ object MacroImpls { ) } - val bracketed = "StringIn(" + literals.map(Util.literalize(_)).mkString(", ") + ")" + val bracketed = literals.map(Util.literalize(_)).toList val res = q""" $ctx match{ case $ctx1 => @@ -363,7 +361,7 @@ object MacroImpls { val res = if ($output != -1) $ctx1.freshSuccessUnit(index = $output) else $ctx1.freshFailure() - if ($ctx1.verboseFailures) $ctx1.setMsg($index, () => $bracketed) + if ($ctx1.verboseFailures) $ctx1.reportTerminalMsg($index, $bracketed) res } """ @@ -429,7 +427,7 @@ object MacroImpls { case true => ctx1.freshSuccessUnit(index + 1) case false => 
ctx1.freshFailure().asInstanceOf[ParsingRun[Unit]] } - if (ctx1.verboseFailures) ctx1.aggregateTerminal(index, () => bracketed.splice) + if (ctx1.verboseFailures) ctx1.reportTerminalMsg(index, () => bracketed.splice) res } } @@ -466,7 +464,7 @@ object MacroImpls { else { val $preRhsIndex = $ctx1.index $rhs - val $rhsAggregate = $ctx1.failureGroupAggregate + val $rhsAggregate = $ctx1.aggregateParserMsgs val $rhsMsg = $ctx1.shortParserMsg val $res = if (!$ctx1.isSuccess) { @@ -488,11 +486,10 @@ object MacroImpls { ) } - if ($ctx1.verboseFailures) $ctx1.aggregateMsg( - $preLhsIndex, + if ($ctx1.verboseFailures) $ctx1.reportAggregateMsg( _root_.fastparse.internal.Util.joinBinOp($lhsMsg, $rhsMsg), $rhsAggregate ::: $lhsAggregate, - // We override the failureGroupAggregate to avoid building an `a ~ b` + // We override the aggregateParserMsgs to avoid building an `a ~ b` // aggregate msg in the specific case where the LHS parser fails to // make any progress past `startIndex`. This finds cases like `a.? 
~ b` // or `a.rep ~ b` and lets use flatten them out into `a | b` @@ -522,7 +519,7 @@ object MacroImpls { if (!$ctx1.isSuccess) $ctx1 else { val $postLhsIndex = $ctx1.index - val $lhsAggregate = $ctx1.failureGroupAggregate + val $lhsAggregate = $ctx1.aggregateParserMsgs val $lhsMsg = $ctx1.shortParserMsg $setCut @@ -570,7 +567,7 @@ object MacroImpls { else { ctx0.freshSuccessUnit(ctx0.index + 1) } - if (ctx0.verboseFailures) ctx0.aggregateTerminal(startIndex, () => s"char-pred(${p0})") + if (ctx0.verboseFailures) ctx0.reportTerminalMsg(startIndex, () => s"char-pred(${p0})") res } } @@ -613,7 +610,7 @@ object MacroImpls { if ($index >= $goal) $ctx1.freshSuccessUnit(index = $index) else $ctx1.freshFailure() - if ($ctx1.verboseFailures) $ctx1.aggregateTerminal($start, () => $bracketed) + if ($ctx1.verboseFailures) $ctx1.reportTerminalMsg($start, () => $bracketed) res } """ @@ -641,7 +638,7 @@ object MacroImpls { val res = if (index >= goal) ctx0.freshSuccessUnit(index = index) else ctx0.freshFailure() - if (ctx0.verboseFailures) ctx0.aggregateTerminal(start, () => s"chars-while($p0, ${min.splice})") + if (ctx0.verboseFailures) ctx0.reportTerminalMsg(start, () => s"chars-while($p0, ${min.splice})") res } } @@ -693,9 +690,8 @@ object MacroImpls { if (ctx1.verboseFailures) { val msg = ctx1.shortParserMsg - val agg = ctx1.failureGroupAggregate if (!postSuccess){ - ctx1.aggregateMsg(startPos, () => msg.render + ".?", agg) + ctx1.reportAggregateMsg(() => msg.render + ".?") } } res diff --git a/fastparse/src-2/fastparse/internal/MacroRepImpls.scala b/fastparse/src-2/fastparse/internal/MacroRepImpls.scala index 962383d1..9978cca7 100644 --- a/fastparse/src-2/fastparse/internal/MacroRepImpls.scala +++ b/fastparse/src-2/fastparse/internal/MacroRepImpls.scala @@ -75,20 +75,23 @@ object MacroRepImpls{ ${c.prefix}.parse0() val $parsedMsg = $ctx1.shortParserMsg - val $parsedAgg = $ctx1.failureGroupAggregate + val $parsedAgg = $ctx1.aggregateParserMsgs $originalCut |= $ctx1.cut if 
(!$ctx1.isSuccess) { val res = if ($ctx1.cut) $ctx1.asInstanceOf[_root_.fastparse.P[${c.weakTypeOf[V]}]] else $endSnippet - if ($ctx1.verboseFailures) { - $ctx1.aggregateMsg( - $startIndex, - () => $parsedMsg.render + s".rep" + $aggregateSnippet, - if ($lastAgg == null) $ctx1.failureGroupAggregate - else $ctx1.failureGroupAggregate ::: $lastAgg - ) - } + + if ($ctx1.verboseFailures) _root_.fastparse.internal.Util.reportParseMsgInRep( + $startIndex, + ${min.getOrElse(q"0")}, + $ctx1, + _root_.fastparse.internal.Msgs.empty, + $parsedMsg, + $lastAgg, + true + ) + res }else { val $beforeSepIndex = $ctx1.index diff --git a/fastparse/src-2/fastparse/internal/RepImpls.scala b/fastparse/src-2/fastparse/internal/RepImpls.scala index acdd2f5b..403ae0d8 100644 --- a/fastparse/src-2/fastparse/internal/RepImpls.scala +++ b/fastparse/src-2/fastparse/internal/RepImpls.scala @@ -2,7 +2,7 @@ package fastparse.internal import fastparse.{Implicits, NoWhitespace, ParsingRun} -import Util.{aggregateMsgInRep, aggregateMsgPostSep} +import Util.{reportParseMsgInRep, reportParseMsgPostSep} import scala.annotation.tailrec @@ -34,14 +34,14 @@ class RepImpls[T](val parse0: () => ParsingRun[T]) extends AnyVal{ val verboseFailures = ctx.verboseFailures parse0() val parsedMsg = ctx.shortParserMsg - val parsedAgg = ctx.failureGroupAggregate + val parsedAgg = ctx.aggregateParserMsgs val postCut = ctx.cut if (!ctx.isSuccess) { val res = if (postCut) ctx.asInstanceOf[ParsingRun[V]] else end(startIndex, startIndex, count, outerCut | postCut) - if (verboseFailures) aggregateMsgInRep(startIndex, min, ctx, sepMsg, parsedMsg, lastAgg, precut) + if (verboseFailures) reportParseMsgInRep(startIndex, min, ctx, sepMsg, parsedMsg, lastAgg, precut || postCut) res }else { val beforeSepIndex = ctx.index @@ -49,7 +49,7 @@ class RepImpls[T](val parse0: () => ParsingRun[T]) extends AnyVal{ val nextCount = count + 1 if (nextCount == actualMax) { val res = end(beforeSepIndex, beforeSepIndex, nextCount, outerCut | 
postCut) - if (verboseFailures) ctx.setMsg(startIndex, () => parsedMsg.render + ".repX" + (if(min == 0) "" else s"($min)")) + if (verboseFailures) ctx.reportTerminalMsg(startIndex, () => parsedMsg.render + ".repX" + (if(min == 0) "" else s"($min)")) res } else { @@ -64,7 +64,7 @@ class RepImpls[T](val parse0: () => ParsingRun[T]) extends AnyVal{ val res = if (sepCut) ctx.augmentFailure(beforeSepIndex, endCut) else end(beforeSepIndex, beforeSepIndex, nextCount, endCut) - if (verboseFailures) aggregateMsgPostSep(startIndex, min, ctx, parsedMsg, parsedAgg) + if (verboseFailures) reportParseMsgPostSep(startIndex, min, ctx, parsedMsg, parsedAgg) res } } @@ -95,14 +95,14 @@ class RepImpls[T](val parse0: () => ParsingRun[T]) extends AnyVal{ ctx.cut = precut | (count < min && outerCut) parse0() val parsedMsg = ctx.shortParserMsg - val parsedAgg = ctx.failureGroupAggregate + val parsedAgg = ctx.aggregateParserMsgs val postCut = ctx.cut val verboseFailures = ctx.verboseFailures if (!ctx.isSuccess) { val res = if (postCut) ctx.asInstanceOf[ParsingRun[V]] else end(startIndex, startIndex, count, outerCut | postCut) - if (verboseFailures) aggregateMsgInRep(startIndex, min, ctx, sepMsg, parsedMsg, lastAgg, precut) + if (verboseFailures) reportParseMsgInRep(startIndex, min, ctx, sepMsg, parsedMsg, lastAgg, precut || postCut) res }else { val beforeSepIndex = ctx.index @@ -119,7 +119,7 @@ class RepImpls[T](val parse0: () => ParsingRun[T]) extends AnyVal{ val res = if (sepCut) ctx.augmentFailure(beforeSepIndex, endCut) else end(beforeSepIndex, beforeSepIndex, nextCount, endCut) - if (verboseFailures) aggregateMsgPostSep(startIndex, min, ctx, parsedMsg, parsedAgg) + if (verboseFailures) reportParseMsgPostSep(startIndex, min, ctx, parsedMsg, parsedAgg) res } } @@ -156,14 +156,14 @@ class RepImpls[T](val parse0: () => ParsingRun[T]) extends AnyVal{ else { parse0() val parsedMsg = ctx.shortParserMsg - val parsedAgg = ctx.failureGroupAggregate + val parsedAgg = ctx.aggregateParserMsgs val 
postCut = ctx.cut val verboseFailures = ctx.verboseFailures if (!ctx.isSuccess) { val res = if (postCut) ctx.asInstanceOf[ParsingRun[V]] else end(startIndex, startIndex, count, outerCut | postCut) - if (verboseFailures) aggregateMsgInRep(startIndex, min, ctx, sepMsg, parsedMsg, lastAgg, precut) + if (verboseFailures) reportParseMsgInRep(startIndex, min, ctx, sepMsg, parsedMsg, lastAgg, precut || postCut) res } else { val beforeSepIndex = ctx.index @@ -171,7 +171,7 @@ class RepImpls[T](val parse0: () => ParsingRun[T]) extends AnyVal{ val nextCount = count + 1 if (nextCount == actualMax) { val res = end(beforeSepIndex, beforeSepIndex, nextCount, outerCut | postCut) - if (verboseFailures) ctx.setMsg(startIndex, () => parsedMsg.render + ".rep" + (if(min == 0) "" else s"($min)")) + if (verboseFailures) ctx.reportTerminalMsg(startIndex, () => parsedMsg.render + ".rep" + (if(min == 0) "" else s"($min)")) res } else if (!consumeWhitespace(whitespace, ctx, false)) ctx.asInstanceOf[ParsingRun[Nothing]] @@ -182,9 +182,10 @@ class RepImpls[T](val parse0: () => ParsingRun[T]) extends AnyVal{ val endCut = outerCut | postCut | sepCut if (sep1 == null) rec(beforeSepIndex, nextCount, false, endCut, null, parsedAgg) else if (ctx.isSuccess) { + val sepMsg = ctx.shortParserMsg if (!consumeWhitespace(whitespace, ctx, sepCut)) ctx.asInstanceOf[ParsingRun[Nothing]] else { - rec(beforeSepIndex, nextCount, sepCut, endCut, ctx.shortParserMsg, parsedAgg) + rec(beforeSepIndex, nextCount, sepCut, endCut, sepMsg, parsedAgg) } } else { @@ -192,7 +193,7 @@ class RepImpls[T](val parse0: () => ParsingRun[T]) extends AnyVal{ if (sepCut) ctx.augmentFailure(beforeSepIndex, endCut) else end(beforeSepIndex, beforeSepIndex, nextCount, endCut) - if (verboseFailures) aggregateMsgPostSep(startIndex, min, ctx, parsedMsg, parsedAgg) + if (verboseFailures) reportParseMsgPostSep(startIndex, min, ctx, parsedMsg, parsedAgg) res } } @@ -224,14 +225,14 @@ class RepImpls[T](val parse0: () => ParsingRun[T]) extends 
AnyVal{ ctx.cut = precut | (count < min && outerCut) parse0() val parsedMsg = ctx.shortParserMsg - val parsedAgg = ctx.failureGroupAggregate + val parsedAgg = ctx.aggregateParserMsgs val postCut = ctx.cut val verboseFailures = ctx.verboseFailures if (!ctx.isSuccess){ val res = if (postCut) ctx.asInstanceOf[ParsingRun[V]] else end(startIndex, startIndex, count, outerCut | postCut) - if (verboseFailures) aggregateMsgInRep(startIndex, min, ctx, sepMsg, parsedMsg, lastAgg, precut) + if (verboseFailures) reportParseMsgInRep(startIndex, min, ctx, sepMsg, parsedMsg, lastAgg, precut || postCut) res }else{ val beforeSepIndex = ctx.index @@ -245,9 +246,10 @@ class RepImpls[T](val parse0: () => ParsingRun[T]) extends AnyVal{ val endCut = outerCut | postCut | sepCut if (sep1 == null) rec(beforeSepIndex, nextCount, false, endCut, null, parsedAgg) else if (ctx.isSuccess) { + val sepMsg = ctx.shortParserMsg if (!consumeWhitespace(whitespace, ctx, sepCut)) ctx.asInstanceOf[ParsingRun[Nothing]] else { - rec(beforeSepIndex, nextCount, sepCut, endCut, ctx.shortParserMsg, parsedAgg) + rec(beforeSepIndex, nextCount, sepCut, endCut, sepMsg, parsedAgg) } } else { @@ -255,7 +257,7 @@ class RepImpls[T](val parse0: () => ParsingRun[T]) extends AnyVal{ if (sepCut) ctx.augmentFailure(beforeSepIndex, endCut) else end(beforeSepIndex, beforeSepIndex, nextCount, endCut) - if (verboseFailures) aggregateMsgPostSep(startIndex, min, ctx, parsedMsg, parsedAgg) + if (verboseFailures) reportParseMsgPostSep(startIndex, min, ctx, parsedMsg, parsedAgg) res } } diff --git a/fastparse/src-2/fastparse/package.scala b/fastparse/src-2/fastparse/package.scala index 4de06306..62d7e5fd 100644 --- a/fastparse/src-2/fastparse/package.scala +++ b/fastparse/src-2/fastparse/package.scala @@ -276,9 +276,60 @@ package object fastparse extends fastparse.SharedPackageDefs { * fails if the wrapped parser succeeds. In all cases, it ends up * consuming zero characters. 
*/ - def unary_!(implicit ctx: P[Any]) : P[Unit] = SharedPackageDefs.unary_!(parse0) + def unary_!(implicit ctx: P[Any]) : P[Unit] = { + val startPos = ctx.index + val startCut = ctx.cut + val oldNoCut = ctx.noDropBuffer + ctx.noDropBuffer = true + val startTerminals = ctx.terminalParserMsgs + parse0() + ctx.noDropBuffer = oldNoCut + val msg = ctx.shortParserMsg + + val res = + if (ctx.isSuccess) ctx.freshFailure(startPos) + else ctx.freshSuccessUnit(startPos) + + if (ctx.verboseFailures) { + // Unlike most other data on `ctx`, `terminalParserMsgs` is normally + // append-only. Thus when we're inside the unary_! expression, it + // continually appends to `terminalParserMsgs` sub-parsers that could + // have succeeded within it, but are irrelevant to the user because + // we *want* the contents of the unary_! to fail! Thus, we reset + // `terminalParserMsgs` once we exit the unary_!, to ensure these do not + // end up in error messages + ctx.terminalParserMsgs = startTerminals + ctx.reportTerminalMsg(startPos, Msgs.empty) + } + res.cut = startCut + res + } } + + /** + * Positive lookahead operator: succeeds if the wrapped parser succeeds and + * fails if the wrapped parser fails, but in all cases consumes zero + * characters. + */ + def &(parse: => P[_])(implicit ctx: P[_]): P[Unit] = { + + val startPos = ctx.index + val startCut = ctx.cut + val oldNoCut = ctx.noDropBuffer + ctx.noDropBuffer = true + parse + ctx.noDropBuffer = oldNoCut + val msg = ctx.shortParserMsg + + val res = + if (ctx.isSuccess) ctx.freshSuccessUnit(startPos) + else ctx.asInstanceOf[P[Unit]] + + res.cut = startCut + res + } + /** * Provides logging-related [[LogByNameOps]] implicits on [[String]]. 
*/ diff --git a/fastparse/src-3/fastparse/internal/MacroInlineImpls.scala b/fastparse/src-3/fastparse/internal/MacroInlineImpls.scala index 65071be2..9df8c1df 100644 --- a/fastparse/src-3/fastparse/internal/MacroInlineImpls.scala +++ b/fastparse/src-3/fastparse/internal/MacroInlineImpls.scala @@ -27,7 +27,7 @@ object MacroInlineImpls { } else { ctx1.freshFailure().asInstanceOf[ParsingRun[Unit]] } - if (ctx1.verboseFailures) ctx1.aggregateTerminal(index, () => $literalized) + if (ctx1.verboseFailures) ctx1.reportTerminalMsg(index, () => $literalized) res } @@ -51,7 +51,7 @@ object MacroInlineImpls { ctx1.freshFailure().asInstanceOf[ParsingRun[Unit]] } if (ctx1.verboseFailures) { - ctx1.aggregateTerminal(index, () => $literalized) + ctx1.reportTerminalMsg(index, () => $literalized) } res @@ -67,7 +67,7 @@ object MacroInlineImpls { val res = if (Util.startsWith(ctx1.input, s1, index)) ctx1.freshSuccessUnit(index + s1.length) else ctx1.freshFailure().asInstanceOf[ParsingRun[Unit]] - if (ctx1.verboseFailures) ctx1.aggregateTerminal(index, () => Util.literalize(s1)) + if (ctx1.verboseFailures) ctx1.reportTerminalMsg(index, () => Util.literalize(s1)) res } } @@ -83,7 +83,7 @@ object MacroInlineImpls { else if (f(ctx1.successValue.asInstanceOf[T])) ctx1.asInstanceOf[ParsingRun[T]] else ctx1.freshFailure().asInstanceOf[ParsingRun[T]] - if (ctx1.verboseFailures) ctx1.aggregateTerminal(startIndex, () => "filter") + if (ctx1.verboseFailures) ctx1.reportTerminalMsg(startIndex, () => "filter") res } @@ -103,11 +103,9 @@ object MacroInlineImpls { ctx1.instrument.afterParse(name.value, ctx0.index, ctx0.isSuccess) } if (ctx0.verboseFailures) { - ctx0.aggregateMsg( - startIndex, - Msgs(new Lazy(() => name.value) :: Nil), - ctx0.failureGroupAggregate, - startIndex < ctx0.traceIndex + ctx0.reportAggregateMsg( + () => name.value, + forceAggregate = startIndex < ctx0.traceIndex ) if (!ctx0.isSuccess) { ctx0.failureStack = (name.value -> startIndex) :: ctx0.failureStack @@ -149,7 +147,7 
@@ object MacroInlineImpls { if (!ctx1.isSuccess) ctx1 else { val postLhsIndex = ctx1.index - val lhsAggregate = ctx1.failureGroupAggregate + val lhsAggregate = ctx1.aggregateParserMsgs val lhsMsg = ctx1.shortParserMsg ${ setCut('{ ctx1 }) } @@ -163,7 +161,7 @@ object MacroInlineImpls { else { val preRhsIndex = ctx1.index $rhs - val rhsAggregate = ctx1.failureGroupAggregate + val rhsAggregate = ctx1.aggregateParserMsgs val rhsMsg = ctx1.shortParserMsg val res = if (!ctx1.isSuccess) { @@ -188,11 +186,10 @@ object MacroInlineImpls { ) } - if (ctx1.verboseFailures) ctx1.aggregateMsg( - preLhsIndex, - _root_.fastparse.internal.Util.joinBinOp(lhsMsg, rhsMsg), + if (ctx1.verboseFailures) ctx1.reportAggregateMsg( + Util.joinBinOp(lhsMsg, rhsMsg), rhsAggregate ::: lhsAggregate, - // We override the failureGroupAggregate to avoid building an `a ~ b` + // We override the aggregateParserMsgs to avoid building an `a ~ b` // aggregate msg in the specific case where the LHS parser fails to // make any progress past `startIndex`. This finds cases like `a.? 
~ b` // or `a.rep ~ b` and lets use flatten them out into `a | b` @@ -202,22 +199,21 @@ object MacroInlineImpls { } } - val guardedRhs = whitespace match { + whitespace match { case null => rhsSnippet case ws => if (ws.asTerm.tpe =:= TypeRepr.of[fastparse.NoWhitespace.noWhitespaceImplicit.type]) rhsSnippet else { '{ - _root_.fastparse.internal.Util.consumeWhitespace($ws, ctx1) + Util.consumeWhitespace($ws, ctx1) if (ctx1.isSuccess) $rhsSnippet else ctx1 } } } - guardedRhs } } - }.asInstanceOf[_root_.fastparse.ParsingRun[R]] + }.asInstanceOf[ParsingRun[R]] } } @@ -245,9 +241,8 @@ object MacroInlineImpls { if (ctx1.verboseFailures) { val msg = ctx1.shortParserMsg - val agg = ctx1.failureGroupAggregate if (!postSuccess) { - ctx1.aggregateMsg(startPos, () => msg.render + ".?", agg) + ctx1.reportAggregateMsg(() => msg.render + ".?") } } res @@ -301,7 +296,7 @@ object MacroInlineImpls { lhs0 val lhsMsg = ctx5.shortParserMsg - val lhsAggregate = ctx5.failureGroupAggregate + val lhsAggregate = ctx5.aggregateParserMsgs if (ctx5.isSuccess) { ctx5.cut |= oldCut ctx5.asInstanceOf[ParsingRun[V]] @@ -310,7 +305,7 @@ object MacroInlineImpls { val verboseFailures = ctx5.verboseFailures ctx5.index = startPos - if (verboseFailures) ctx5.aggregateMsg(startPos, lhsMsg, lhsAggregate) + if (verboseFailures) ctx5.reportAggregateMsg(lhsMsg) ctx5.cut = false other @@ -319,8 +314,9 @@ object MacroInlineImpls { val endCut = rhsCut | oldCut if (!ctx5.isSuccess && !rhsCut) ctx5.freshFailure(startPos) ctx5.cut = endCut - if (verboseFailures) - ctx5.aggregateMsg(startPos, rhsMsg ::: lhsMsg, ctx5.failureGroupAggregate ::: lhsAggregate) + if (verboseFailures) { + ctx5.reportAggregateMsg(rhsMsg ::: lhsMsg, ctx5.aggregateParserMsgs ::: lhsAggregate) + } ctx5.asInstanceOf[ParsingRun[V]] } } @@ -399,7 +395,7 @@ object MacroInlineImpls { case true => ctx1.freshSuccessUnit(index + 1) case false => ctx1.freshFailure().asInstanceOf[ParsingRun[Unit]] } - if (ctx1.verboseFailures) 
ctx1.aggregateTerminal(index, () => $bracketed) + if (ctx1.verboseFailures) ctx1.reportTerminalMsg(index, () => $bracketed) res } } @@ -422,7 +418,7 @@ object MacroInlineImpls { } else { ctx0.freshSuccessUnit(ctx0.index + 1) } - if (ctx0.verboseFailures) ctx0.aggregateTerminal(startIndex, () => s"char-pred(${p0})") + if (ctx0.verboseFailures) ctx0.reportTerminalMsg(startIndex, () => s"char-pred(${p0})") res } @@ -450,7 +446,7 @@ object MacroInlineImpls { if (index >= goal) ctx1.freshSuccessUnit(index = index) else ctx1.freshFailure() - if (ctx1.verboseFailures) ctx1.aggregateTerminal(start, () => $bracketed) + if (ctx1.verboseFailures) ctx1.reportTerminalMsg(start, () => $bracketed) res } } @@ -465,7 +461,7 @@ object MacroInlineImpls { val res = if (index >= goal) ctx0.freshSuccessUnit(index = index) else ctx0.freshFailure() - if (ctx0.verboseFailures) ctx0.aggregateTerminal(start, () => s"chars-while($p0, $min)") + if (ctx0.verboseFailures) ctx0.reportTerminalMsg(start, () => s"chars-while($p0, $min)") res } @@ -540,12 +536,9 @@ object MacroInlineImpls { val res = if (output != -1) ctx1.freshSuccessUnit(output) else ctx1.freshFailure() - if (ctx1.verboseFailures) ctx1.setMsg( + if (ctx1.verboseFailures) ctx1.reportTerminalMsg( index, - () => - ${ - Expr("StringIn(" + literals.map(Util.literalize(_)).mkString(", ") + ")") - } + Msgs.fromStrings(${ Expr(literals.map(Util.literalize(_)).toList) }) ) res } diff --git a/fastparse/src-3/fastparse/internal/MacroRepImpls.scala b/fastparse/src-3/fastparse/internal/MacroRepImpls.scala index 9ae74fb8..30a1367d 100644 --- a/fastparse/src-3/fastparse/internal/MacroRepImpls.scala +++ b/fastparse/src-3/fastparse/internal/MacroRepImpls.scala @@ -27,19 +27,26 @@ object MacroRepImpls { ctx0: Expr[ParsingRun[_]])(using quotes: Quotes): Expr[ParsingRun[V]] = { import quotes.reflect.* - def getInlineExpansionValue(t: Term): Option[Int] = { + def getInlineExpansionValue[T](t: Term): Term = { t match{ case Inlined(a, b, c) => 
getInlineExpansionValue(c) - case _ => t.asExprOf[Int].value + case Typed(a, b) => getInlineExpansionValue(a) + case _ => t } } - val staticMin0 = getInlineExpansionValue(min.asTerm) - val staticMax0 = getInlineExpansionValue(max.asTerm) - val staticExactly0 = getInlineExpansionValue(exactly.asTerm) + val staticMin0 = getInlineExpansionValue[Int](min.asTerm).asExprOf[Int] + val staticMax0 = getInlineExpansionValue[Int](max.asTerm).asExprOf[Int] + val staticExactly0 = getInlineExpansionValue[Int](exactly.asTerm).asExprOf[Int] - val staticActualMin = staticMin0.zip(staticExactly0).map{(m, e) => if (e == -1) m else e} - val staticActualMax = staticMax0.zip(staticExactly0).map{(m, e) => if (e == -1) m else e} + val staticActualMin = staticExactly0 match{ + case '{-1} => staticMin0.value + case _ => staticExactly0.value + } + val staticActualMax = staticExactly0 match{ + case '{-1} => staticMax0.value + case _ => staticExactly0.value + } '{ val ctx = $ctx0 @@ -91,14 +98,24 @@ object MacroRepImpls { else { $parse0 val parsedMsg = ctx.shortParserMsg - val parsedAgg = ctx.failureGroupAggregate + val parsedAgg = ctx.aggregateParserMsgs val postCut = ctx.cut val verboseFailures = ctx.verboseFailures if (!ctx.isSuccess) { val res = if (postCut) ctx.asInstanceOf[ParsingRun[V]] else end(startIndex, startIndex, count, outerCut | postCut) - if (verboseFailures) Util.aggregateMsgInRep(startIndex, actualMin, ctx, sepMsg, parsedMsg, lastAgg, precut) + if (verboseFailures) { + Util.reportParseMsgInRep( + startIndex, + actualMin, + ctx, + sepMsg, + parsedMsg, + lastAgg, + precut || postCut + ) + } res } else { val beforeSepIndex = ctx.index @@ -112,7 +129,7 @@ object MacroRepImpls { '{ if ($checkMax2) { val res = end(beforeSepIndex, beforeSepIndex, nextCount, outerCut | postCut) - if (verboseFailures) ctx.setMsg(startIndex, () => parsedMsg.render + ".rep" + (if (actualMin == 0) "" else s"(${actualMin})")) + if (verboseFailures) ctx.reportTerminalMsg(startIndex, () => parsedMsg.render 
+ ".rep" + (if (actualMin == 0) "" else s"(${actualMin})")) res } else { @@ -120,7 +137,7 @@ object MacroRepImpls { consumeWhitespace('{false})('{ ctx.cut = false ${ - sep match { + getInlineExpansionValue(sep.asTerm).asExpr match { case '{ null } => '{ rec(beforeSepIndex, nextCount, false, outerCut | postCut, null, parsedAgg) @@ -130,11 +147,11 @@ object MacroRepImpls { val sep1 = $sep val sepCut = ctx.cut val endCut = outerCut | postCut | sepCut - if (sep1 == null) rec(beforeSepIndex, nextCount, false, endCut, null, parsedAgg) - else if (ctx.isSuccess) { + if (ctx.isSuccess) { + val postSepMsg = ctx.shortParserMsg ${ consumeWhitespace('{sepCut})('{ - rec(beforeSepIndex, nextCount, sepCut, endCut, ctx.shortParserMsg, parsedAgg) + rec(beforeSepIndex, nextCount, sepCut, endCut, postSepMsg, parsedAgg) }) } } @@ -143,7 +160,7 @@ object MacroRepImpls { if (sepCut) ctx.augmentFailure(beforeSepIndex, endCut) else end(beforeSepIndex, beforeSepIndex, nextCount, endCut) - if (verboseFailures) Util.aggregateMsgPostSep(startIndex, actualMin, ctx, parsedMsg, parsedAgg) + if (verboseFailures) Util.reportParseMsgPostSep(startIndex, actualMin, ctx, parsedMsg, parsedAgg) res } } diff --git a/fastparse/src-3/fastparse/package.scala b/fastparse/src-3/fastparse/package.scala index aec2fc87..befc1b96 100644 --- a/fastparse/src-3/fastparse/package.scala +++ b/fastparse/src-3/fastparse/package.scala @@ -242,8 +242,56 @@ package object fastparse extends fastparse.SharedPackageDefs { * fails if the wrapped parser succeeds. In all cases, it ends up * consuming zero characters. 
*/ - def unary_!(implicit ctx: P[Any]): P[Unit] = SharedPackageDefs.unary_!(() => parse0) + inline def unary_!(implicit ctx: P[Any]): P[Unit] = { + + val startPos = ctx.index + val startCut = ctx.cut + val oldNoCut = ctx.noDropBuffer + ctx.noDropBuffer = true + val startTerminals = ctx.terminalParserMsgs + parse0 + ctx.noDropBuffer = oldNoCut + + val res = + if (ctx.isSuccess) ctx.freshFailure(startPos) + else ctx.freshSuccessUnit(startPos) + + if (ctx.verboseFailures) { + // Unlike most other data on `ctx`, `terminalParserMsgs` is normally + // append-only. Thus when we're inside the unary_! expression, it + // continually appends to `terminalParserMsgs` sub-parsers that could + // have succeeded within it, but are irrelevant to the user because + // we *want* the contents of the unary_! to fail! Thus, we reset + // `terminalParserMsgs` once we exit the unary_!, to ensure these do not + // end up in error messages + ctx.terminalParserMsgs = startTerminals + ctx.reportTerminalMsg(startPos, Msgs.empty) + } + res.cut = startCut + res + } + + /** + * Positive lookahead operator: succeeds if the wrapped parser succeeds and + * fails if the wrapped parser fails, but in all cases consumes zero + * characters. + */ + inline def &(inline parse: => P[_])(implicit ctx: P[_]): P[Unit] = { + val startPos = ctx.index + val startCut = ctx.cut + val oldNoCut = ctx.noDropBuffer + ctx.noDropBuffer = true + parse + ctx.noDropBuffer = oldNoCut + + val res = + if (ctx.isSuccess) ctx.freshSuccessUnit(startPos) + else ctx.asInstanceOf[P[Unit]] + + res.cut = startCut + res + } /** Provides logging-related [[LogByNameOps]] implicits on [[String]]. 
*/ implicit def LogOpsStr(parse0: String)(implicit ctx: P[Any]): fastparse.LogByNameOps[Unit] = LogByNameOps(parse0) diff --git a/fastparse/src/fastparse/Parsed.scala b/fastparse/src/fastparse/Parsed.scala index a1f35569..0256387c 100644 --- a/fastparse/src/fastparse/Parsed.scala +++ b/fastparse/src/fastparse/Parsed.scala @@ -148,8 +148,8 @@ object Parsed{ def fromParsingRun[T](p: ParsingRun[T]) = { assert(!p.isSuccess) TracedFailure( - p.failureTerminalAggregate, - p.lastFailureMsg ::: p.failureGroupAggregate, + p.terminalParserMsgs, + p.aggregateParserMsgs, Parsed.fromParsingRun(p).asInstanceOf[Failure] ) } @@ -185,7 +185,7 @@ object Parsed{ * Displays the short failure message excluding the parse stack. This shows * the last parser which failed causing the parse to fail. Note that this * does not include other parsers which may have failed earlier; see [[terminalsMsg]] - * and [[aggregateMsg]] for more detailed errors + * and [[reportParseMsg]] for more detailed errors */ def msg = failure.msg /** diff --git a/fastparse/src/fastparse/ParsingRun.scala b/fastparse/src/fastparse/ParsingRun.scala index bcf96457..529aea7e 100644 --- a/fastparse/src/fastparse/ParsingRun.scala +++ b/fastparse/src/fastparse/ParsingRun.scala @@ -11,12 +11,12 @@ import fastparse.internal.{Instrument, Lazy, Msgs, Util} * There are a few patterns that let us program with these mutable variables * in a sort-of-pure-functional way: * - test - If a parser that wishes to ignore changes to a field within their child + * - If a parser that wishes to ignore changes to a field within their child * parsers, a common pattern is to save the value of the field before the * wrapped parser runs, and then re-set the field. e.g. 
this can be used to * backtrack [[index]] after a lookahead parser finishes * - test - If a parser wants to read the value of the field "returned" by multiple + * - If a parser wants to read the value of the field "returned" by multiple * child parsers, make sure to read the field into a local variable after * each child parser is complete to make sure the value you want from an * earlier child isn't stomped over by a later child @@ -35,10 +35,10 @@ import fastparse.internal.{Instrument, Lazy, Msgs, Util} * it with tracing enabled. * @param traceIndex The index we wish to trace if tracing is enabled, else * -1. Used to find failure messages to aggregate into - * `failureTerminalAggregate` + * `terminalParserMsgs` * @param instrument Callbacks that can be injected before/after every * `P(...)` parser. - * @param failureTerminalAggregate When tracing is enabled, this collects up all the + * @param terminalParserMsgs When tracing is enabled, this collects up all the * upper-most failures that happen at [[traceIndex]] * (in [[Lazy]] wrappers) so they can be shown to the * user at end-of-parse as suggestions for what could @@ -108,8 +108,8 @@ final class ParsingRun[+T](val input: ParserInput, val traceIndex: Int, val instrument: Instrument, // Mutable vars below: - var failureTerminalAggregate: Msgs, - var failureGroupAggregate: Msgs, + var terminalParserMsgs: Msgs, + var aggregateParserMsgs: Msgs, var shortParserMsg: Msgs, var lastFailureMsg: Msgs, var failureStack: List[(String, Int)], @@ -122,132 +122,121 @@ final class ParsingRun[+T](val input: ParserInput, var noDropBuffer: Boolean, val misc: collection.mutable.Map[Any, Any]){ - // HOW ERROR AGGREGATION WORKS: - // - // Fastparse provides two levels of error aggregation that get enabled when - // calling `.trace()`: `failureTerminalAggregate`, and `failureGroupAggregate`: - // - // - `failureTerminalAggregate` lists all low-level terminal parsers which are - // tried at the given `traceIndex`. 
This is useful to answer the question - // "what can I put at the error position to make my parse continue" - // - // - `failureGroupAggregate` lists all high-level parsers which are tried at - // the given `traceIndex`. This is useful to answer the question "What was - // the parser trying to do when it failed" - // - // The implementation of `failureTerminalAggregate` is straightforward: we - // simply call `aggregateTerminal` in every terminal parser, which collects - // all the messages in a big list and returns it. The implementation of - // `failureGroupAggregate` is more interesting, since we need to figure out - // what are the "high level" parsers that we need to list. We use the - // following algorithm: - // - // - When a parse which started at the given `traceIndex` fails without a cut - // - Over-write `failureGroupAggregate` with it's `shortParserMsg` - // - // - Otherwise: - // - If we are a terminal parser, we set our `failureGroupAggregate` to Nil - // - If we are a compound parser, we simply sum up the `failureGroupAggregate` - // of all our constituent parts - // - // The point of this heuristic is to provide the highest-level parsers which - // failed at the `traceIndex`, but are not already part of the `failureStack`. - // non-highest-level parsers do successfully write their message to - // `failureGroupAggregate`, but they are subsequently over-written by the higher - // level parsers, until it reaches the point where `cut == true`, indicating - // that any further higher-level parsers will be in `failureStack` and using - // their message to stomp over the existing parse-failure-messages in - // `failureGroupAggregate` would be wasteful. - // - // These is an edge case where there is no given failure that occurs exactly at - // `traceIndex` e.g. parsing "ax" with P( ("a" ~ "b") ~ "c" | "a" ~/ "d" ), the - // final failure `index` and thus `traceIndex` is at offset 1, and we would like - // to receive the aggregation ("b" | "d"). 
But ("a" ~ "b") - // passes from offsets 0-2, "c" fails at offset 2 and ("a" ~ "b") ~ "c" fails - // from offset 0-2. In such a case, we truncate the `shortParserMsg` at - // `traceIndex` to only include the portion we're interested in (which directly - // follows the failure). This then gets aggregated nicely to form the error - // message from-point-of-failure. - // - // A follow-on edge case is parsing "ax" with - // - // val inner = P( "a" ~ "b" ) - // P( inner ~ "c" | "a" ~/ "d" ) - // - // Here, we find that the `inner` parser starts before the `traceIndex` and - // fails at `traceIndex`, but we want our aggregation to continue being - // ("b" | "d"), rather than (inner | "d"). Thus, for opaque compound parsers - // like `inner` which do not expose their internals, we use the `forceAggregate` - // to force it to expose it's internals when it's range covers the `traceIndex` - // but it isn't an exact match - def aggregateMsg(startIndex: Int, - msgToSet: () => String, - msgToAggregate: Msgs): Unit = { - aggregateMsg(startIndex, Msgs(new Lazy(msgToSet) :: Nil), msgToAggregate) + /** + * Called by non-terminal parsers after completion, success or failure + * + * This needs to be called for both successful and failed parsers, as we need + * to record the msg of a successful parse in case it forms part of a larger + * failed parse later. + * + * For example: + * + * - Using "a" ~ ("b" ~ "c" | "d") to parse "abe" + * - We report that the parser ("b" ~ "c" | "d") failed at index 1 + * - That msg contains the msg of the parse "b" even though it was successful + * + * Overloaded to minimize the amount of callsite bytecode, since we do a ton + * of inlining in Fastparse, and large amounts of bytecode inlined in a method + * can cause JVM performance problems (e.g.
JIT compilation may get disabled) + */ + def reportAggregateMsg(newShortParserMsg: Msgs): Unit = { + + reportAggregateMsg(newShortParserMsg, aggregateParserMsgs) } + def reportAggregateMsg(newShortParserMsg: Msgs, + newAggregateMsgs: Msgs): Unit = { - def aggregateMsg(startIndex: Int, - msgToSet: Msgs, - msgToAggregate: Msgs): Unit = { - aggregateMsg(startIndex, msgToSet, msgToAggregate, false) + reportAggregateMsg(newShortParserMsg, newAggregateMsgs, false) } - def aggregateMsg(startIndex: Int, - msgToSet: Msgs, - msgToAggregate: Msgs, - forceAggregate: Boolean): Unit = { - if (!isSuccess && lastFailureMsg == null) lastFailureMsg = msgToSet + def reportAggregateMsg(newShortParserMsg: Msgs, + forceAggregate: Boolean): Unit = { + reportAggregateMsg(newShortParserMsg, aggregateParserMsgs, forceAggregate) + } - shortParserMsg = msgToSet + def reportAggregateMsg(newShortParserMsg: Msgs, + newAggregateMsgs: Msgs, + forceAggregate: Boolean): Unit = { - // There are two cases when aggregating: either we stomp over the entire - // existing aggregation with `msgToSet`, or we preserve it (with possible - // additions) with `msgToAggregate`. 
- if (checkAggregate(startIndex) && !forceAggregate) failureGroupAggregate = msgToSet - else failureGroupAggregate = msgToAggregate + reportParseMsg0( + newShortParserMsg, + newAggregateMsgs, + forceAggregate, + newAggregateMsgs.value.nonEmpty + ) } - def aggregateTerminal(startIndex: Int, f: () => String): Unit = { - val f2 = new Lazy(f) - if (!isSuccess){ - if (index == traceIndex) failureTerminalAggregate ::= f2 - if (lastFailureMsg == null) lastFailureMsg = Msgs(f2 :: Nil) - } + /** + * Called by any terminal parser; these are parsers for which displaying + * sub-failures does not make sense these include: + * + * - Individual strings or characters + * - Parsers like negation `!p` or `.filter` where the entire parser failing + * is not caused by sub-failure + * - Parsers like `.opaque`, where sub-failures are intentionally hidden and + * not shown to the user + * + * These "terminal" failures will be stored in the `terminalParserMsgs` in case + * a user wants to know what could have been placed at the failure point to + * let the parse progress + */ + def reportTerminalMsg(startIndex: Int, newShortParserMsg: Msgs): Unit = { + // We only care about terminal parsers which failed exactly at the traceIndex + if (!isSuccess && index == traceIndex) terminalParserMsgs :::= newShortParserMsg - shortParserMsg = if (startIndex >= traceIndex) Msgs(f2 :: Nil) else Msgs.empty - failureGroupAggregate = if (checkAggregate(startIndex)) shortParserMsg else Msgs.empty + reportParseMsg0( + if (startIndex >= traceIndex) newShortParserMsg else Msgs.empty, + if (startIndex >= traceIndex) newShortParserMsg else Msgs.empty, + false, + startIndex >= traceIndex + ) } - def setMsg(startIndex: Int, f: () => String): Unit = { - setMsg(startIndex, Msgs(new Lazy(f) :: Nil)) - } + def reportParseMsg0(newShortParserMsg: Msgs, + newAggregateMsgs: Msgs, + forceAggregate: Boolean, + setShortMsg: Boolean): Unit = { + // `lastFailureMsg` ends up being set by the first parser to report a + // 
failure, while returning from the last parser to call `.freshFailure() + // (which nulls it out) + if (!isSuccess && lastFailureMsg == null) lastFailureMsg = newShortParserMsg - def setMsg(startIndex: Int, f: Msgs): Unit = { - if (!isSuccess && lastFailureMsg == null) lastFailureMsg = f - shortParserMsg = if (startIndex >= traceIndex) f else Msgs.empty - failureGroupAggregate = if (checkAggregate(startIndex)) shortParserMsg else Msgs.empty - } + // We only set the `shortParserMsg` for some parsers. These include: + // + // - Terminal parsers which have `startIndex >= traceIndex` + // + // - Aggregate parsers which have non-empty `newAggregateMsgs`, indicating + // that they have either child terminal parsers with `startIndex >= traceIndex` + // or they have child aggregate parsers with non-empty `newAggregateMsgs` + // + // This lets us skip setting `shortParserMsg` for all parsers, terminal or + // aggregate, which run and terminate fully before `traceIndex`, and thus + // would be of no interest to a user debugging parse failures at `traceIndex` + shortParserMsg = if (setShortMsg) newShortParserMsg else Msgs.empty - /** - * Conditions under which we want to aggregate the given parse - */ - def checkAggregate(startIndex: Int) = { - // We only aggregate if we are not currently past a cut; if we are past a - // cut, there is no further backtracking and so the error aggregate that has - // occurred will be the final aggregate shown to the user - !cut && - // Only aggregate failures - !isSuccess && - // We only stomp over the given aggregation with shortParserMsg if the range - // of the failed parse surrounds `traceIndex`. For parses that occur - // completely before or after the `traceIndex`, the actual parse doesn't - // contribute anything to the aggregation. 
- startIndex <= traceIndex && - traceIndex <= index + // There are two cases when aggregating: either we stomp over the entire + // existing `aggregateParserMsgs` with `newShortParserMsg`, or we preserve it + // (with possible additions) with `newAggregateMsgs`. + aggregateParserMsgs = + if (forceAggregate) newAggregateMsgs + // We only replace the aggregate Msgs if: + // + // 1. We are not currently past a cut; if we are past a cut, there is no + // further backtracking and so the error aggregate that has occurred + // will be the final aggregate shown to the user + // + // 2. Only replace in case of failures + // + // 3. Only stomp over the given aggregation with shortParserMsg if the + // current parser has failed and the final parse `index` (after any + // backtracking) is still at-or-greater-than the `traceIndex`. That + // ensures that any parsers which started/ended before the point of + // failure are not shown, since they are irrelevant + else if (!cut && !isSuccess && traceIndex <= index) shortParserMsg + else newAggregateMsgs } - // Use telescoping methods rather than default arguments to try and minimize // the amount of bytecode generated at the callsite. 
// diff --git a/fastparse/src/fastparse/SharedPackageDefs.scala b/fastparse/src/fastparse/SharedPackageDefs.scala index 68a1da3b..76201a01 100644 --- a/fastparse/src/fastparse/SharedPackageDefs.scala +++ b/fastparse/src/fastparse/SharedPackageDefs.scala @@ -54,8 +54,8 @@ trait SharedPackageDefs { originalParser = parser, traceIndex = traceIndex, instrument = instrument, - failureTerminalAggregate = Msgs.empty, - failureGroupAggregate = Msgs.empty, + terminalParserMsgs = Msgs.empty, + aggregateParserMsgs = Msgs.empty, shortParserMsg = Msgs.empty, lastFailureMsg = null, failureStack = List.empty, @@ -82,40 +82,10 @@ trait SharedPackageDefs { val res = if (Util.startsWithIgnoreCase(ctx.input, s, ctx.index)) ctx.freshSuccessUnit(ctx.index + s.length) else ctx.freshFailure().asInstanceOf[P[Unit]] - if (ctx.verboseFailures) ctx.aggregateTerminal(startIndex, () => Util.literalize(s)) + if (ctx.verboseFailures) ctx.reportTerminalMsg(startIndex, () => Util.literalize(s)) res } - /** - * Positive lookahead operator: succeeds if the wrapped parser succeeds and - * fails if the wrapped parser fails, but in all cases consumes zero - * characters. - */ - def &(parse: => P[_])(implicit ctx: P[_]): P[Unit] = { - - val startPos = ctx.index - val startCut = ctx.cut - val oldNoCut = ctx.noDropBuffer - ctx.noDropBuffer = true - parse - ctx.noDropBuffer = oldNoCut - val msg = ctx.shortParserMsg - - val res = - if (ctx.isSuccess) ctx.freshSuccessUnit(startPos) - else ctx.asInstanceOf[P[Unit]] - if (ctx.verboseFailures) { - ctx.failureGroupAggregate = Msgs.empty - ctx.setMsg(startPos, () => - msg match{ - case Seq(x) => s"&(${msg.render})" - case xs => s"&${msg.render}" - } - ) - } - res.cut = startCut - res - } /** * Parser that is only successful at the end of the input. 
Useful to ensure @@ -126,7 +96,7 @@ trait SharedPackageDefs { val res = if (!ctx.input.isReachable(startIndex)) ctx.freshSuccessUnit() else ctx.freshFailure().asInstanceOf[P[Unit]] - if (ctx.verboseFailures) ctx.aggregateTerminal(startIndex, () => "end-of-input") + if (ctx.verboseFailures) ctx.reportTerminalMsg(startIndex, () => "end-of-input") res } @@ -138,13 +108,13 @@ trait SharedPackageDefs { val res = if (startIndex == 0) ctx.freshSuccessUnit() else ctx.freshFailure().asInstanceOf[P[Unit]] - if (ctx.verboseFailures) ctx.aggregateTerminal(startIndex, () => "start-of-input") + if (ctx.verboseFailures) ctx.reportTerminalMsg(startIndex, () => "start-of-input") res } /** * Wraps a parser and ensures that none of the parsers within it leave - * failure traces in failureTerminalAggregate, though unlike [[ByNameOps.opaque]] + * failure traces in terminalParserMsgs, though unlike [[ByNameOps.opaque]] * if there is a failure *within* the wrapped parser the failure's location * and error message will still be shown * @@ -156,7 +126,7 @@ trait SharedPackageDefs { val res = p if (ctx.verboseFailures) { - ctx.failureGroupAggregate = Msgs.empty + ctx.aggregateParserMsgs = Msgs.empty ctx.shortParserMsg = Msgs.empty } res @@ -167,7 +137,7 @@ trait SharedPackageDefs { */ def Pass(implicit ctx: P[_]): P[Unit] = { val res = ctx.freshSuccessUnit() - if (ctx.verboseFailures) ctx.setMsg(ctx.index, () => "Pass") + if (ctx.verboseFailures) ctx.reportTerminalMsg(ctx.index, Msgs.empty) res } @@ -177,7 +147,7 @@ trait SharedPackageDefs { */ def Pass[T](v: T)(implicit ctx: P[_]): P[T] = { val res = ctx.freshSuccess(v) - if (ctx.verboseFailures) ctx.setMsg(ctx.index, () => "Pass") + if (ctx.verboseFailures) ctx.reportTerminalMsg(ctx.index, () => "Pass") res } @@ -186,7 +156,7 @@ trait SharedPackageDefs { */ def Fail(implicit ctx: P[_]): P[Nothing] = { val res = ctx.freshFailure() - if (ctx.verboseFailures) ctx.setMsg(ctx.index, () => "fail") + if (ctx.verboseFailures) 
ctx.reportTerminalMsg(ctx.index, () => "fail") res } @@ -198,7 +168,7 @@ trait SharedPackageDefs { */ def Index(implicit ctx: P[_]): P[Int] = { val res = ctx.freshSuccess(ctx.index) - if (ctx.verboseFailures) ctx.setMsg(ctx.index, () => "Index") + if (ctx.verboseFailures) ctx.reportTerminalMsg(ctx.index, () => "Index") res } @@ -211,7 +181,7 @@ trait SharedPackageDefs { val res = if (!ctx.input.isReachable(ctx.index)) ctx.freshFailure().asInstanceOf[P[Unit]] else ctx.freshSuccessUnit(ctx.index + 1) - if (ctx.verboseFailures) ctx.aggregateTerminal(startIndex, () => "any-character") + if (ctx.verboseFailures) ctx.reportTerminalMsg(startIndex, () => "any-char") res } @@ -227,7 +197,7 @@ trait SharedPackageDefs { val res = if (!ctx.input.isReachable(ctx.index)) ctx.freshFailure().asInstanceOf[P[Char]] else ctx.freshSuccess(ctx.input(ctx.index), ctx.index + 1) - if (ctx.verboseFailures) ctx.aggregateTerminal(startIndex, () => "any-character") + if (ctx.verboseFailures) ctx.reportTerminalMsg(startIndex, () => "any-char") res } @@ -262,35 +232,11 @@ object SharedPackageDefs{ if (res.isSuccess) ctx.freshSuccess(ctx.successValue) else ctx.freshFailure(oldIndex) - if (ctx.verboseFailures) ctx.aggregateTerminal(oldIndex, () => msg) + if (ctx.verboseFailures) ctx.reportTerminalMsg(oldIndex, () => msg) res2.asInstanceOf[P[T]] } - def unary_!(parse0: () => P[_])(implicit ctx: P[Any]): P[Unit] = { - val startPos = ctx.index - val startCut = ctx.cut - val oldNoCut = ctx.noDropBuffer - ctx.noDropBuffer = true - val startTerminals = ctx.failureTerminalAggregate - parse0() - ctx.noDropBuffer = oldNoCut - val msg = ctx.shortParserMsg - - val res = - if (ctx.isSuccess) ctx.freshFailure(startPos) - else ctx.freshSuccessUnit(startPos) - - if (ctx.verboseFailures) { - ctx.failureTerminalAggregate = startTerminals - ctx.failureGroupAggregate = Msgs.empty - ctx.setMsg(startPos, () => "!" 
+ msg.render) - } - res.cut = startCut - res - } - - /** Wraps a parser to log when it succeeds and fails, and at what index. * Useful for seeing what is going on within your parser. Nicely indents * the logs for easy reading diff --git a/fastparse/src/fastparse/Whitespace.scala b/fastparse/src/fastparse/Whitespace.scala index 46550c35..e97ae013 100644 --- a/fastparse/src/fastparse/Whitespace.scala +++ b/fastparse/src/fastparse/Whitespace.scala @@ -4,7 +4,7 @@ import fastparse._ import fastparse.internal.Util import scala.annotation.{Annotation, switch, tailrec} - +import fastparse.internal.Msgs trait Whitespace{ def apply(ctx: ParsingRun[_]): ParsingRun[Unit] @@ -32,6 +32,7 @@ object SingleLineWhitespace { input.isReachable(index) && (input(index) match{ case ' ' | '\t' => true case _ => false}) ) index += 1 + if (ctx.verboseFailures) ctx.reportTerminalMsg(index, Msgs.empty) ctx.freshSuccessUnit(index = index) } } @@ -50,6 +51,7 @@ object MultiLineWhitespace { input.isReachable(index) && (input(index) match{ case ' ' | '\t' | '\r' | '\n' => true case _ => false}) ) index += 1 + if (ctx.verboseFailures) ctx.reportTerminalMsg(index, Msgs.empty) ctx.freshSuccessUnit(index = index) } } @@ -64,7 +66,10 @@ object ScriptWhitespace{ def apply(ctx: ParsingRun[_]) = { val input = ctx.input @tailrec def rec(current: Int, state: Int): ParsingRun[Unit] = { - if (!input.isReachable(current)) ctx.freshSuccessUnit(current) + if (!input.isReachable(current)) { + if (ctx.verboseFailures) ctx.reportTerminalMsg(current, Msgs.empty) + ctx.freshSuccessUnit(current) + } else { val currentChar = input(current) (state: @switch) match{ @@ -72,7 +77,9 @@ object ScriptWhitespace{ (currentChar: @switch) match{ case ' ' | '\t' | '\n' | '\r' => rec(current + 1, state) case '#' => rec(current + 1, state = 1) - case _ => ctx.freshSuccessUnit(current) + case _ => + if (ctx.verboseFailures) ctx.reportTerminalMsg(current, Msgs.empty) + ctx.freshSuccessUnit(current) } case 1 => rec(current + 1, state 
= if (currentChar == '\n') 0 else state) } @@ -92,15 +99,20 @@ object JavaWhitespace{ implicit object whitespace extends Whitespace { def apply(ctx: ParsingRun[_]) = { val input = ctx.input - val startIndex = ctx.index @tailrec def rec(current: Int, state: Int): ParsingRun[Unit] = { if (!input.isReachable(current)) { - if (state == 0 || state == 1) ctx.freshSuccessUnit(current) - else if(state == 2) ctx.freshSuccessUnit(current - 1) + if (state == 0 || state == 1) { + if (ctx.verboseFailures) ctx.reportTerminalMsg(current, Msgs.empty) + ctx.freshSuccessUnit(current) + } + else if(state == 2) { + if (ctx.verboseFailures) ctx.reportTerminalMsg(current, Msgs.empty) + ctx.freshSuccessUnit(current - 1) + } else { ctx.cut = true val res = ctx.freshFailure(current) - if (ctx.verboseFailures) ctx.aggregateTerminal(startIndex, () => Util.literalize("*/")) + if (ctx.verboseFailures) ctx.reportTerminalMsg(current, () => Util.literalize("*/")) res } } else { @@ -110,14 +122,18 @@ object JavaWhitespace{ (currentChar: @switch) match{ case ' ' | '\t' | '\n' | '\r' => rec(current + 1, state) case '/' => rec(current + 1, state = 2) - case _ => ctx.freshSuccessUnit(current) + case _ => + if (ctx.verboseFailures) ctx.reportTerminalMsg(current, Msgs.empty) + ctx.freshSuccessUnit(current) } case 1 => rec(current + 1, state = if (currentChar == '\n') 0 else state) case 2 => (currentChar: @switch) match{ case '/' => rec(current + 1, state = 1) case '*' => rec(current + 1, state = 3) - case _ => ctx.freshSuccessUnit(current - 1) + case _ => + if (ctx.verboseFailures) ctx.reportTerminalMsg(current, Msgs.empty) + ctx.freshSuccessUnit(current - 1) } case 3 => rec(current + 1, state = if (currentChar == '*') 4 else state) case 4 => @@ -144,16 +160,21 @@ object JsonnetWhitespace{ implicit object whitespace extends Whitespace { def apply(ctx: ParsingRun[_]) = { val input = ctx.input - val startIndex = ctx.index @tailrec def rec(current: Int, state: Int): ParsingRun[Unit] = { if 
(!input.isReachable(current)) { - if (state == 0 || state == 1) ctx.freshSuccessUnit(current) - else if(state == 2) ctx.freshSuccessUnit(current - 1) + if (state == 0 || state == 1) { + if (ctx.verboseFailures) ctx.reportTerminalMsg(current, Msgs.empty) + ctx.freshSuccessUnit(current) + } + else if(state == 2) { + if (ctx.verboseFailures) ctx.reportTerminalMsg(current, Msgs.empty) + ctx.freshSuccessUnit(current - 1) + } else { ctx.cut = true val res = ctx.freshFailure(current) - if (ctx.verboseFailures) ctx.aggregateTerminal(startIndex, () => Util.literalize("*/")) + if (ctx.verboseFailures) ctx.reportTerminalMsg(current, () => Util.literalize("*/")) res } } else { @@ -164,14 +185,18 @@ object JsonnetWhitespace{ case ' ' | '\t' | '\n' | '\r' => rec(current + 1, state) case '#' => rec(current + 1, state = 1) case '/' => rec(current + 1, state = 2) - case _ => ctx.freshSuccessUnit(current) + case _ => + if (ctx.verboseFailures) ctx.reportTerminalMsg(current, Msgs.empty) + ctx.freshSuccessUnit(current) } case 1 => rec(current + 1, state = if (currentChar == '\n') 0 else state) case 2 => (currentChar: @switch) match{ case '/' => rec(current + 1, state = 1) case '*' => rec(current + 1, state = 3) - case _ => ctx.freshSuccessUnit(current - 1) + case _ => + if (ctx.verboseFailures) ctx.reportTerminalMsg(current, Msgs.empty) + ctx.freshSuccessUnit(current - 1) } case 3 => rec(current + 1, state = if (currentChar == '*') 4 else state) case 4 => @@ -197,15 +222,20 @@ object ScalaWhitespace { implicit object whitespace extends Whitespace { def apply(ctx: ParsingRun[_]) = { val input = ctx.input - val startIndex = ctx.index @tailrec def rec(current: Int, state: Int, nesting: Int): ParsingRun[Unit] = { if (!input.isReachable(current)) { - if (state == 0 || state == 1) ctx.freshSuccessUnit(current) - else if(state == 2 && nesting == 0) ctx.freshSuccessUnit(current - 1) + if (state == 0 || state == 1) { + if (ctx.verboseFailures) ctx.reportTerminalMsg(current, Msgs.empty) + 
ctx.freshSuccessUnit(current) + } + else if(state == 2 && nesting == 0) { + if (ctx.verboseFailures) ctx.reportTerminalMsg(current, Msgs.empty) + ctx.freshSuccessUnit(current - 1) + } else { ctx.cut = true val res = ctx.freshFailure(current) - if (ctx.verboseFailures) ctx.aggregateTerminal(startIndex, () => Util.literalize("*/")) + if (ctx.verboseFailures) ctx.reportTerminalMsg(current, () => Util.literalize("*/")) res } } else { @@ -215,7 +245,9 @@ object ScalaWhitespace { (currentChar: @switch) match{ case ' ' | '\t' | '\n' | '\r' => rec(current + 1, state, 0) case '/' => rec(current + 1, state = 2, 0) - case _ => ctx.freshSuccessUnit(current) + case _ => + if (ctx.verboseFailures) ctx.reportTerminalMsg(current, Msgs.empty) + ctx.freshSuccessUnit(current) } case 1 => rec(current + 1, state = if (currentChar == '\n') 0 else state, 0) case 2 => @@ -225,7 +257,10 @@ object ScalaWhitespace { else rec(current + 1, state = 2, nesting) case '*' => rec(current + 1, state = 3, nesting + 1) case _ => - if (nesting == 0) ctx.freshSuccessUnit(current - 1) + if (nesting == 0) { + if (ctx.verboseFailures) ctx.reportTerminalMsg(current, Msgs.empty) + ctx.freshSuccessUnit(current - 1) + } else rec(current + 1, state = 3, nesting) } case 3 => diff --git a/fastparse/src/fastparse/internal/Util.scala b/fastparse/src/fastparse/internal/Util.scala index ed6941aa..c26b9f11 100644 --- a/fastparse/src/fastparse/internal/Util.scala +++ b/fastparse/src/fastparse/internal/Util.scala @@ -6,14 +6,15 @@ import scala.annotation.{switch, tailrec} import scala.collection.mutable.ArrayBuffer object Util { - def parenthize(fs: Seq[Lazy[String]]) = fs.reverseIterator.map(_()).toSeq.distinct match{ + def parenthize(fs: List[Lazy[String]]) = fs.reverseIterator.map(_()).toSeq.distinct match{ case Seq(x) => x case xs => xs.mkString("(", " | ", ")") } - def joinBinOp(lhs: Msgs, rhs: Msgs) = + def joinBinOp(lhs: Msgs, rhs: Msgs): Msgs = { if (lhs.value.isEmpty) rhs else if (rhs.value.isEmpty) lhs - else 
Msgs(new Lazy(() => lhs.render + " ~ " + rhs.render) :: Nil) + else Msgs.fromFunction(() => lhs.render + " ~ " + rhs.render) + } def consumeWhitespace[V](whitespace: fastparse.Whitespace, ctx: ParsingRun[Any]) = { val oldCapturing = ctx.noDropBuffer // completely disallow dropBuffer @@ -100,49 +101,35 @@ object Util { } - def aggregateMsgPostSep[V](startIndex: Int, - min: Int, - ctx: ParsingRun[Any], - parsedMsg: Msgs, - lastAgg: Msgs) = { - ctx.aggregateMsg( - startIndex, - () => parsedMsg.render + ".rep" + (if (min == 0) "" else s"(${min})"), - // When we fail on a sep, we collect the failure aggregate of the last - // non-sep rep body together with the failure aggregate of the sep, since - // the last non-sep rep body continuing is one of the valid ways of - // continuing the parse - ctx.failureGroupAggregate ::: lastAgg - - ) + def reportParseMsgPostSep(startIndex: Int, + min: Int, + ctx: ParsingRun[Any], + parsedMsg: Msgs, + lastAgg: Msgs) = { + reportParseMsgInRep(startIndex, min, ctx, null, parsedMsg, lastAgg, true) } - def aggregateMsgInRep[V](startIndex: Int, - min: Int, - ctx: ParsingRun[Any], - sepMsg: Msgs, - parsedMsg: Msgs, - lastAgg: Msgs, - precut: Boolean) = { - if (sepMsg == null || precut) { - ctx.aggregateMsg( - startIndex, - () => parsedMsg.render + ".rep" + (if (min == 0) "" else s"(${min})"), - if (lastAgg == null) ctx.failureGroupAggregate - else ctx.failureGroupAggregate ::: lastAgg - ) - } else { - ctx.aggregateMsg( - startIndex, - () => parsedMsg.render + ".rep" + (if (min == 0) "" else s"(${min})"), - // When we fail on a rep body, we collect both the concatenated - // sep and failure aggregate of the rep body that we tried (because - // we backtrack past the sep on failure) as well as the failure - // aggregate of the previous rep, which we could have continued - if (lastAgg == null) Util.joinBinOp(sepMsg, parsedMsg) - else Util.joinBinOp(sepMsg, parsedMsg) ::: lastAgg - ) - } + def reportParseMsgInRep(startIndex: Int, + min: Int, + 
ctx: ParsingRun[Any], + sepMsg: Msgs, + parsedMsg: Msgs, + lastAgg: Msgs, + precut: Boolean) = { + + // When we fail on a rep body, we collect both the concatenated + // sep and failure aggregate of the rep body that we tried (because + // we backtrack past the sep on failure) as well as the failure + // aggregate of the previous rep, which we could have continued + val newAgg = + if (sepMsg == null || precut) ctx.aggregateParserMsgs + else Util.joinBinOp(sepMsg, parsedMsg) + + ctx.reportAggregateMsg( + () => parsedMsg.render + ".rep" + (if (min == 0) "" else s"(${min})"), + if (lastAgg == null) newAgg + else newAgg ::: lastAgg + ) } } @@ -194,7 +181,14 @@ final class CompactTrieNode(source: TrieNode){ } object Msgs{ val empty = Msgs(Nil) + implicit def fromFunction(msgToSet: () => String): Msgs = { + Msgs(new Lazy(() => msgToSet()):: Nil) + } + implicit def fromStrings(msgsToSet: List[String]): Msgs = { + Msgs(msgsToSet.map(s => new Lazy(() => s))) + } } + case class Msgs(value: List[Lazy[String]]){ def :::(other: Msgs) = Msgs(other.value ::: value) def ::(other: Lazy[String]) = Msgs(other :: value) diff --git a/fastparse/test/src/fastparse/ExampleTests.scala b/fastparse/test/src/fastparse/ExampleTests.scala index 05ca749a..6e6b0108 100644 --- a/fastparse/test/src/fastparse/ExampleTests.scala +++ b/fastparse/test/src/fastparse/ExampleTests.scala @@ -551,5 +551,25 @@ object ExampleTests extends TestSuite{ check("oR", "Parsed: Or") check("IllegalBooleanOperation", "Cannot parse IllegalBooleanOperation as an AndOr") } + test("errorHandlingExplanation") { + import fastparse._, NoWhitespace._ + def num[$: P] = P(CharIn("0-9")).log + def sum[$: P] = P("(" ~/ expr ~ "+" ~/ expr ~ ")").log + def expr[$: P]: P[_] = P(num | sum).log + + val Parsed.Failure(_, _, extra) = fastparse.parse("(1+?)", expr(_)) + val trace = extra.trace() + val longTerminalsMsg = trace.longTerminalsMsg + assert( + longTerminalsMsg == + """Expected expr:1:1 / sum:1:1 / expr:1:4 / ([0-9] | "("):1:4, 
found "?)"""" + ) + assert( + trace.longAggregateMsg == + """Expected expr:1:1 / sum:1:1 / expr:1:4 / (num | sum):1:4, found "?)"""" + ) + + } } + } diff --git a/fastparse/test/src/fastparse/FailureTests.scala b/fastparse/test/src/fastparse/FailureTests.scala index 50cb6086..4099a477 100644 --- a/fastparse/test/src/fastparse/FailureTests.scala +++ b/fastparse/test/src/fastparse/FailureTests.scala @@ -17,8 +17,8 @@ object FailureTests extends TestSuite{ val terminals1 = Option(terminals).getOrElse(expected) assert( - trace.failure.label == label, trace.groupAggregateString == expected, + trace.label == label, trace.terminalAggregateString == terminals1 ) } @@ -30,8 +30,8 @@ object FailureTests extends TestSuite{ val trace = f.trace(true) assert( - trace.terminalAggregateString == """("a" | "b" | "c")""", - trace.groupAggregateString == """(parseB | "c")""" + trace.groupAggregateString == """(parseB | "c")""", + trace.terminalAggregateString == """("a" | "b" | "c")""" ) } @@ -126,6 +126,16 @@ object FailureTests extends TestSuite{ assert(trace2.groupAggregateString == """("," ~ parseB | "c")""") f2.index } + test("repTooFew"){ + def parseB[$: P] = P( "a" | "b" ) + def parseA[$: P] = P( parseB.rep(5) ) + val f1 @ Parsed.Failure(_, _, _) = parse("abab", parseA(_)) + + val trace = f1.trace() + + assert(trace.groupAggregateString == """("a" | "b")""") + assert(trace.terminalAggregateString == """("a" | "b")""") + } test("sepCut"){ def parseB[$: P] = P( "a" | "b" | "c" ) @@ -209,6 +219,126 @@ object FailureTests extends TestSuite{ parseA(_) } ) + test("repSeparatorIsNotIncludedInFailureMsgWhenCut") - checkOffset( + input = "ab aa", + expected = "\"b\"", + label = "\"b\"", + terminals = "\"b\"", + parser = { + def space[$: P] = P(" ") + def token[$: P] = P("a" ~/ "b") + def multiple[$: P] = P(token.rep(1, space)) + multiple(_) + } + ) + test("repSeparatorIsNotIncludedInFailureMsgWhenCutX") - checkOffset( + input = "ab aa", + expected = "\"b\"", + label = "\"b\"", + 
terminals = "\"b\"", + parser = { + def space[$: P] = P(" ") + def token[$: P] = P("a" ~/ "b") + def multiple[$: P] = P(token.repX(1, space)) + multiple(_) + } + ) + test("repSeparatorsBeforeTraceIndexDontPolluteFailureGroups") - checkOffset( + input = "p ii", + expected = "\"a\"", + label = "\"a\"", + terminals = "\"a\"", + parser = { + def space[$:P] = P( " " ) + def items[$: P]: P[Unit] = P( "p".rep(sep = " ") ~ space ~ "i" ~ "a" ) + items(_) + } + ) + test("repSeparatorsBeforeTraceIndexDontPolluteFailureGroups2") - checkOffset( + input = "p ii", + expected = "\"a\"", + label = "\"a\"", + terminals = "\"a\"", + parser = { + def space[$: P] = P(" ") + def prep[$: P] = P("p".rep(sep = space)) + def all[$: P] = P(prep ~ AnyChar ~ "i" ~ "a") + all(_) + } + ) + test("repSeparatorsBeforeTraceIndexDontPolluteFailureGroups3") - checkOffset( + input = "pt x_", + expected = """("y" | end-of-input)""", + label = "end-of-input", + terminals = """("y" | end-of-input)""", + parser = { + def c[$: P] = P( "x".repX(1, "y") ) + def d[$: P] = P( "p" ) + def b[$: P] = P( (d ~ "t").repX(1, " ") ) + def a[$: P] = P( b ~ " " ~ c ~ End ) + a(_) + } + ) + + test("lookahead") { + // We do not bother showing the enclosing `&()` for positive lookahead + // parsers. That is because to a user debugging the parser, it doesn't + // matter: whether the parser is `&(foo)` or `foo`, they still need to + // put the same input at `traceIndex` to make the parse succeed + // + // Furthermore, for both positive and negative lookahead which are + // typically used in a `&(lhs) ~ rhs` or `!lhs ~ rhs`, we cannot show + // the `rhs` even if we wanted to! The parse will already have failed + // when parsing the `lhs`, and so there is no opportunity to gather + // the `rhs`'s parse messages for display. 
+ test("positive") - checkOffset( + input = "7", + expected = """[0-6]""", + label = "[0-6]", + terminals = """[0-6]""", + parser = { + def parse[$: P] = P( &(CharIn("0-6")) ~ CharIn("4-9") ~ End ) + parse(_) + } + ) + // Commented out for now, until we can figure out a better story + // around the error reporting of negative lookaheads + +// test("negative") - checkOffset( +// input = "5", +// expected = """![0-6]""", +// label = "![0-6]", +// terminals = """![0-6]""", +// parser = { +// def parse[$: P] = P( !CharIn("0-6") ~ CharIn("4-9") ~ End) +// parse(_) +// } +// ) +// test("negative2") - checkOffset( +// input = "5", +// expected = """!([0-4] | [5-9])""", +// label = "!([0-4] | [5-9])", +// terminals = """!([0-4] | [5-9])""", +// parser = { +// // Make sure that the failure if `[0-4]` inside the `!(...)` block +// // does not end up in our reported terminals. The parser *wants* +// // the wrapped parser to fail, and giving hints to make its +// // sub-parsers succeed is counter-productive! 
+// def parse[$: P] = P( !(CharIn("0-4") | CharIn("5-9")) ~ End) +// parse(_) +// } +// ) + test("negative3") - checkOffset( + input = "9", + expected = """[4-8]""", + label = "[4-8]", + terminals = """[4-8]""", + parser = { + def parse[$: P] = P( !CharIn("0-6").log("lhs") ~ CharIn("4-8").log("rhs") ~ End ).log + parse(_) + } + ) + } } test("offset"){ @@ -364,7 +494,7 @@ object FailureTests extends TestSuite{ import NoWhitespace._ // In the case where one branch fails further in than `traceIndex`, we // collect the partial aggregation from that branch in the - // `failureGroupAggregate` but ignore that branch's downstream failure in + // `aggregateParserMsgs` but ignore that branch's downstream failure in // `failureTerminalsAggregate` def check(parser: P[_] => P[_]) = checkOffset( @@ -393,6 +523,7 @@ object FailureTests extends TestSuite{ test("repXLeft") - check{ implicit c => (("a" ~ "b") ~ "c").repX ~ "a" ~/ "d" } test("repSep") - check{ implicit c => ("a" ~ ("b" ~ "c")).rep(sep = Pass) ~ "a" ~/ "d" } test("repSepLeft") - check{ implicit c => (("a" ~ "b") ~ "c").rep(sep = Pass) ~ "a" ~/ "d" } + } test("whitespace"){ diff --git a/project/build.properties b/project/build.properties index 7c58a83a..46e43a97 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1 @@ -sbt.version=1.2.6 +sbt.version=1.8.2 diff --git a/readme/ErrorReportingInternals.scalatex b/readme/ErrorReportingInternals.scalatex new file mode 100644 index 00000000..ecb9154c --- /dev/null +++ b/readme/ErrorReportingInternals.scalatex @@ -0,0 +1,121 @@ +@import Main._ +@val tests = wd/'fastparse/'test/'src/'fastparse +@sect{Error Reporting Internals} + @p + This section goes into detail of how the FastParse error reporting + algorithm works. In general, it should "just work" when you call + @code{.longMsg}, @code{.longAggregateMsg}, or @code{.longTerminalsMsg}. 
+ Nevertheless, it is both complicated as well as important enough that it + is worth documenting in detail. + + @p + The two levels of error reporting that are most interesting are + @code{.longAggregateMsg} and @code{.longTerminalsMsg}. Consider a failed + parse of an example simplified arithmetic parser: + + @hl.ref(tests/"ExampleTests.scala", Seq("\"errorHandlingExplanation\"", "")) + + @p + This fails on the @code{?} being invalid syntax. The following error reporting + levels will treat this as follows: + @ul + @li + @code{terminalParserMsgs} lists all the lowest-level terminal parsers which are + tried at the given @code{traceIndex}, i.e. the character class @code{[0-9]} and the + token @hl.scala{"("}. This is useful to answer the question "what token/char can I + put at the error position to make my parse continue". The implementation + of @code{terminalParserMsgs} is straightforward: we simply call + @code{reportTerminalMsg} in every terminal parser, which collects all the + messages in a big list and returns it. + @li + @code{aggregateParserMsgs} lists all high-level parsers which are tried at the given + @code{traceIndex}, i.e. the named parsers @code{num} and @code{plus}. This is useful to + answer the question "What construct was the parser trying to do when it + failed" + @p + The implementation of @code{aggregateParserMsgs} is more interesting, since we need + to define what a "high level" parser means, which is non-obvious. 
+ + @sect{Definition of aggregateParserMsgs} + @p + Fastparse uses the following definition for @code{aggregateParserMsgs}: + @ul + @li + @code{aggregateParserMsgs} should contain the parsers highest in the call stack, + whose failure isn't immediately fatal to the parse (due to them being in + @code{|}, @code{.rep}, @code{?}, or other "backtrackable" operators, but + not past a @code{cut}) + @p + This is a useful definition because we already have the @code{failureStack} + containing all (named) parsers whose failure *is* immediately fatal to the + parse, both those at @code{traceIndex} and those earlier in the input. Thus + there is no need to duplicate showing any of them in the @code{aggregateParserMsgs}, + and we can instead go "one level deeper" to find the highest-level parsers + within the deepest parser of the @code{failureStack} and show those instead. + Thus, in the combined @code{longAggregateMsg}, the failure stack shows us + exactly which parsers failing directly contributed to the failure at + @code{traceIndex}, while the @code{aggregateParserMsgs} tells us which were the + highest-level parsers FastParse was trying to parse at @code{traceIndex} before + it finally failed. + @sect{Implementation of aggregateParserMsgs} + @p + To collect the @code{aggregateParserMsgs}, we use the following algorithm: + @ul + @li + When a parse which started at the given @code{traceIndex} fails without a cut: + Over-write @code{aggregateParserMsgs} with its @code{shortParserMsg} + + @li + Otherwise: + + @ul + @li + If we are a terminal parser, we set our @code{aggregateParserMsgs} to Nil + @li + If we are a compound parser, we simply sum up the @code{aggregateParserMsgs} + of all our constituent parts + @p + As mentioned earlier, the point of this is to provide the highest-level parsers which + failed at the @code{traceIndex}, but are not already part of the @code{failureStack}. 
+ Non-highest-level parsers do successfully write their message to + @code{aggregateParserMsgs}, but they are subsequently over-written by the higher + level parsers, until it reaches the point where @code{cut == true}, indicating + that any further higher-level parsers will be in @code{failureStack} and using + their message to stomp over the existing parse-failure-messages in + @code{aggregateParserMsgs} would be wasteful. + @sect{Edge Cases} + @p + There is an edge case where there is no given failure that occurs exactly at + @code{traceIndex}, e.g. + @ul + @li + Parsing @hl.scala{"ax"} with @hl.scala{P( ("a" ~ "b") ~ "c" | "a" ~/ "d" )} + @li + The final failure @code{index} and thus @code{traceIndex} is at offset 1 + @li + We would like to receive the aggregation @hl.scala{("b" | "d")} + @li + But @hl.scala{("a" ~ "b")} passes from offsets 0-2, @hl.scala{"c"} fails + + + @p + In such a case, we truncate the @code{shortParserMsg} at + @code{traceIndex} to only include the portion we're interested in (which directly + follows the failure). This then gets aggregated nicely to form the error + message from-point-of-failure. + @p + A follow-on edge case is parsing @hl.scala{"ax"} with + @hl.scala + val inner = P( "a" ~ "b" ) + P( inner ~ "c" | "a" ~/ "d" ) + @ul + @li + Here, we find that the @code{inner} parser starts before the @code{traceIndex} and + fails at @code{traceIndex}, + @li + But we want our aggregation to continue being @hl.scala{("b" | "d")}, rather than + @hl.scala{(inner | "d")}. 
+ + Thus, for opaque compound parsers like @code{inner} which do not expose their + internals, we use @code{forceAggregate} to force it to expose its internals + when its range covers the @code{traceIndex} but it isn't an exact match diff --git a/readme/ExampleParsers.scalatex b/readme/ExampleParsers.scalatex index ea9e3865..79093ddd 100644 --- a/readme/ExampleParsers.scalatex +++ b/readme/ExampleParsers.scalatex @@ -1,5 +1,6 @@ @import Main._ @val tests = wd/'fastparse/'test/'src/'fastparse +@val tests212plus = wd/'fastparse/'test/"src-2.12+"/'fastparse @val main = wd/'fastparse/'src/'fastparse @sect{Example Parsers} @@ -76,7 +77,7 @@ You can also define your own custom whitespace consumer, if none of bundled ones fit your needs: - @hl.ref(tests/"CustomWhitespaceMathTests.scala", "implicit val whitespace", "val tests") + @hl.ref(tests212plus/"CustomWhitespaceMathTests.scala", "implicit object whitespace", "val tests") diff --git a/readme/FastParseInternals.scalatex b/readme/FastParseInternals.scalatex index 8e5d03d0..162a4ab2 100644 --- a/readme/FastParseInternals.scalatex +++ b/readme/FastParseInternals.scalatex @@ -1,7 +1,7 @@ @import Main._ @sect{Internals} @p - FastParse 2.0.5 is implemented as a set of methods that perform a + FastParse is implemented as a set of methods that perform a recursive-descent parse on the given input, with all book-keeping information maintained in the @code{fastparse.ParsingRun[T]} objects (abbreviated @code{fastparse.P[T]}). 
@code{ParsingRun}s are mutable, diff --git a/readme/Readme.scalatex b/readme/Readme.scalatex index 1e2951c3..4459d7a1 100644 --- a/readme/Readme.scalatex +++ b/readme/Readme.scalatex @@ -20,7 +20,7 @@ ) ) -@sect("FastParse 2.2.2", "Fast to write, Fast running Parsers in Scala") +@sect("FastParse 3.0.0", "Fast to write, Fast running Parsers in Scala") @GettingStarted() @WritingParsers() @@ -39,5 +39,7 @@ @FastParseInternals() + @ErrorReportingInternals() + @Changelog() diff --git a/readme/StreamingParsing.scalatex b/readme/StreamingParsing.scalatex index c08d7fdc..09c0ff65 100644 --- a/readme/StreamingParsing.scalatex +++ b/readme/StreamingParsing.scalatex @@ -1,5 +1,6 @@ @import Main._ @val tests = wd/'fastparse/'test/'src/'fastparse +@val tests212plus = wd/'fastparse/'test/"src-2.12+"/'fastparse @val main = wd/'fastparse/'src/'fastparse @sect{Streaming Parsing} @@ -9,7 +10,7 @@ @hl.scala{Iterator[String]} or @hl.scala{java.io.InputStream} instead of a @code{String} to the @hl.scala{fastparse.parse} method. 
- @hl.ref(tests/"IteratorTests.scala", Seq("\"basic\"", "")) + @hl.ref(tests212plus/"IteratorTests.scala", Seq("\"basic\"", "")) @p Streaming parsing still needs to buffer input in-memory: in particular, diff --git a/scalaparse/test/src/scalaparse/unit/FailureTests.scala b/scalaparse/test/src/scalaparse/unit/FailureTests.scala index 01e84569..3249dd65 100644 --- a/scalaparse/test/src/scalaparse/unit/FailureTests.scala +++ b/scalaparse/test/src/scalaparse/unit/FailureTests.scala @@ -18,7 +18,7 @@ object FailureTests extends TestSuite{ |import a |import import """.stripMargin, - aggregate = """(Semis ~ `package` | Semis ~ TopStat | ThisPath | IdPath)""", + aggregate = """(ThisPath | IdPath)""", terminals = """("this" | "super" | "`" | var-id | chars-while(OpCharNotSlash, 1) | "/" | operator | plain-id | id)""", found = "import" ) @@ -43,7 +43,7 @@ object FailureTests extends TestSuite{ |} """.stripMargin, aggregate = """(NamedType | Refinement)""", - terminals = """(chars-while(IdCharacter, 1) | [_] | [ \t] | "/*" | "//" | "(" | "-" | "." | [0-9] | "0x" | "true" | "false" | "`" | char-pred(UpperChar) | char-pred(LowerChar) | var-id | chars-while(OpCharNotSlash, 1) | "/" | operator | plain-id | id | filter | "\"\"\"" | "\"" | "'" | "null" | "this" | "super" | "_" | "{")""", + terminals = """(chars-while(IdCharacter, 1) | [_] | [ \t] | "/*" | "//" | "\n" | "\r\n" | "(" | "-" | "." 
| [0-9] | "0x" | "true" | "false" | "`" | char-pred(UpperChar) | char-pred(LowerChar) | var-id | chars-while(OpCharNotSlash, 1) | "/" | operator | plain-id | id | filter | "\"\"\"" | "\"" | "'" | "null" | "this" | "super" | "_" | "{")""", found = ")" ) test - checkNeg( @@ -70,7 +70,7 @@ object FailureTests extends TestSuite{ | } |} """.stripMargin, - aggregate = """(FunArgs | `:` | Body | Semis | "}")""", + aggregate = """(FunArgs | `:` | Body | "}")""", terminals = null, found = "](input: S" ) @@ -93,7 +93,7 @@ object FailureTests extends TestSuite{ | } |} """.stripMargin, - aggregate = """("=>" | `:` | "." | TypeArgs | ArgList | `_` | Id | `=` | MatchAscriptionSuffix | Semis | "}")""", + aggregate = """("=>" | `:` | "." | TypeArgs | ArgList | `_` | Id | `=` | MatchAscriptionSuffix | "}")""", terminals = null, found ="1\n" ) @@ -117,7 +117,7 @@ object FailureTests extends TestSuite{ | filename.asInstanceOf 10 |} """.stripMargin, - aggregate = """("." | TypeArgs | ArgList | `_` | Id | "=>" | `=` | MatchAscriptionSuffix | Semis | "}")""", + aggregate = """("." | TypeArgs | ArgList | `_` | Id | "=>" | `=` | MatchAscriptionSuffix | "}")""", terminals = null, found = "10" ) @@ -179,7 +179,7 @@ object FailureTests extends TestSuite{ |import org.parboiled2 _ | """.stripMargin, - aggregate = """(Semis ~ `package` | "." | "," | end-of-input)""", + aggregate = """("." | "," | end-of-input)""", terminals = null, found = "_" ) @@ -220,7 +220,7 @@ object FailureTests extends TestSuite{ |} | """.stripMargin, - aggregate = """(WL ~ "." | WL ~ TypeArgs | NotNewline ~ ArgList | `_` | InfixSuffix | PostFix | "=>" | `=` | MatchAscriptionSuffix | Semis | "}")""", + aggregate = """("." | TypeArgs | ArgList | `_` | InfixSuffix | PostFix | "=>" | `=` | MatchAscriptionSuffix | "}")""", terminals = null, found = ")" ) @@ -251,7 +251,7 @@ object FailureTests extends TestSuite{ | d = 1 | """.stripMargin, - aggregate = """("." 
| TypeArgs | "=>" | `=` | MatchAscriptionSuffix | Semis | "}")""", + aggregate = """("." | TypeArgs | "=>" | `=` | MatchAscriptionSuffix | "}")""", terminals = null, found = "" ) @@ -361,7 +361,7 @@ object FailureTests extends TestSuite{ | a =:= .c |} """.stripMargin, - aggregate = """(TypeArgs | PrefixExpr | Newline | "=>" | `=` | MatchAscriptionSuffix | Semis | "}")""", + aggregate = """(TypeArgs | PrefixExpr | "=>" | `=` | MatchAscriptionSuffix | "}")""", terminals = null, found = ".c" ) @@ -416,7 +416,7 @@ object FailureTests extends TestSuite{ | val trueA = 1 |} """.stripMargin, - aggregate = """(DefTmpl | Semis ~ TopStat | end-of-input)""", + aggregate = """(DefTmpl | TopStat | end-of-input)""", terminals = null, found = "val trueA" ) @@ -426,7 +426,7 @@ object FailureTests extends TestSuite{ | val null null cow = 1 |} """.stripMargin, - aggregate = """(Id | "," | `:` | `=` | Semis | "}")""", + aggregate = """(Id | "," | `:` | `=` | "}")""", terminals = null, found = "null cow" ) @@ -436,7 +436,7 @@ object FailureTests extends TestSuite{ | val omg_+_+ = 1 |} """.stripMargin, - aggregate = """(`@` | TQ | "\"" | "." | TypeArgs | TupleEx | Id | "," | `:` | `=` | Semis | "}")""", + aggregate = """(`@` | TQ | "\"" | "." 
| TypeArgs | TupleEx | Id | "," | `:` | `=` | "}")""", terminals = null, found = "_+ = 1" ) @@ -447,7 +447,7 @@ object FailureTests extends TestSuite{ | var = 2 |} """.stripMargin, - aggregate = """(Semis ~ TmplStat | Binding | InfixPattern | VarId)""", + aggregate = """(Binding | InfixPattern | VarId)""", terminals = null, found = "= 2" ) @@ -480,7 +480,7 @@ object FailureTests extends TestSuite{ | a!.b |} """.stripMargin, - aggregate = """(TypeArgs | PrefixExpr | Newline | "=>" | `=` | MatchAscriptionSuffix | Semis | "}")""", + aggregate = """(TypeArgs | PrefixExpr | "=>" | `=` | MatchAscriptionSuffix | "}")""", terminals = null, found = ".b" ) @@ -519,7 +519,7 @@ object FailureTests extends TestSuite{ |package omg |; """.stripMargin, - aggregate = """(Semis ~ TopStat | "{")""", + aggregate = """("." | "{")""", terminals = null, found = ";" ) @@ -529,7 +529,7 @@ object FailureTests extends TestSuite{ | { a: L = } |} """.stripMargin, - aggregate = """("." | TypeArgs | `#` | Annot | `with` | { | `*` | Id | "=>" | BlockLambda | BlockStat | Semis | "}")""", + aggregate = """("." 
| TypeArgs | `#` | Annot | `with` | { | `*` | Id | "=>" | BlockLambda | BlockStat | "}")""", terminals = null, found = "= }" ) @@ -549,7 +549,7 @@ object FailureTests extends TestSuite{ |} | """.stripMargin, - aggregate = """(PostDotCheck | id)""", + aggregate = """id""", terminals = null, found = "this" ) @@ -645,7 +645,7 @@ object FailureTests extends TestSuite{ |} | """.stripMargin, - aggregate = """(TypeArgs | `#` | NLAnnot | `with` | Refinement | `*` | Id | "=>" | ExistentialClause | `>:` | `<:` | "," ~ Type | "," ~ WS ~ Newline | "]")""", + aggregate = """(TypeArgs | `#` | NLAnnot | `with` | Refinement | `*` | Id | "=>" | ExistentialClause | `>:` | `<:` | "," ~ Type | "," | "]")""", terminals = null, found = ", ]" ) @@ -669,7 +669,7 @@ object FailureTests extends TestSuite{ | } |} """.stripMargin, - aggregate = """(BlockLambda | BlockStat | Semis | "}")""", + aggregate = """(BlockLambda | BlockStat | "}")""", terminals = null, found = "case for" ) @@ -691,7 +691,7 @@ object FailureTests extends TestSuite{ |} | """.stripMargin, - aggregate = """(StringChars | Interp | LiteralSlash | Escape | NonStringEnd | "\"")""", + aggregate = """(StringChars | Interp | LiteralSlash | Escape | "\"")""", terminals = null, found = "\n" ) @@ -759,7 +759,7 @@ object FailureTests extends TestSuite{ | val (x,) = 1 |} """.stripMargin, - aggregate = """(`:` | `@` | TQ | "\"" | "." | TypeArgs | TupleEx | Id | "|" | "," ~ Pattern | "," ~ WS ~ Newline | ")")""", + aggregate = """(`:` | `@` | TQ | "\"" | "." | TypeArgs | TupleEx | Id | "|" | "," ~ Pattern | "," | ")")""", terminals = null, found = ",)" ) @@ -799,7 +799,7 @@ object FailureTests extends TestSuite{ s""" |object X{def f(x: Int, ) = 1} """.stripMargin, - aggregate = """("." | TypeArgs | `#` | NLAnnot | `with` | Refinement | `*` | Id | "=>" | ExistentialClause | `>:` | `<:` | `=` | "," ~ FunArg | "," ~ WS ~ Newline | ")")""", + aggregate = """("." 
| TypeArgs | `#` | NLAnnot | `with` | Refinement | `*` | Id | "=>" | ExistentialClause | `>:` | `<:` | `=` | "," ~ FunArg | "," | ")")""", terminals = null, found = ", )" ) @@ -807,7 +807,7 @@ object FailureTests extends TestSuite{ s""" |object X{(2,)} """.stripMargin, - aggregate = """(FloatSuffix | "L" | "l" | WL ~ "." | WL ~ TypeArgs | Pass ~ ArgList | `_` | InfixSuffix | PostFix | "=>" | `=` | MatchAscriptionSuffix | "," ~ Expr | "," ~ WS ~ Newline | ")")""", + aggregate = """(FloatSuffix | "L" | "l" | "." | TypeArgs | ArgList | `_` | InfixSuffix | PostFix | "=>" | `=` | MatchAscriptionSuffix | "," ~ Expr | "," | ")")""", terminals = null, found = ",)" ) @@ -815,7 +815,7 @@ object FailureTests extends TestSuite{ s""" |object X{f[A,]} """.stripMargin, - aggregate = """("." | TypeArgs | `#` | NLAnnot | `with` | Refinement | `*` | Id | "=>" | ExistentialClause | `>:` | `<:` | "," ~ Type | "," ~ WS ~ Newline | "]")""", + aggregate = """("." | TypeArgs | `#` | NLAnnot | `with` | Refinement | `*` | Id | "=>" | ExistentialClause | `>:` | `<:` | "," ~ Type | "," | "]")""", terminals = null, found = ",]" ) @@ -831,7 +831,7 @@ object FailureTests extends TestSuite{ s""" |object X{def f[T, B,] = 1} """.stripMargin, - aggregate = """(TypeArgList | `>:` | `<:` | `<%` | `:` | "," ~ Annot.rep ~ TypeArg | "," ~ WS ~ Newline | "]")""", + aggregate = """(TypeArgList | `>:` | `<:` | `<%` | `:` | "," ~ Annot.rep ~ TypeArg | "," | "]")""", terminals = null, found = ",]" ) @@ -943,13 +943,13 @@ object FailureTests extends TestSuite{ | for(i <- Nil if x: Int => bar) 1 |} """.stripMargin, - aggregate = """(TQ | "\"" | "." | WL ~ "." | WL ~ TypeArgs | Pass ~ ArgList | `_` | InfixSuffix | PostFix | Enumerator | ")")""", + aggregate = """(TQ | "\"" | "." | TypeArgs | ArgList | `_` | InfixSuffix | PostFix | Enumerator | ")")""", terminals = null, found = ": Int" ) test - checkNeg( s"""object Foo{; x: Int => x}""", - aggregate = """("." 
| TypeArgs | `#` | Annot | `with` | { | `*` | Id | Semis | "}")""", + aggregate = """("." | TypeArgs | `#` | Annot | `with` | { | `*` | Id | "}")""", terminals = null, found = "=> x" ) @@ -978,7 +978,7 @@ object FailureTests extends TestSuite{ | val x = 1 | ; | """.stripMargin, - aggregate = """(BlockLambda | BlockStat | Semis | "}")""", + aggregate = """(BlockLambda | BlockStat | "}")""", terminals = null, found = "" )