@@ -12,6 +12,7 @@ const { transform, combineTransformers, getIntersectionWordSets, JS, Words, NFA,
1212const scslre = require ( 'scslre' ) ;
1313const path = require ( 'path' ) ;
1414const { argv } = require ( 'yargs' ) ;
15+ const RAA = require ( 'regexp-ast-analysis' ) ;
1516
1617/**
1718 * A map from language id to a list of code snippets in that language.
@@ -130,6 +131,7 @@ function testPatterns(Prism, mainLanguage) {
130131 * @property {string } name
131132 * @property {any } parent
132133 * @property {boolean } lookbehind Whether the first capturing group of the pattern is a Prism lookbehind group.
134+ * @property {CapturingGroup | undefined } lookbehindGroup
133135 * @property {{ key: string, value: any }[] } path
134136 * @property {(message: string) => void } reportError
135137 */
@@ -163,14 +165,17 @@ function testPatterns(Prism, mainLanguage) {
163165 }
164166
165167 const parent = path . length > 1 ? path [ path . length - 2 ] . value : undefined ;
168+ const lookbehind = key === 'pattern' && parent && ! ! parent . lookbehind ;
169+ const lookbehindGroup = lookbehind ? getFirstCapturingGroup ( ast . pattern ) : undefined ;
166170 callback ( {
167171 pattern : value ,
168172 ast,
169173 tokenPath,
170174 name : key ,
171175 parent,
172176 path,
173- lookbehind : key === 'pattern' && parent && ! ! parent . lookbehind ,
177+ lookbehind,
178+ lookbehindGroup,
174179 reportError : message => errors . push ( message )
175180 } ) ;
176181 } catch ( error ) {
@@ -231,9 +236,10 @@ function testPatterns(Prism, mainLanguage) {
231236
232237
233238 it ( '- should not match the empty string' , function ( ) {
234- forEachPattern ( ( { pattern, tokenPath } ) => {
239+ forEachPattern ( ( { ast , pattern, tokenPath } ) => {
235240 // test for empty string
236- assert . notMatch ( '' , pattern , `${ tokenPath } : ${ pattern } should not match the empty string.\n\n`
241+ const empty = RAA . isPotentiallyZeroLength ( ast . pattern . alternatives ) ;
242+ assert . isFalse ( empty , `${ tokenPath } : ${ pattern } should not match the empty string.\n\n`
237243 + `Patterns that do match the empty string can potentially cause infinitely many empty tokens. `
238244 + `Make sure that all patterns always consume at least one character.` ) ;
239245 } ) ;
@@ -256,47 +262,37 @@ function testPatterns(Prism, mainLanguage) {
256262 } ) ;
257263
258264 it ( '- should not have lookbehind groups that can be preceded by other some characters' , function ( ) {
259- forEachPattern ( ( { ast, tokenPath, lookbehind } ) => {
260- if ( ! lookbehind ) {
261- return ;
265+ forEachPattern ( ( { tokenPath, lookbehindGroup } ) => {
266+ if ( lookbehindGroup && ! isFirstMatch ( lookbehindGroup ) ) {
267+ assert . fail ( `${ tokenPath } : The lookbehind group ${ lookbehindGroup . raw } might be preceded by some characters.\n\n`
268+ + `Prism assumes that the lookbehind group, if captured, is the first thing matched by the regex. `
269+ + `If characters might precede the lookbehind group (e.g. /a?(b)c/), then Prism cannot correctly apply the lookbehind correctly in all cases.\n`
270+ + `To fix this, either remove the preceding characters or include them in the lookbehind group.` ) ;
262271 }
263- forEachCapturingGroup ( ast . pattern , ( { group, number } ) => {
264- if ( number === 1 && ! isFirstMatch ( group ) ) {
265- assert . fail ( `${ tokenPath } : The lookbehind group ${ group . raw } might be preceded by some characters.\n\n`
266- + `Prism assumes that the lookbehind group, if captured, is the first thing matched by the regex. `
267- + `If characters might precede the lookbehind group (e.g. /a?(b)c/), then Prism cannot correctly apply the lookbehind correctly in all cases.\n`
268- + `To fix this, either remove the preceding characters or include them in the lookbehind group.` ) ;
269- }
270- } ) ;
271272 } ) ;
272273 } ) ;
273274
274275 it ( '- should not have lookbehind groups that only have zero-width alternatives' , function ( ) {
275- forEachPattern ( ( { ast, tokenPath, lookbehind, reportError } ) => {
276- if ( ! lookbehind ) {
277- return ;
276+ forEachPattern ( ( { tokenPath, lookbehindGroup, reportError } ) => {
277+ if ( lookbehindGroup && RAA . isZeroLength ( lookbehindGroup ) ) {
278+ const groupContent = lookbehindGroup . raw . substr ( 1 , lookbehindGroup . raw . length - 2 ) ;
279+ const replacement = lookbehindGroup . alternatives . length === 1 ? groupContent : `(?:${ groupContent } )` ;
280+ reportError ( `${ tokenPath } : The lookbehind group ${ lookbehindGroup . raw } does not consume characters.\n\n`
281+ + `Therefor it is not necessary to use a lookbehind group.\n`
282+ + `To fix this, replace the lookbehind group with ${ replacement } and remove the 'lookbehind' property.` ) ;
278283 }
279- forEachCapturingGroup ( ast . pattern , ( { group, number } ) => {
280- if ( number === 1 && isAlwaysZeroWidth ( group ) ) {
281- const groupContent = group . raw . substr ( 1 , group . raw . length - 2 ) ;
282- const replacement = group . alternatives . length === 1 ? groupContent : `(?:${ groupContent } )` ;
283- reportError ( `${ tokenPath } : The lookbehind group ${ group . raw } does not consume characters.\n\n`
284- + `Therefor it is not necessary to use a lookbehind group.\n`
285- + `To fix this, replace the lookbehind group with ${ replacement } and remove the 'lookbehind' property.` ) ;
286- }
287- } ) ;
288284 } ) ;
289285 } ) ;
290286
291287 it ( '- should not have unused capturing groups' , function ( ) {
292- forEachPattern ( ( { ast, tokenPath, lookbehind , reportError } ) => {
288+ forEachPattern ( ( { ast, tokenPath, lookbehindGroup , reportError } ) => {
293289 forEachCapturingGroup ( ast . pattern , ( { group, number } ) => {
294- const isLookbehindGroup = lookbehind && number === 1 ;
290+ const isLookbehindGroup = group === lookbehindGroup ;
295291 if ( group . references . length === 0 && ! isLookbehindGroup ) {
296292 const fixes = [ ] ;
297293 fixes . push ( `Make this group a non-capturing group ('(?:...)' instead of '(...)'). (It's usually this option.)` ) ;
298294 fixes . push ( `Reference this group with a backreference (use '\\${ number } ' for this).` ) ;
299- if ( number === 1 && ! lookbehind ) {
295+ if ( number === 1 && ! lookbehindGroup ) {
300296 if ( isFirstMatch ( group ) ) {
301297 fixes . push ( `Add a 'lookbehind: true' declaration.` ) ;
302298 } else {
@@ -392,28 +388,26 @@ function testPatterns(Prism, mainLanguage) {
392388
393389
394390/**
395- * Returns whether the given element will always have zero width meaning that it doesn't consume characters .
391+ * Returns the first capturing group in the given pattern .
396392 *
397- * @param {Element } element
398- * @returns {boolean }
393+ * @param {Pattern } pattern
394+ * @returns {CapturingGroup | undefined }
399395 */
400- function isAlwaysZeroWidth ( element ) {
401- switch ( element . type ) {
402- case 'Assertion' :
403- // assertions == ^, $, \b, lookarounds
404- return true ;
405- case 'Quantifier' :
406- return element . max === 0 || isAlwaysZeroWidth ( element . element ) ;
407- case 'CapturingGroup' :
408- case 'Group' :
409- // every element in every alternative has to be of zero length
410- return element . alternatives . every ( alt => alt . elements . every ( isAlwaysZeroWidth ) ) ;
411- case 'Backreference' :
412- // on if the group referred to is of zero length
413- return isAlwaysZeroWidth ( element . resolved ) ;
414- default :
415- return false ; // what's left are characters
396+ function getFirstCapturingGroup ( pattern ) {
397+ let cap = undefined ;
398+
399+ try {
400+ visitRegExpAST ( pattern , {
401+ onCapturingGroupEnter ( node ) {
402+ cap = node ;
403+ throw new Error ( 'stop' ) ;
404+ }
405+ } ) ;
406+ } catch ( error ) {
407+ // ignore errors
416408 }
409+
410+ return cap ;
417411}
418412
419413/**
@@ -427,7 +421,7 @@ function isFirstMatch(element) {
427421 switch ( parent . type ) {
428422 case 'Alternative' : {
429423 // all elements before this element have to of zero length
430- if ( ! parent . elements . slice ( 0 , parent . elements . indexOf ( element ) ) . every ( isAlwaysZeroWidth ) ) {
424+ if ( ! parent . elements . slice ( 0 , parent . elements . indexOf ( element ) ) . every ( RAA . isZeroLength ) ) {
431425 return false ;
432426 }
433427 const grandParent = parent . parent ;
@@ -457,13 +451,7 @@ function isFirstMatch(element) {
457451 * @returns {boolean }
458452 */
459453function underAStar ( node ) {
460- if ( node . type === 'Quantifier' && node . max > 10 ) {
461- return true ;
462- } else if ( node . parent ) {
463- return underAStar ( node . parent ) ;
464- } else {
465- return false ;
466- }
454+ return RAA . getEffectiveMaximumRepetition ( node ) > 10 ;
467455}
468456
469457/**
0 commit comments