diff --git a/benchmark/aoc2023-day12-result.txt b/benchmark/aoc2023-day12-result.txt
index e4c1390..f75baee 100644
--- a/benchmark/aoc2023-day12-result.txt
+++ b/benchmark/aoc2023-day12-result.txt
@@ -1,3 +1,3 @@
- Part 1: 7191 (time: 1396ms)
- Part 2: 6512849198636 (time: 15904ms)
+ Part 1: 7191 (time: 907ms)
+ Part 2: 6512849198636 (time: 11005ms)
diff --git a/equiv-checker.html b/equiv-checker.html
index cfe4905..c0404cf 100644
--- a/equiv-checker.html
+++ b/equiv-checker.html
@@ -252,6 +252,28 @@
content: "⚠️";
font-size: 1.2rem;
}
+
+ .anchor-info {
+ background: #d1ecf1;
+ border: 2px solid #bee5eb;
+ color: #0c5460;
+ padding: 15px;
+ border-radius: 8px;
+ margin-bottom: 20px;
+ font-weight: 500;
+ display: flex;
+ align-items: center;
+ gap: 10px;
+ }
+
+ .anchor-info::before {
+ content: "ℹ️";
+ font-size: 1.2rem;
+ }
+
+ .anchor-info code {
+ white-space: nowrap;
+ }
@@ -285,6 +307,13 @@
RegExp Equivalence Checker
Check Equivalence
+
+ Note: without ^
(start) and $
(end) anchors, a RegExp matches substrings.
+ That can lead to surprising results.
+ For example, /a+/
is technically equivalent to /a/
because /a+/
+ is the same as /^.*a+.*$/
which matches exactly the same strings as /^.*a.*$/
(i.e. /a/
).
+
+
@@ -367,6 +396,41 @@ Powered by:
const resultDiv = document.getElementById('result');
const counterexamples1 = document.getElementById('counterexamples1');
const counterexamples2 = document.getElementById('counterexamples2');
+ const anchorInfoBanner = document.getElementById('anchor-info-banner');
+
+ // Seed both inputs from the `regexp1` / `regexp2` query parameters, when present.
+ function loadFromURL() {
+ const query = new URLSearchParams(window.location.search);
+ const initial1 = query.get('regexp1');
+ const initial2 = query.get('regexp2');
+
+ if (initial1) {
+ regex1Input.value = initial1;
+ }
+ if (initial2) {
+ regex2Input.value = initial2;
+ }
+
+ // Nothing to auto-check unless both patterns were supplied.
+ if (!initial1 || !initial2) {
+ return;
+ }
+
+ // Run the equivalence check shortly after load.
+ setTimeout(checkEquivalence, 100);
+ }
+
+ // Mirror the current (trimmed) input values into the query string so the page
+ // state can be shared as a link. Empty inputs are left out of the query.
+ // history.replaceState swaps the current history entry instead of pushing a new one.
+ function updateURL() {
+ const urlParams = new URLSearchParams();
+ const regex1 = regex1Input.value.trim();
+ const regex2 = regex2Input.value.trim();
+
+ if (regex1) urlParams.set('regexp1', regex1);
+ if (regex2) urlParams.set('regexp2', regex2);
+
+ // Serialize once; URLSearchParams takes care of percent-encoding the values.
+ const query = urlParams.toString();
+ // Keep any #fragment: rebuilding the URL from the pathname alone would drop it.
+ const { pathname, hash } = window.location;
+ const newURL = (query ? `${pathname}?${query}` : pathname) + hash;
+
+ window.history.replaceState(null, '', newURL);
+ }
+
+ // Load initial values
+ loadFromURL();
function clearResults() {
resultDiv.style.display = 'none';
@@ -375,6 +439,7 @@ Powered by:
counterexamples1.innerHTML = '';
counterexamples2.style.display = 'none';
counterexamples2.innerHTML = '';
+ anchorInfoBanner.style.display = 'none';
}
function assertMatchRegex(regex, strings) {
@@ -386,12 +451,14 @@ Powered by:
return true;
}
- // clear results on new input:
+ // Any edit to either input makes the displayed result stale: clear the
+ // results and rewrite the URL query string to match the new input values.
regex1Input.addEventListener('input', () => {
clearResults()
+ updateURL()
})
regex2Input.addEventListener('input', () => {
clearResults()
+ updateURL()
})
function checkEquivalence() {
@@ -407,6 +474,19 @@ Powered by:
return;
}
+ // Check for missing anchors and show info banner
+ const hasStartAnchor1 = pattern1.includes('^');
+ const hasEndAnchor1 = pattern1.includes('$');
+ const hasStartAnchor2 = pattern2.includes('^');
+ const hasEndAnchor2 = pattern2.includes('$');
+
+ const shouldShowAnchorInfo = !hasStartAnchor1 || !hasEndAnchor1 || !hasStartAnchor2 || !hasEndAnchor2;
+
+ // Show/hide anchor info banner
+ if (shouldShowAnchorInfo) {
+ anchorInfoBanner.style.display = 'block';
+ }
+
try {
// First, validate that both patterns are valid JavaScript regex syntax
const regexA = new RegExp(pattern1);
@@ -449,13 +529,13 @@ Powered by:
}
} catch (error) {
if (error instanceof SyntaxError) {
- showResult(`❌ Invalid regex syntax: ${error.message} Please check that your regular expressions use valid JavaScript regex syntax.`, 'error');
+ showResult(`❌ Invalid regex syntax: ${encode(error.message)} Please check that your regular expressions use valid JavaScript regex syntax.`, 'error');
} else if (error instanceof ParseError) {
- showResult(`🚧 Unsupported regex syntax: The syntax is valid but not yet supported by the tool. ${error.message}`, 'error');
+ showResult(`🚧 Unsupported regex syntax: The syntax is valid but not yet supported by the tool. ${encode(error.message)}`, 'error');
} else if (error instanceof VeryLargeSyntaxTreeError || error instanceof CacheOverflowError) {
showResult(`☠️ Excessive resource use detected: This example might be computationally hard.`, 'error');
} else {
- showResult(`❌ Unexpected error: ${error.message}`, 'error');
+ showResult(`❌ Unexpected error: ${encode(error.message)}`, 'error');
}
}
};
@@ -481,6 +561,18 @@ Powered by:
resultDiv.style.display = 'block';
}
+ // Renders a single example string for display. The empty string gets a
+ // textual placeholder; every other string is passed through encode() first.
+ // NOTE(review): encode() is presumably an HTML-escaping helper, and the markup
+ // inside these literals looks stripped in this view — confirm against the real file.
+ const renderExample = (str) => {
+ if (str === '') {
+ return 'the empty string ';
+ } else {
+ return `${encode(str)}
`;
+ }
+ };
+
+ // Renders every element of `array` with renderExample, wraps each rendered
+ // item in a per-item template, and concatenates everything into one string.
+ // NOTE(review): the wrapper markup in the templates looks stripped in this view — confirm.
+ const renderExampleList = array => {
+ return `${array.map(renderExample).map(str => `${str} `).join('')} `;
+ }
+
function showResultWithDiagram(message, type, examples = null, hasValidationIssue = false) {
const resultDiv = document.getElementById('result');
@@ -501,21 +593,30 @@ Powered by:
// Add examples if provided
if (examples) {
- html += 'For example, '
-
+ html += '
Example strings matched by one RegExp but not the other:
'
+ html += '';
+
+ // Left column for RegExp 1 examples
+ html += '
';
if (examples.regex1Only && examples.regex1Only.length > 0) {
- html += `RegExp 1 matches `;
- html += examples.regex1Only.map(str => `
${encode(str)}
`).join(', ');
- html += ' but RegExp 2 does not. ';
+ html += `
+
RegExp 1 only
+ ${renderExampleList(examples.regex1Only)}
+ `;
}
-
+ html += '
';
+
+ // Right column for RegExp 2 examples
+ html += '
';
if (examples.regex2Only && examples.regex2Only.length > 0) {
- html += `RegExp 2 matches `;
- html += examples.regex2Only.map(str => `
${encode(str)}
`).join(', ');
- html += 'but RegExp 1 does not. ';
+ html += `
+
RegExp 2 only
+ ${renderExampleList(examples.regex2Only)}
+ `;
}
-
- html += ''
+ html += '
';
+
+ html += '
';
}
resultDiv.innerHTML = html;
diff --git a/src/char-set.ts b/src/char-set.ts
index 59aeacb..f709762 100644
--- a/src/char-set.ts
+++ b/src/char-set.ts
@@ -317,8 +317,28 @@ export function toString(set: CharSet): string {
}
export function enumerate(set: CharSet): Stream.Stream {
+ // If we enumerate the set in "unicode order" then we only get
+ // chars like "\u0000", "\u0001" for a while. We prefer to enumerate
+ // more common characters first, since users will usually only
+ // look at the first few items in the enumeration.
+ const lowerChars = charRange('a', 'z')
+ const upperChars = charRange('A', 'Z')
+ const numChars = charRange('0', '9')
+
+ // The input set minus the "common character ranges":
+ const restChars = [lowerChars, upperChars, numChars].reduce(
+ (acc, item) => difference(acc, item), set
+ )
+
+ const rangesWithBiasedOrder = [
+ ...getRanges(intersection(lowerChars, set)),
+ ...getRanges(intersection(upperChars, set)),
+ ...getRanges(intersection(numChars, set)),
+ ...getRanges(restChars),
+ ]
+
return Stream.concat(Stream.fromArray(
- [...getRanges(set)].map(
+ rangesWithBiasedOrder.map(
range => Stream.map(
codePoint => String.fromCodePoint(codePoint),
Stream.range(range.start, range.end)
diff --git a/src/utils.ts b/src/utils.ts
index 7e7beca..8446b8a 100644
--- a/src/utils.ts
+++ b/src/utils.ts
@@ -78,6 +78,10 @@ export function checkedAllCases(_witness: never): never {
throw new Error('not all cases checked')
}
+/**
+ * Returns its argument unchanged. Useful for generic contexts where a transformation
+ * function is expected but no transformation is needed.
+ *
+ * @param x - any value
+ * @returns the very same value `x` (no copy is made)
+ */
export function identity(x: T): T {
return x
}
@@ -111,7 +115,8 @@ export function uniqWith(array: T[], compare: (l: T, r: T) => number): T[] {
}
/**
- * https://stackoverflow.com/a/52171480
+ * Computes a hash value for an array of numbers using a fast non-cryptographic hash algorithm.
+ * Based on implementation from https://stackoverflow.com/a/52171480
*/
export function hashNums(nums: number[], seed = 0): number {
let h1 = 0xdeadbeef ^ seed, h2 = 0x41c6ce57 ^ seed
@@ -128,6 +133,10 @@ export function hashNums(nums: number[], seed = 0): number {
return 4294967296 * (2097151 & h2) + (h1 >>> 0)
}
+/**
+ * Computes a hash value for a string using a fast non-cryptographic hash algorithm.
+ * Uses the same algorithm as hashNums but operates on character codes.
+ */
export function hashStr(str: string, seed = 0): number {
let h1 = 0xdeadbeef ^ seed, h2 = 0x41c6ce57 ^ seed
for (let i=0; i < str.length; i++) {
@@ -143,10 +152,17 @@ export function hashStr(str: string, seed = 0): number {
return 4294967296 * (2097151 & h2) + (h1 >>> 0)
}
+/**
+ * Performs a bitwise XOR (`^`) on two numbers.
+ *
+ * @param a - left operand
+ * @param b - right operand
+ * @returns `a ^ b`; as with every JavaScript bitwise operator, both operands are
+ *   converted to 32-bit integers and the result is a signed 32-bit integer
+ */
export function xor(a: number, b: number): number {
return a^b
}
+/**
+ * Finds the element with the minimum score according to the provided scoring function.
+ * Returns undefined if the iterable is empty.
+ */
export function minBy(iterable: Iterable, scoreOf: (item: T) => number): T | undefined {
let minItem = undefined
let minScore = Infinity
@@ -160,6 +176,17 @@ export function minBy(iterable: Iterable, scoreOf: (item: T) => number): T
return minItem
}
+/**
+ * Calculates the sum of all numbers in the array.
+ *
+ * @param items - numbers to add up
+ * @returns the total, or `0` for an empty array (the fold starts at 0)
+ */
export function sum(items: number[]) {
return items.reduce((a,b) => a+b, 0)
}
+
+/**
+ * Type guard that checks whether an unknown value is one of the elements of the provided array.
+ * Membership is decided by `Array.prototype.includes` (SameValueZero, so `NaN` matches `NaN`).
+ * The array is only read, so it is accepted as `readonly`; this also allows `as const` tuples,
+ * for which `T` is inferred as the union of the literal element types.
+ *
+ * @param item - value to look up
+ * @param array - candidate values; never mutated
+ * @returns `true` (narrowing `item` to `T`) if `item` occurs in `array`, otherwise `false`
+ */
+export function isOneOf<T>(item: unknown, array: readonly T[]): item is T {
+ return (array as readonly unknown[]).includes(item)
+}