From c25a7be6713a8463f0961b09d88a7ba03bcf3a26 Mon Sep 17 00:00:00 2001 From: Timmy Welch Date: Sun, 5 May 2024 16:13:40 -0700 Subject: [PATCH 1/2] x/text/internal/colltab: Avoid using Elem.Primary() == 0 in numeric.go Elem.Primary() == 0 has odd ordering properties, starting at 1 allows 0 to consistently be ordered before other numbers when non-numeric text follows a 0 Also fixes an issue comparing numbers > 269 characters with numbers < 270 characters Fixes golang/go#25554 --- collate/collate_test.go | 14 +++++++++++ internal/colltab/numeric.go | 4 ++-- internal/colltab/numeric_test.go | 40 +++++++++++++++++--------------- 3 files changed, 37 insertions(+), 21 deletions(-) diff --git a/collate/collate_test.go b/collate/collate_test.go index 0e78b96fa..8953b4d90 100644 --- a/collate/collate_test.go +++ b/collate/collate_test.go @@ -6,6 +6,7 @@ package collate import ( "bytes" + "strings" "testing" "golang.org/x/text/internal/colltab" @@ -474,6 +475,19 @@ func TestNumeric(t *testing.T) { {"A-2", "A-12", -1}, {"A-12", "A-2", 1}, {"A-0001", "A-1", 0}, + {"0000-", "1-", -1}, + {"00001", "1", 0}, + {"00", "00", 0}, + {"0", "00", 0}, + {"00", "0", 0}, + {"01", "001", 0}, + {"01", "1", 0}, + {"1", "01", 0}, + {"9-A", "0-A", 1}, + {"99-A", "0-A", 1}, + {"9-A", "1-A", 1}, + {"99-A", "1-A", 1}, + {strings.Repeat("9", 270)+"-A", "1-A", 1}, } { if got := c.CompareString(tt.a, tt.b); got != tt.want { t.Errorf("%d: CompareString(%s, %s) = %d; want %d", i, tt.a, tt.b, got, tt.want) diff --git a/internal/colltab/numeric.go b/internal/colltab/numeric.go index 53b819cc3..d5eecf363 100644 --- a/internal/colltab/numeric.go +++ b/internal/colltab/numeric.go @@ -224,13 +224,13 @@ func (nc *numberConverter) update(elems []Elem) bool { return false } nc.nDigits++ - return nc.nDigits < maxDigits + return nc.nDigits+1 < maxDigits } // result fills in the length element for the digit sequence and returns the // completed collation elements. func (nc *numberConverter) result() []Elem { - e, _ := MakeElem(nc.nDigits, defaultSecondary, defaultTertiary, 0) + e, _ := MakeElem(nc.nDigits+1, defaultSecondary, defaultTertiary, 0) nc.elems[nc.lenIndex] = e return nc.elems } diff --git a/internal/colltab/numeric_test.go b/internal/colltab/numeric_test.go index e9406ae3f..9e0ab64ab 100644 --- a/internal/colltab/numeric_test.go +++ b/internal/colltab/numeric_test.go @@ -78,26 +78,28 @@ func TestNumericAppendNext(t *testing.T) { {"a", p(5)}, {"klm", p(99)}, {"aa", p(5, 5)}, - {"1", p(120, 1, 101)}, - {"0", p(120, 0)}, - {"01", p(120, 1, 101)}, - {"0001", p(120, 1, 101)}, - {"10", p(120, 2, 101, 100)}, - {"99", p(120, 2, 119, 119)}, - {"9999", p(120, 4, 119, 119, 119, 119)}, - {"1a", p(120, 1, 101, 5)}, - {"0b", p(120, 0, 6)}, - {"01c", p(120, 1, 101, 8, 2)}, - {"10x", p(120, 2, 101, 100, 200)}, - {"99y", p(120, 2, 119, 119, 201)}, - {"9999nop", p(120, 4, 119, 119, 119, 119, 121)}, + {"1", p(120, 2, 101)}, + {"0", p(120, 1)}, + {"00", p(120, 1)}, + {"01", p(120, 2, 101)}, + {"0001", p(120, 2, 101)}, + {"02", p(120, 2, 102)}, + {"10", p(120, 3, 101, 100)}, + {"99", p(120, 3, 119, 119)}, + {"9999", p(120, 5, 119, 119, 119, 119)}, + {"1a", p(120, 2, 101, 5)}, + {"0b", p(120, 1, 6)}, + {"01c", p(120, 2, 101, 8, 2)}, + {"10x", p(120, 3, 101, 100, 200)}, + {"99y", p(120, 3, 119, 119, 201)}, + {"9999nop", p(120, 5, 119, 119, 119, 119, 121)}, // Allow follow-up collation elements if they have a zero non-primary. - {"١٢٩", []Elem{e(120), e(3), e(101), tPlus3, e(102), tPlus3, e(119), tPlus3}}, + {"١٢٩", []Elem{e(120), e(4), e(101), tPlus3, e(102), tPlus3, e(119), tPlus3}}, { "129", []Elem{ - e(120), e(3), + e(120), e(4), e(101, digSec, digTert+1), e(102, digSec, digTert+3), e(119, digSec, digTert+1), @@ -105,7 +107,7 @@ func TestNumericAppendNext(t *testing.T) { }, // Ensure AppendNext* adds to the given buffer. - {"a10", p(5, 120, 2, 101, 100)}, + {"a10", p(5, 120, 3, 101, 100)}, } { nw := NewNumericWeighter(numWeighter) @@ -137,12 +139,12 @@ func TestNumericOverflow(t *testing.T) { got, n := nw.AppendNextString(nil, manyDigits) - if n != maxDigits { - t.Errorf("n: got %d; want %d", n, maxDigits) + if n != maxDigits-1 { // A digit is lost because elem.Primary() == 0 has odd ordering properties and is skipped + t.Errorf("n: got %d; want %d", n, maxDigits-1) } if got[1].Primary() != maxDigits { - t.Errorf("primary(e[1]): got %d; want %d", n, maxDigits) + t.Errorf("primary(e[1]): got %d; want %d", got[1].Primary(), maxDigits) } } From ce407f2fffb249a30edd86132e5c165a8137ca4c Mon Sep 17 00:00:00 2001 From: Timmy Welch Date: Sun, 5 May 2024 16:12:39 -0700 Subject: [PATCH 2/2] x/text/internal/colltab: Sort numbers with leading zeros Makes numeric sorting deterministic. Particularly useful for approximating how an OS file manager will sort files. --- collate/collate_test.go | 14 ++++----- internal/colltab/numeric.go | 17 +++++++++++ internal/colltab/numeric_test.go | 51 +++++++++++++++++++++----------- 3 files changed, 58 insertions(+), 24 deletions(-) diff --git a/collate/collate_test.go b/collate/collate_test.go index 8953b4d90..b9360f26f 100644 --- a/collate/collate_test.go +++ b/collate/collate_test.go @@ -474,15 +474,15 @@ func TestNumeric(t *testing.T) { {"A-1", "A-2", -1}, {"A-2", "A-12", -1}, {"A-12", "A-2", 1}, - {"A-0001", "A-1", 0}, + {"A-0001", "A-1", 1}, {"0000-", "1-", -1}, - {"00001", "1", 0}, + {"00001", "1", 1}, {"00", "00", 0}, - {"0", "00", 0}, - {"00", "0", 0}, - {"01", "001", 0}, - {"01", "1", 0}, - {"1", "01", 0}, + {"0", "00", -1}, + {"00", "0", 1}, + {"01", "001", -1}, + {"01", "1", 1}, + {"1", "01", -1}, {"9-A", "0-A", 1}, {"99-A", "0-A", 1}, {"9-A", "1-A", 1}, diff --git a/internal/colltab/numeric.go b/internal/colltab/numeric.go index d5eecf363..f30230816 100644 --- a/internal/colltab/numeric.go +++ b/internal/colltab/numeric.go @@ -80,6 +80,7 @@ func (nw *numericWeighter) AppendNext(buf []Elem, s []byte) (ce []Elem, n int) { } // ce might have been grown already, so take it instead of buf. nc.init(ce, len(buf), isZero) + oldIndex := len(nc.elems) for n < len(s) { ce, sz := nw.Weighter.AppendNext(nc.elems, s[n:]) nc.b = s @@ -87,7 +88,12 @@ func (nw *numericWeighter) AppendNext(buf []Elem, s []byte) (ce []Elem, n int) { if !nc.update(ce) { break } + oldIndex = len(nc.elems) } + nc.elems = append(nc.elems, 0) + copy(nc.elems[oldIndex+1:], nc.elems[oldIndex:]) + nc.elems[oldIndex], _ = MakeElem(nc.zero+1, defaultSecondary, defaultTertiary, 0) + return nc.result(), n } @@ -105,6 +111,7 @@ func (nw *numericWeighter) AppendNextString(buf []Elem, s string) (ce []Elem, n return ce, n } nc.init(ce, len(buf), isZero) + oldIndex := len(nc.elems) for n < len(s) { ce, sz := nw.Weighter.AppendNextString(nc.elems, s[n:]) nc.s = s @@ -112,7 +119,12 @@ func (nw *numericWeighter) AppendNextString(buf []Elem, s string) (ce []Elem, n if !nc.update(ce) { break } + oldIndex = len(nc.elems) } + nc.elems = append(nc.elems, 0) + copy(nc.elems[oldIndex+1:], nc.elems[oldIndex:]) + nc.elems[oldIndex], _ = MakeElem(nc.zero+1, defaultSecondary, defaultTertiary, 0) + return nc.result(), n } @@ -122,6 +134,7 @@ type numberConverter struct { elems []Elem nDigits int lenIndex int + zero int s string // set if the input was of type string b []byte // set if the input was of type []byte @@ -133,6 +146,7 @@ func (nc *numberConverter) init(elems []Elem, oldLen int, isZero bool) { // Insert a marker indicating the start of a number and a placeholder // for the number of digits. if isZero { + nc.zero++ elems = append(elems[:oldLen], nc.w.numberStart, 0) } else { elems = append(elems, 0, 0) @@ -217,6 +231,9 @@ const maxDigits = 1<