Skip to content

Commit 563c8b1

Browse files
committed
Fix string casing builtins
1 parent b6bcd51 commit 563c8b1

File tree

2 files changed

+20
-72
lines changed

2 files changed

+20
-72
lines changed

graalpython/com.oracle.graal.python.test/src/tests/unittest_tags/test_unicode.txt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,7 @@
4949
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_join_overflow
5050
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_literals
5151
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_ljust
52+
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_lower
5253
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_mul
5354
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_none_arguments
5455
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_printable_repr
@@ -67,6 +68,7 @@
6768
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_strip_whitespace
6869
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_subclass_add
6970
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_subscript
71+
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_title
7072
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_unicode_repr
7173
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_upper
7274
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_utf8_decode_invalid_sequences

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringBuiltins.java

Lines changed: 18 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
import java.nio.charset.UnsupportedCharsetException;
6161
import java.util.Arrays;
6262
import java.util.List;
63+
import java.util.Locale;
6364
import java.util.regex.Matcher;
6465
import java.util.regex.Pattern;
6566

@@ -775,7 +776,7 @@ static String upper(Object self,
775776

776777
@TruffleBoundary
777778
private static String toUpperCase(String str) {
778-
return str.toUpperCase();
779+
return UCharacter.toUpperCase(Locale.ENGLISH, str);
779780
}
780781
}
781782

@@ -894,7 +895,7 @@ static String doGeneric(Object self,
894895

895896
@TruffleBoundary
896897
private static String toLowerCase(String self) {
897-
return self.toLowerCase();
898+
return UCharacter.toLowerCase(Locale.ENGLISH, self);
898899
}
899900
}
900901

@@ -1867,32 +1868,29 @@ abstract static class IsTitleNode extends PythonUnaryBuiltinNode {
18671868
@Specialization
18681869
@TruffleBoundary
18691870
static boolean doString(String self) {
1870-
boolean hasContent = false;
1871-
boolean expectLower = false;
1872-
if (self.length() == 0) {
1873-
return false;
1874-
}
1871+
boolean cased = false;
1872+
boolean previousIsCased = false;
18751873
for (int i = 0; i < self.length();) {
18761874
int codePoint = self.codePointAt(i);
1877-
if (!expectLower) {
1878-
if (UCharacter.isTitleCase(codePoint) || UCharacter.isUUppercase(codePoint)) {
1879-
expectLower = true;
1880-
hasContent = true;
1881-
} else if (UCharacter.isULowercase(codePoint)) {
1875+
1876+
if (UCharacter.isUUppercase(codePoint) || UCharacter.isTitleCase(codePoint)) {
1877+
if (previousIsCased) {
18821878
return false;
18831879
}
1884-
// uncased characters are allowed
1885-
} else {
1886-
if (UCharacter.isTitleCase(codePoint) || UCharacter.isUUppercase(codePoint)) {
1880+
previousIsCased = true;
1881+
cased = true;
1882+
} else if (UCharacter.isULowercase(codePoint)) {
1883+
if (!previousIsCased) {
18871884
return false;
1888-
} else if (!UCharacter.isULowercase(codePoint)) {
1889-
// we expect another title start after an uncased character
1890-
expectLower = false;
18911885
}
1886+
previousIsCased = true;
1887+
cased = true;
1888+
} else {
1889+
previousIsCased = false;
18921890
}
18931891
i += Character.charCount(codePoint);
18941892
}
1895-
return hasContent;
1893+
return cased;
18961894
}
18971895

18981896
@Specialization(replaces = "doString")
@@ -1986,59 +1984,7 @@ static String doGeneric(Object self,
19861984

19871985
@TruffleBoundary
19881986
private static String doTitle(String self) {
1989-
boolean shouldBeLowerCase = false;
1990-
boolean translated;
1991-
StringBuilder converted = new StringBuilder();
1992-
for (int offset = 0; offset < self.length();) {
1993-
int ch = self.codePointAt(offset);
1994-
translated = false;
1995-
if (Character.isAlphabetic(ch)) {
1996-
if (shouldBeLowerCase) {
1997-
// Should be lower case
1998-
if (UCharacter.isUUppercase(ch)) {
1999-
translated = true;
2000-
if (ch < 256) {
2001-
converted.append((char) UCharacter.toLowerCase(ch));
2002-
} else {
2003-
String origPart = new String(Character.toChars(ch));
2004-
String changedPart = origPart.toLowerCase();
2005-
converted.append(changedPart);
2006-
}
2007-
}
2008-
} else {
2009-
// Should be upper case
2010-
if (UCharacter.isULowercase(ch)) {
2011-
translated = true;
2012-
if (ch < 256) {
2013-
converted.append((char) UCharacter.toUpperCase(ch));
2014-
} else {
2015-
String origPart = new String(Character.toChars(ch));
2016-
String changedPart = origPart.toUpperCase();
2017-
if (origPart.length() < changedPart.length()) {
2018-
// the original char was mapped to more chars ->
2019-
// we need to make upper case just the first one
2020-
changedPart = doTitle(changedPart);
2021-
}
2022-
converted.append(changedPart);
2023-
}
2024-
}
2025-
}
2026-
// And this was a letter
2027-
shouldBeLowerCase = true;
2028-
} else {
2029-
// This was not a letter
2030-
shouldBeLowerCase = false;
2031-
}
2032-
if (!translated) {
2033-
if (ch < 256) {
2034-
converted.append((char) ch);
2035-
} else {
2036-
converted.append(Character.toChars(ch));
2037-
}
2038-
}
2039-
offset += Character.charCount(ch);
2040-
}
2041-
return converted.toString();
1987+
return UCharacter.toTitleCase(Locale.ENGLISH, self, null);
20421988
}
20431989
}
20441990

0 commit comments

Comments
 (0)