|
60 | 60 | import java.nio.charset.UnsupportedCharsetException;
|
61 | 61 | import java.util.Arrays;
|
62 | 62 | import java.util.List;
|
| 63 | +import java.util.Locale; |
63 | 64 | import java.util.regex.Matcher;
|
64 | 65 | import java.util.regex.Pattern;
|
65 | 66 |
|
@@ -775,7 +776,7 @@ static String upper(Object self,
|
775 | 776 |
|
776 | 777 | @TruffleBoundary
|
777 | 778 | private static String toUpperCase(String str) {
|
778 |
| - return str.toUpperCase(); |
| 779 | + return UCharacter.toUpperCase(Locale.ENGLISH, str); |
779 | 780 | }
|
780 | 781 | }
|
781 | 782 |
|
@@ -894,7 +895,7 @@ static String doGeneric(Object self,
|
894 | 895 |
|
895 | 896 | @TruffleBoundary
|
896 | 897 | private static String toLowerCase(String self) {
|
897 |
| - return self.toLowerCase(); |
| 898 | + return UCharacter.toLowerCase(Locale.ENGLISH, self); |
898 | 899 | }
|
899 | 900 | }
|
900 | 901 |
|
@@ -1867,32 +1868,29 @@ abstract static class IsTitleNode extends PythonUnaryBuiltinNode {
|
1867 | 1868 | @Specialization
|
1868 | 1869 | @TruffleBoundary
|
1869 | 1870 | static boolean doString(String self) {
|
1870 |
| - boolean hasContent = false; |
1871 |
| - boolean expectLower = false; |
1872 |
| - if (self.length() == 0) { |
1873 |
| - return false; |
1874 |
| - } |
| 1871 | + boolean cased = false; |
| 1872 | + boolean previousIsCased = false; |
1875 | 1873 | for (int i = 0; i < self.length();) {
|
1876 | 1874 | int codePoint = self.codePointAt(i);
|
1877 |
| - if (!expectLower) { |
1878 |
| - if (UCharacter.isTitleCase(codePoint) || UCharacter.isUUppercase(codePoint)) { |
1879 |
| - expectLower = true; |
1880 |
| - hasContent = true; |
1881 |
| - } else if (UCharacter.isULowercase(codePoint)) { |
| 1875 | + |
| 1876 | + if (UCharacter.isUUppercase(codePoint) || UCharacter.isTitleCase(codePoint)) { |
| 1877 | + if (previousIsCased) { |
1882 | 1878 | return false;
|
1883 | 1879 | }
|
1884 |
| - // uncased characters are allowed |
1885 |
| - } else { |
1886 |
| - if (UCharacter.isTitleCase(codePoint) || UCharacter.isUUppercase(codePoint)) { |
| 1880 | + previousIsCased = true; |
| 1881 | + cased = true; |
| 1882 | + } else if (UCharacter.isULowercase(codePoint)) { |
| 1883 | + if (!previousIsCased) { |
1887 | 1884 | return false;
|
1888 |
| - } else if (!UCharacter.isULowercase(codePoint)) { |
1889 |
| - // we expect another title start after an uncased character |
1890 |
| - expectLower = false; |
1891 | 1885 | }
|
| 1886 | + previousIsCased = true; |
| 1887 | + cased = true; |
| 1888 | + } else { |
| 1889 | + previousIsCased = false; |
1892 | 1890 | }
|
1893 | 1891 | i += Character.charCount(codePoint);
|
1894 | 1892 | }
|
1895 |
| - return hasContent; |
| 1893 | + return cased; |
1896 | 1894 | }
|
1897 | 1895 |
|
1898 | 1896 | @Specialization(replaces = "doString")
|
@@ -1986,59 +1984,7 @@ static String doGeneric(Object self,
|
1986 | 1984 |
|
1987 | 1985 | @TruffleBoundary
|
1988 | 1986 | private static String doTitle(String self) {
|
1989 |
| - boolean shouldBeLowerCase = false; |
1990 |
| - boolean translated; |
1991 |
| - StringBuilder converted = new StringBuilder(); |
1992 |
| - for (int offset = 0; offset < self.length();) { |
1993 |
| - int ch = self.codePointAt(offset); |
1994 |
| - translated = false; |
1995 |
| - if (Character.isAlphabetic(ch)) { |
1996 |
| - if (shouldBeLowerCase) { |
1997 |
| - // Should be lower case |
1998 |
| - if (UCharacter.isUUppercase(ch)) { |
1999 |
| - translated = true; |
2000 |
| - if (ch < 256) { |
2001 |
| - converted.append((char) UCharacter.toLowerCase(ch)); |
2002 |
| - } else { |
2003 |
| - String origPart = new String(Character.toChars(ch)); |
2004 |
| - String changedPart = origPart.toLowerCase(); |
2005 |
| - converted.append(changedPart); |
2006 |
| - } |
2007 |
| - } |
2008 |
| - } else { |
2009 |
| - // Should be upper case |
2010 |
| - if (UCharacter.isULowercase(ch)) { |
2011 |
| - translated = true; |
2012 |
| - if (ch < 256) { |
2013 |
| - converted.append((char) UCharacter.toUpperCase(ch)); |
2014 |
| - } else { |
2015 |
| - String origPart = new String(Character.toChars(ch)); |
2016 |
| - String changedPart = origPart.toUpperCase(); |
2017 |
| - if (origPart.length() < changedPart.length()) { |
2018 |
| - // the original char was mapped to more chars -> |
2019 |
| - // we need to make upper case just the first one |
2020 |
| - changedPart = doTitle(changedPart); |
2021 |
| - } |
2022 |
| - converted.append(changedPart); |
2023 |
| - } |
2024 |
| - } |
2025 |
| - } |
2026 |
| - // And this was a letter |
2027 |
| - shouldBeLowerCase = true; |
2028 |
| - } else { |
2029 |
| - // This was not a letter |
2030 |
| - shouldBeLowerCase = false; |
2031 |
| - } |
2032 |
| - if (!translated) { |
2033 |
| - if (ch < 256) { |
2034 |
| - converted.append((char) ch); |
2035 |
| - } else { |
2036 |
| - converted.append(Character.toChars(ch)); |
2037 |
| - } |
2038 |
| - } |
2039 |
| - offset += Character.charCount(ch); |
2040 |
| - } |
2041 |
| - return converted.toString(); |
| 1987 | + return UCharacter.toTitleCase(Locale.ENGLISH, self, null); |
2042 | 1988 | }
|
2043 | 1989 | }
|
2044 | 1990 |
|
|
0 commit comments