Skip to content

Commit 9b7c5e6

Browse files
committed
Iterate codepoints correctly
1 parent 487d256 commit 9b7c5e6

File tree

2 files changed

+28 -15 lines changed

2 files changed

+28 -15 lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringBuiltins.java

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1680,10 +1680,12 @@ static boolean doString(String self) {
16801680
if (self.length() == 0) {
16811681
return false;
16821682
}
1683-
for (int i = 0; i < self.length(); i++) {
1684-
if (!Character.isLetterOrDigit(self.codePointAt(i))) {
1683+
for (int i = 0; i < self.length();) {
1684+
int codePoint = self.codePointAt(i);
1685+
if (!Character.isLetterOrDigit(codePoint)) {
16851686
return false;
16861687
}
1688+
i += Character.charCount(codePoint);
16871689
}
16881690
return true;
16891691
}
@@ -1704,10 +1706,12 @@ static boolean doString(String self) {
17041706
if (self.length() == 0) {
17051707
return false;
17061708
}
1707-
for (int i = 0; i < self.length(); i++) {
1708-
if (!Character.isLetter(self.codePointAt(i))) {
1709+
for (int i = 0; i < self.length();) {
1710+
int codePoint = self.codePointAt(i);
1711+
if (!Character.isLetter(codePoint)) {
17091712
return false;
17101713
}
1714+
i += Character.charCount(codePoint);
17111715
}
17121716
return true;
17131717
}
@@ -1728,10 +1732,12 @@ static boolean doString(String self) {
17281732
if (self.length() == 0) {
17291733
return false;
17301734
}
1731-
for (int i = 0; i < self.length(); i++) {
1732-
if (!Character.isDigit(self.codePointAt(i))) {
1735+
for (int i = 0; i < self.length();) {
1736+
int codePoint = self.codePointAt(i);
1737+
if (!Character.isDigit(codePoint)) {
17331738
return false;
17341739
}
1740+
i += Character.charCount(codePoint);
17351741
}
17361742
return true;
17371743
}
@@ -1778,7 +1784,7 @@ static boolean doString(String self) {
17781784
if (self.length() == 0) {
17791785
return false;
17801786
}
1781-
for (int i = 0; i < self.length(); i++) {
1787+
for (int i = 0; i < self.length();) {
17821788
int codePoint = self.codePointAt(i);
17831789
if (!Character.isLowerCase(codePoint)) {
17841790
if (Character.toLowerCase(codePoint) == Character.toUpperCase(codePoint)) {
@@ -1787,6 +1793,7 @@ static boolean doString(String self) {
17871793
return false;
17881794
}
17891795
}
1796+
i += Character.charCount(codePoint);
17901797
}
17911798
return uncased == 0 || self.length() > uncased;
17921799
}
@@ -1813,10 +1820,12 @@ private static boolean isPrintableChar(int i) {
18131820
@Specialization
18141821
@TruffleBoundary
18151822
static boolean doString(String self) {
1816-
for (int i = 0; i < self.length(); i++) {
1817-
if (!isPrintableChar(self.codePointAt(i))) {
1823+
for (int i = 0; i < self.length();) {
1824+
int codePoint = self.codePointAt(i);
1825+
if (!isPrintableChar(codePoint)) {
18181826
return false;
18191827
}
1828+
i += Character.charCount(codePoint);
18201829
}
18211830
return true;
18221831
}
@@ -1839,10 +1848,12 @@ static boolean doString(String self) {
18391848
if (self.length() == 0) {
18401849
return false;
18411850
}
1842-
for (int i = 0; i < self.length(); i++) {
1843-
if (!StringUtils.isSpace(self.charAt(i))) {
1851+
for (int i = 0; i < self.length();) {
1852+
int codePoint = self.codePointAt(i);
1853+
if (!StringUtils.isSpace(codePoint)) {
18441854
return false;
18451855
}
1856+
i += Character.charCount(codePoint);
18461857
}
18471858
return true;
18481859
}
@@ -1865,7 +1876,7 @@ static boolean doString(String self) {
18651876
if (self.length() == 0) {
18661877
return false;
18671878
}
1868-
for (int i = 0; i < self.length(); i++) {
1879+
for (int i = 0; i < self.length();) {
18691880
int codePoint = self.codePointAt(i);
18701881
if (!expectLower) {
18711882
if (Character.isTitleCase(codePoint) || Character.isUpperCase(codePoint)) {
@@ -1883,6 +1894,7 @@ static boolean doString(String self) {
18831894
expectLower = false;
18841895
}
18851896
}
1897+
i += Character.charCount(codePoint);
18861898
}
18871899
return hasContent;
18881900
}
@@ -1904,7 +1916,7 @@ static boolean doString(String self) {
19041916
if (self.length() == 0) {
19051917
return false;
19061918
}
1907-
for (int i = 0; i < self.length(); i++) {
1919+
for (int i = 0; i < self.length();) {
19081920
int codePoint = self.codePointAt(i);
19091921
if (!Character.isUpperCase(codePoint)) {
19101922
if (Character.toLowerCase(codePoint) == Character.toUpperCase(codePoint)) {
@@ -1913,6 +1925,7 @@ static boolean doString(String self) {
19131925
return false;
19141926
}
19151927
}
1928+
i += Character.charCount(codePoint);
19161929
}
19171930
return uncased == 0 || self.length() > uncased;
19181931
}

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringUtils.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ public enum StripKind {
8080
0, 0, 0, 0, 0, 0, 0, 0
8181
};
8282

83-
public static boolean isUnicodeWhitespace(char ch) {
83+
public static boolean isUnicodeWhitespace(int ch) {
8484
switch (ch) {
8585
case 0x0009:
8686
case 0x000A:
@@ -135,7 +135,7 @@ public static boolean isUnicodeLineBreak(char ch) {
135135
}
136136
}
137137

138-
public static boolean isSpace(char ch) {
138+
public static boolean isSpace(int ch) {
139139
if (ch < 128) {
140140
return ASCII_WHITESPACE[ch] == 1;
141141
}

0 commit comments

Comments (0)