Skip to content

Commit 487d256

Browse files
committed
Make str.splitlines deal with different newline types
1 parent c67784c commit 487d256

File tree

2 files changed

+31
-8
lines changed

2 files changed

+31
-8
lines changed

graalpython/com.oracle.graal.python.test/src/tests/unittest_tags/test_unicode.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_rindex
5858
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_rjust
5959
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_slice
60+
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_splitlines
6061
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_startswith
6162
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_startswith_endswith_errors
6263
*graalpython.lib-python.3.test.test_unicode.UnicodeTest.test_strip

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringBuiltins.java

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@
6060
import java.nio.charset.UnsupportedCharsetException;
6161
import java.util.Arrays;
6262
import java.util.List;
63+
import java.util.regex.Matcher;
64+
import java.util.regex.Pattern;
6365

6466
import org.graalvm.nativeimage.ImageInfo;
6567

@@ -1215,6 +1217,8 @@ private PList rsplitfields(VirtualFrame frame, String s, int maxsplit, AppendNod
12151217
public abstract static class SplitLinesNode extends PythonBinaryBuiltinNode {
12161218
@Child private AppendNode appendNode = AppendNode.create();
12171219

1220+
// Line boundaries recognised by Python's str.splitlines: CRLF (listed first so it is
// consumed as a single break), LF, CR, VT (U+000B), FF (U+000C), FS (U+001C),
// GS (U+001D), RS (U+001E), NEL (U+0085), LS (U+2028) and PS (U+2029).
// Java's predefined "\R" is not used because it omits U+001C, U+001D and U+001E.
private static final Pattern LINEBREAK_PATTERN = Pattern.compile("\\r\\n|[\\n\\r\\u000B\\f\\u001C\\u001D\\u001E\\u0085\\u2028\\u2029]");
1221+
12181222
@Specialization
12191223
PList doString(String self, @SuppressWarnings("unused") PNone keepends) {
12201224
return doStringKeepends(self, false);
@@ -1224,17 +1228,15 @@ PList doString(String self, @SuppressWarnings("unused") PNone keepends) {
12241228
PList doStringKeepends(String self, boolean keepends) {
12251229
PList list = factory().createList();
12261230
int lastEnd = 0;
1227-
while (true) {
1228-
int nextIndex = PString.indexOf(self, "\n", lastEnd);
1229-
if (nextIndex == -1) {
1230-
break;
1231-
}
1231+
Matcher matcher = getMatcher(self);
1232+
while (matcherFind(matcher)) {
1233+
int end = matcherEnd(matcher);
12321234
if (keepends) {
1233-
appendNode.execute(list, PString.substring(self, lastEnd, nextIndex + 1));
1235+
appendNode.execute(list, PString.substring(self, lastEnd, end));
12341236
} else {
1235-
appendNode.execute(list, PString.substring(self, lastEnd, nextIndex));
1237+
appendNode.execute(list, PString.substring(self, lastEnd, matcherStart(matcher)));
12361238
}
1237-
lastEnd = nextIndex + 1;
1239+
lastEnd = end;
12381240
}
12391241
String remainder = PString.substring(self, lastEnd);
12401242
if (!remainder.isEmpty()) {
@@ -1243,6 +1245,26 @@ PList doStringKeepends(String self, boolean keepends) {
12431245
return list;
12441246
}
12451247

1248+
/**
 * Returns the start index of the match most recently found by {@code matcher}.
 * Thin wrapper around {@link Matcher#start()}.
 * NOTE(review): presumably wrapped so the regex call sits behind a Truffle boundary - confirm.
 */
@TruffleBoundary
1249+
private static int matcherStart(Matcher matcher) {
1250+
return matcher.start();
1251+
}
1252+
1253+
/**
 * Returns the index just past the last character of the match most recently found by
 * {@code matcher}. Thin wrapper around {@link Matcher#end()}.
 * NOTE(review): presumably wrapped so the regex call sits behind a Truffle boundary - confirm.
 */
@TruffleBoundary
1254+
private static int matcherEnd(Matcher matcher) {
1255+
return matcher.end();
1256+
}
1257+
1258+
/**
 * Advances {@code matcher} to the next match in its input and reports whether one was found.
 * Thin wrapper around {@link Matcher#find()}.
 * NOTE(review): presumably wrapped so the regex call sits behind a Truffle boundary - confirm.
 */
@TruffleBoundary
1259+
private static boolean matcherFind(Matcher matcher) {
1260+
return matcher.find();
1261+
}
1262+
1263+
/**
 * Creates a new {@link Matcher} that applies {@code LINEBREAK_PATTERN} to {@code self}.
 * NOTE(review): presumably wrapped so the regex call sits behind a Truffle boundary - confirm.
 */
@TruffleBoundary
1264+
private static Matcher getMatcher(String self) {
1265+
return LINEBREAK_PATTERN.matcher(self);
1266+
}
1267+
12461268
@Specialization(replaces = {"doString", "doStringKeepends"})
12471269
PList doGeneric(Object self, Object keepends,
12481270
@Cached CastToJavaStringCheckedNode castSelfNode,

0 commit comments

Comments
 (0)