45
45
import static com .oracle .graal .python .runtime .exception .PythonErrorType .UnicodeDecodeError ;
46
46
import static com .oracle .graal .python .runtime .exception .PythonErrorType .UnicodeEncodeError ;
47
47
48
- import java .nio .BufferUnderflowException ;
49
48
import java .nio .ByteBuffer ;
50
49
import java .nio .CharBuffer ;
51
50
import java .nio .charset .CharacterCodingException ;
@@ -258,6 +257,7 @@ private int getLength(PBytes b) {
258
257
}
259
258
}
260
259
260
+ // Encoder for raw_unicode_escape
261
261
@ Builtin (name = "__truffle_raw_encode" , minNumOfPositionalArgs = 1 , parameterNames = {"str" , "errors" })
262
262
@ GenerateNodeFactory
263
263
public abstract static class RawEncodeNode extends EncodeBaseNode {
@@ -425,14 +425,15 @@ private boolean castToBoolean(VirtualFrame frame, Object object) {
425
425
}
426
426
}
427
427
428
+ // Decoder for raw_unicode_escape
428
429
@ Builtin (name = "__truffle_raw_decode" , minNumOfPositionalArgs = 1 , parameterNames = {"bytes" , "errors" })
429
430
@ GenerateNodeFactory
430
431
abstract static class RawDecodeNode extends EncodeBaseNode {
431
432
@ Child private GetInternalByteArrayNode toByteArrayNode ;
432
433
433
434
@ Specialization
434
435
Object decode (PBytesLike bytes , @ SuppressWarnings ("unused" ) PNone errors ) {
435
- String string = decodeBytes (getBytesBuffer (bytes ), "strict" );
436
+ String string = decodeBytes (getBytes (bytes ), "strict" );
436
437
return factory ().createTuple (new Object []{string , string .length ()});
437
438
}
438
439
@@ -446,60 +447,64 @@ Object decode(PBytesLike bytes, Object errors,
446
447
CompilerDirectives .transferToInterpreterAndInvalidate ();
447
448
throw new IllegalStateException ("should not be reached" );
448
449
}
449
- String string = decodeBytes (getBytesBuffer (bytes ), profiledErrors );
450
+ String string = decodeBytes (getBytes (bytes ), profiledErrors );
450
451
return factory ().createTuple (new Object []{string , string .length ()});
451
452
}
452
453
453
- private ByteBuffer getBytesBuffer (PBytesLike bytesLike ) {
454
+ private byte [] getBytes (PBytesLike bytesLike ) {
454
455
if (toByteArrayNode == null ) {
455
456
CompilerDirectives .transferToInterpreterAndInvalidate ();
456
457
toByteArrayNode = insert (GetInternalByteArrayNodeGen .create ());
457
458
}
458
- byte [] barr = toByteArrayNode .execute (bytesLike .getSequenceStorage ());
459
- return ByteBuffer .wrap (barr , 0 , barr .length );
459
+ return toByteArrayNode .execute (bytesLike .getSequenceStorage ());
460
460
}
461
461
462
462
@ TruffleBoundary
463
- String decodeBytes (ByteBuffer bytes , String errors ) {
463
+ String decodeBytes (byte [] bytes , String errors ) {
464
464
CodingErrorAction errorAction = convertCodingErrorAction (errors );
465
465
try {
466
- ByteBuffer buf = ByteBuffer .allocate (bytes .remaining () * Integer .BYTES );
467
- byte [] hexString = new byte [ 8 ] ;
468
- while (bytes .hasRemaining () ) {
469
- int val ;
470
- byte b = bytes .get ();
471
- if ( b == ( byte ) '\\' ) {
472
- byte b1 = bytes . get () ;
466
+ ByteBuffer buf = ByteBuffer .allocate (bytes .length * Integer .BYTES );
467
+ int i = 0 ;
468
+ while (i < bytes .length ) {
469
+ byte b = bytes [ i ] ;
470
+ if ( b == ( byte ) '\\' && i + 1 < bytes .length ) {
471
+ byte b1 = bytes [ i + 1 ];
472
+ int numIndex = i + 2 ;
473
473
if (b1 == (byte ) 'u' ) {
474
- bytes .get (hexString , 0 , 4 );
475
- val = Integer .parseInt (new String (hexString , 0 , 4 ), 16 );
474
+ final int count = 4 ;
475
+ if (numIndex + count > bytes .length ) {
476
+ throw raise (UnicodeDecodeError );
477
+ }
478
+ buf .putInt (Integer .parseInt (new String (bytes , numIndex , count ), 16 ));
479
+ i = numIndex + count ;
480
+ continue ;
476
481
} else if (b1 == (byte ) 'U' ) {
477
- bytes .get (hexString , 0 , 8 );
478
- val = Integer .parseInt (new String (hexString , 0 , 8 ), 16 );
479
- } else {
480
- throw new CharacterCodingException ();
482
+ final int count = 8 ;
483
+ if (numIndex + count > bytes .length ) {
484
+ throw raise (UnicodeDecodeError );
485
+ }
486
+ buf .putInt (Integer .parseInt (new String (bytes , numIndex , count ), 16 ));
487
+ i = numIndex + count ;
488
+ continue ;
481
489
}
482
- } else {
483
- // Bytes that are not an escape sequence are latin-1, which maps to unicode
484
- // codepoints directly
485
- val = b & 0xFF ;
486
490
}
487
- buf .putInt (val );
491
+ // Bytes that are not an escape sequence are latin-1, which maps to unicode
492
+ // codepoints directly
493
+ buf .putInt (b & 0xFF );
494
+ i ++;
488
495
}
489
496
buf .flip ();
490
497
CharBuffer decoded = UTF32 .newDecoder ().onMalformedInput (errorAction ).onUnmappableCharacter (errorAction ).decode (buf );
491
498
return String .valueOf (decoded );
492
- } catch (CharacterCodingException | NumberFormatException | BufferUnderflowException e ) {
499
+ } catch (CharacterCodingException | NumberFormatException e ) {
493
500
throw raise (UnicodeDecodeError , e );
494
501
}
495
502
}
496
503
}
497
504
498
- // _codecs.lookup(name)
499
505
@ Builtin (name = "__truffle_lookup" , minNumOfPositionalArgs = 1 )
500
506
@ GenerateNodeFactory
501
507
abstract static class CodecsLookupNode extends PythonBuiltinNode {
502
- // This is replaced in the core _codecs.py with the full functionality
503
508
@ Specialization
504
509
Object lookup (String encoding ) {
505
510
if (CharsetMapping .getCharset (encoding ) != null ) {
@@ -510,7 +515,6 @@ Object lookup(String encoding) {
510
515
}
511
516
}
512
517
513
- // _codecs.lookup(name)
514
518
@ Builtin (name = "charmap_build" , minNumOfPositionalArgs = 1 )
515
519
@ GenerateNodeFactory
516
520
abstract static class CharmapBuildNode extends PythonBuiltinNode {
0 commit comments