Skip to content

Commit c89254f

Browse files
alex-robbins authored and pfalcon committed
extmod/modubinascii: Rewrite mod_binascii_a2b_base64.
This implementation ignores invalid characters in the input. This allows it to decode the output of b2a_base64, and also mimics the behavior of CPython.
1 parent 025e5f2 commit c89254f

File tree

2 files changed

+55
-38
lines changed

2 files changed

+55
-38
lines changed

extmod/modubinascii.c

Lines changed: 48 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -105,54 +105,64 @@ mp_obj_t mod_binascii_unhexlify(mp_obj_t data) {
105105
}
106106
MP_DEFINE_CONST_FUN_OBJ_1(mod_binascii_unhexlify_obj, mod_binascii_unhexlify);
107107

108+
// Map a single base64 alphabet character (anything except the '=' pad) to its
109+
// 6-bit value: an integer between 0 and 63, inclusive, is returned.
110+
// For every other byte, including '=', -1 is returned.
111+
static int mod_binascii_sextet(byte ch) {
112+
if (ch >= 'A' && ch <= 'Z') {
113+
return ch - 'A'; // 'A'..'Z' -> 0..25
114+
} else if (ch >= 'a' && ch <= 'z') {
115+
return ch - 'a' + 26; // 'a'..'z' -> 26..51
116+
} else if (ch >= '0' && ch <= '9') {
117+
return ch - '0' + 52; // '0'..'9' -> 52..61
118+
} else if (ch == '+') {
119+
return 62;
120+
} else if (ch == '/') {
121+
return 63;
122+
} else {
123+
return -1; // not a sextet character; callers test for this sentinel
124+
}
125+
}
126+
108127
// Decode base64 text taken from a buffer object and return the decoded data
// as an object built with mp_type_bytes.
// Bytes outside the base64 alphabet are skipped instead of being rejected.
// A '=' only has an effect when it completes a partial group, in which case
// decoding stops there; otherwise it is skipped like any other non-alphabet byte.
// Raises ValueError("incorrect padding") if undecoded bits remain at the end.
// NOTE(review): assuming the second argument of vstr_init is the capacity of
// vstr.buf, (bufinfo.len / 4) * 3 + 1 is too small when len % 4 != 0: seven
// alphabet characters make the loop store 5 bytes while only 4 are reserved,
// and the stores happen before the padding error is raised.
// (bufinfo.len * 3) / 4 + 1 looks like a safe upper bound - confirm and fix.
mp_obj_t mod_binascii_a2b_base64(mp_obj_t data) {
109128
mp_buffer_info_t bufinfo;
110129
mp_get_buffer_raise(data, &bufinfo, MP_BUFFER_READ);
111-
if (bufinfo.len % 4 != 0) {
112-
mp_raise_ValueError("incorrect padding");
113-
}
130+
byte *in = bufinfo.buf;
114131

115132
vstr_t vstr;
116-
byte *in = bufinfo.buf;
117-
if (bufinfo.len == 0) {
118-
vstr_init_len(&vstr, 0);
119-
}
120-
else {
121-
vstr_init_len(&vstr, ((bufinfo.len / 4) * 3) - ((in[bufinfo.len-1] == '=') ? ((in[bufinfo.len-2] == '=') ? 2 : 1 ) : 0));
122-
}
123-
byte *out = (byte*)vstr.buf;
124-
for (mp_uint_t i = bufinfo.len; i; i -= 4) {
125-
char hold[4];
126-
for (int j = 4; j--;) {
127-
if (in[j] >= 'A' && in[j] <= 'Z') {
128-
hold[j] = in[j] - 'A';
129-
} else if (in[j] >= 'a' && in[j] <= 'z') {
130-
hold[j] = in[j] - 'a' + 26;
131-
} else if (in[j] >= '0' && in[j] <= '9') {
132-
hold[j] = in[j] - '0' + 52;
133-
} else if (in[j] == '+') {
134-
hold[j] = 62;
135-
} else if (in[j] == '/') {
136-
hold[j] = 63;
137-
} else if (in[j] == '=') {
138-
if (j < 2 || i > 4) {
139-
mp_raise_ValueError("incorrect padding");
140-
}
141-
hold[j] = 64;
142-
} else {
143-
mp_raise_ValueError("invalid character");
133+
vstr_init(&vstr, (bufinfo.len / 4) * 3 + 1); // Capacity estimated from the raw input length; see NOTE(review) in the header comment
134+
byte *out = (byte *)vstr.buf;
135+
136+
uint shift = 0; // Accumulator: new sextets are shifted in at the low end
137+
int nbits = 0; // Number of meaningful bits in shift
138+
bool hadpad = false; // Had a pad character since last valid character
139+
for (size_t i = 0; i < bufinfo.len; i++) {
140+
if (in[i] == '=') {
141+
if ((nbits == 2) || ((nbits == 4) && hadpad)) { // 2 pending bits: one '=' completes the group; 4 pending bits: a second '=' is required
142+
nbits = 0; // drop the pending bits so the check after the loop passes
143+
break; // the rest of the input is not examined
144144
}
145+
hadpad = true; // falls through: '=' is not a sextet, so it is skipped below
145146
}
146-
in += 4;
147147

148-
*out++ = (hold[0]) << 2 | (hold[1]) >> 4;
149-
if (hold[2] != 64) {
150-
*out++ = (hold[1] & 0x0F) << 4 | hold[2] >> 2;
151-
if (hold[3] != 64) {
152-
*out++ = (hold[2] & 0x03) << 6 | hold[3];
153-
}
148+
int sextet = mod_binascii_sextet(in[i]);
149+
if (sextet == -1) {
150+
continue; // byte is outside the base64 alphabet: ignore it
151+
}
152+
hadpad = false; // an alphabet character cancels any pad seen before it
153+
shift = (shift << 6) | sextet;
154+
nbits += 6;
155+
156+
if (nbits >= 8) {
157+
nbits -= 8;
158+
out[vstr.len++] = (shift >> nbits) & 0xFF; // store the oldest 8 pending bits and grow vstr.len by hand
154159
}
155160
}
161+
162+
if (nbits) { // pending bits that no pad sequence accounted for
163+
mp_raise_ValueError("incorrect padding");
164+
}
165+
156166
return mp_obj_new_str_from_vstr(&mp_type_bytes, &vstr);
157167
}
158168
MP_DEFINE_CONST_FUN_OBJ_1(mod_binascii_a2b_base64_obj, mod_binascii_a2b_base64);

tests/extmod/ubinascii_a2b_base64.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,13 @@
2121
print(binascii.a2b_base64(b'f4D+')) # convert '+'
2222
print(binascii.a2b_base64(b'MTIzNEFCQ0RhYmNk'))
2323

24+
# Ignore invalid characters and pad sequences
25+
print(binascii.a2b_base64(b'Zm9v\n'))
26+
print(binascii.a2b_base64(b'Zm\x009v\n'))
27+
print(binascii.a2b_base64(b'Zm9v=='))
28+
print(binascii.a2b_base64(b'Zm9v==='))
29+
print(binascii.a2b_base64(b'Zm9v===YmFy'))
30+
2431
try:
2532
print(binascii.a2b_base64(b'abc'))
2633
except ValueError:

0 commit comments

Comments
 (0)