Skip to content

Commit e1d9171

Browse files
Add support for base64url encoding and decoding
This adds support for base64url encoding and decoding, a base64 variant which is safe to use in filenames and URLs. base64url replaces '+' in the base64 alphabet with '-' and '/' with '_', thus making it safe for URL addresses and file systems. Support for base64url was originally suggested by Przemysław Sztoch. Author: Florents Tselai <[email protected]> Reviewed-by: Aleksander Alekseev <[email protected]> Reviewed-by: David E. Wheeler <[email protected]> Reviewed-by: Masahiko Sawada <[email protected]> Reviewed-by: Daniel Gustafsson <[email protected]> Reviewed-by: Chao Li (Evan) <[email protected]> Discussion: https://postgr.es/m/[email protected]
1 parent 261f89a commit e1d9171

File tree

4 files changed

+359
-21
lines changed

4 files changed

+359
-21
lines changed

doc/src/sgml/func/func-binarystring.sgml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -728,6 +728,7 @@
728728
Encodes binary data into a textual representation; supported
729729
<parameter>format</parameter> values are:
730730
<link linkend="encode-format-base64"><literal>base64</literal></link>,
731+
<link linkend="encode-format-base64url"><literal>base64url</literal></link>,
731732
<link linkend="encode-format-escape"><literal>escape</literal></link>,
732733
<link linkend="encode-format-hex"><literal>hex</literal></link>.
733734
</para>
@@ -785,6 +786,24 @@
785786
</listitem>
786787
</varlistentry>
787788

789+
<varlistentry id="encode-format-base64url">
790+
<term>base64url
791+
<indexterm>
792+
<primary>base64url format</primary>
793+
</indexterm></term>
794+
<listitem>
795+
<para>
796+
The <literal>base64url</literal> format is that of
797+
<ulink url="https://datatracker.ietf.org/doc/html/rfc4648#section-5">
798+
RFC 4648 Section 5</ulink>, a <literal>base64</literal> variant safe to
799+
use in filenames and URLs. The <literal>base64url</literal> alphabet
800+
uses <literal>'-'</literal> instead of <literal>'+'</literal> and
801+
<literal>'_'</literal> instead of <literal>'/'</literal> and also omits
802+
the <literal>'='</literal> padding character.
803+
</para>
804+
</listitem>
805+
</varlistentry>
806+
788807
<varlistentry id="encode-format-escape">
789808
<term>escape
790809
<indexterm>

src/backend/utils/adt/encode.c

Lines changed: 136 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -267,12 +267,15 @@ hex_dec_len(const char *src, size_t srclen)
267267
}
268268

269269
/*
270-
* BASE64
270+
* BASE64 and BASE64URL
271271
*/
272272

273273
static const char _base64[] =
274274
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
275275

276+
static const char _base64url[] =
277+
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
278+
276279
static const int8 b64lookup[128] = {
277280
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
278281
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
@@ -284,15 +287,23 @@ static const int8 b64lookup[128] = {
284287
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
285288
};
286289

290+
/*
291+
* pg_base64_encode_internal
292+
*
293+
* Helper for encoding base64 or base64url. When url is passed as true the
294+
* input will be encoded using base64url. len bytes in src are encoded into
295+
* dst.
296+
*/
287297
static uint64
288-
pg_base64_encode(const char *src, size_t len, char *dst)
298+
pg_base64_encode_internal(const char *src, size_t len, char *dst, bool url)
289299
{
290300
char *p,
291301
*lend = dst + 76;
292302
const char *s,
293303
*end = src + len;
294304
int pos = 2;
295305
uint32 buf = 0;
306+
const char *alphabet = url ? _base64url : _base64;
296307

297308
s = src;
298309
p = dst;
@@ -306,33 +317,64 @@ pg_base64_encode(const char *src, size_t len, char *dst)
306317
/* write it out */
307318
if (pos < 0)
308319
{
309-
*p++ = _base64[(buf >> 18) & 0x3f];
310-
*p++ = _base64[(buf >> 12) & 0x3f];
311-
*p++ = _base64[(buf >> 6) & 0x3f];
312-
*p++ = _base64[buf & 0x3f];
320+
*p++ = alphabet[(buf >> 18) & 0x3f];
321+
*p++ = alphabet[(buf >> 12) & 0x3f];
322+
*p++ = alphabet[(buf >> 6) & 0x3f];
323+
*p++ = alphabet[buf & 0x3f];
313324

314325
pos = 2;
315326
buf = 0;
316-
}
317-
if (p >= lend)
318-
{
319-
*p++ = '\n';
320-
lend = p + 76;
327+
328+
if (!url && p >= lend)
329+
{
330+
*p++ = '\n';
331+
lend = p + 76;
332+
}
321333
}
322334
}
335+
336+
/* Handle remaining bytes in buf */
323337
if (pos != 2)
324338
{
325-
*p++ = _base64[(buf >> 18) & 0x3f];
326-
*p++ = _base64[(buf >> 12) & 0x3f];
327-
*p++ = (pos == 0) ? _base64[(buf >> 6) & 0x3f] : '=';
328-
*p++ = '=';
339+
*p++ = alphabet[(buf >> 18) & 0x3f];
340+
*p++ = alphabet[(buf >> 12) & 0x3f];
341+
342+
if (pos == 0)
343+
{
344+
*p++ = alphabet[(buf >> 6) & 0x3f];
345+
if (!url)
346+
*p++ = '=';
347+
}
348+
else if (!url)
349+
{
350+
*p++ = '=';
351+
*p++ = '=';
352+
}
329353
}
330354

331355
return p - dst;
332356
}
333357

334358
static uint64
335-
pg_base64_decode(const char *src, size_t len, char *dst)
359+
pg_base64_encode(const char *src, size_t len, char *dst)
360+
{
361+
return pg_base64_encode_internal(src, len, dst, false);
362+
}
363+
364+
static uint64
365+
pg_base64url_encode(const char *src, size_t len, char *dst)
366+
{
367+
return pg_base64_encode_internal(src, len, dst, true);
368+
}
369+
370+
/*
371+
* pg_base64_decode_internal
372+
*
373+
* Helper for decoding base64 or base64url. When url is passed as true the
374+
* input will be assumed to be encoded using base64url.
375+
*/
376+
static uint64
377+
pg_base64_decode_internal(const char *src, size_t len, char *dst, bool url)
336378
{
337379
const char *srcend = src + len,
338380
*s = src;
@@ -350,6 +392,15 @@ pg_base64_decode(const char *src, size_t len, char *dst)
350392
if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
351393
continue;
352394

395+
/* convert base64url to base64 */
396+
if (url)
397+
{
398+
if (c == '-')
399+
c = '+';
400+
else if (c == '_')
401+
c = '/';
402+
}
403+
353404
if (c == '=')
354405
{
355406
/* end sequence */
@@ -360,9 +411,12 @@ pg_base64_decode(const char *src, size_t len, char *dst)
360411
else if (pos == 3)
361412
end = 2;
362413
else
414+
{
415+
/* translator: %s is the name of an encoding scheme */
363416
ereport(ERROR,
364417
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
365-
errmsg("unexpected \"=\" while decoding base64 sequence")));
418+
errmsg("unexpected \"=\" while decoding %s sequence", url ? "base64url" : "base64")));
419+
}
366420
}
367421
b = 0;
368422
}
@@ -372,10 +426,14 @@ pg_base64_decode(const char *src, size_t len, char *dst)
372426
if (c > 0 && c < 127)
373427
b = b64lookup[(unsigned char) c];
374428
if (b < 0)
429+
{
430+
/* translator: %s is the name of an encoding scheme */
375431
ereport(ERROR,
376432
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
377-
errmsg("invalid symbol \"%.*s\" found while decoding base64 sequence",
378-
pg_mblen(s - 1), s - 1)));
433+
errmsg("invalid symbol \"%.*s\" found while decoding %s sequence",
434+
pg_mblen(s - 1), s - 1,
435+
url ? "base64url" : "base64")));
436+
}
379437
}
380438
/* add it to buffer */
381439
buf = (buf << 6) + b;
@@ -392,15 +450,40 @@ pg_base64_decode(const char *src, size_t len, char *dst)
392450
}
393451
}
394452

395-
if (pos != 0)
453+
if (pos == 2)
454+
{
455+
buf <<= 12;
456+
*p++ = (buf >> 16) & 0xFF;
457+
}
458+
else if (pos == 3)
459+
{
460+
buf <<= 6;
461+
*p++ = (buf >> 16) & 0xFF;
462+
*p++ = (buf >> 8) & 0xFF;
463+
}
464+
else if (pos != 0)
465+
{
466+
/* translator: %s is the name of an encoding scheme */
396467
ereport(ERROR,
397468
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
398-
errmsg("invalid base64 end sequence"),
469+
errmsg("invalid %s end sequence", url ? "base64url" : "base64"),
399470
errhint("Input data is missing padding, is truncated, or is otherwise corrupted.")));
471+
}
400472

401473
return p - dst;
402474
}
403475

476+
static uint64
477+
pg_base64_decode(const char *src, size_t len, char *dst)
478+
{
479+
return pg_base64_decode_internal(src, len, dst, false);
480+
}
481+
482+
static uint64
483+
pg_base64url_decode(const char *src, size_t len, char *dst)
484+
{
485+
return pg_base64_decode_internal(src, len, dst, true);
486+
}
404487

405488
static uint64
406489
pg_base64_enc_len(const char *src, size_t srclen)
@@ -415,6 +498,32 @@ pg_base64_dec_len(const char *src, size_t srclen)
415498
return ((uint64) srclen * 3) >> 2;
416499
}
417500

501+
/*
 * pg_base64url_enc_len
 *
 * Return an upper bound on the number of bytes needed to hold the base64url
 * encoding of srclen input bytes.  src is not examined.
 */
static uint64
pg_base64url_enc_len(const char *src, size_t srclen)
{
	/*
	 * Budget 4 output characters for every started group of 3 input bytes.
	 * Unlike standard base64, base64url emits no '=' padding, so a trailing
	 * partial group produces fewer characters than budgeted here; the result
	 * is therefore a safe overestimate rather than an exact length.
	 *
	 * Widen to uint64 before adding, as pg_base64_dec_len does, so the
	 * computation cannot wrap where size_t is narrower than 64 bits.
	 */
	return ((uint64) srclen + 2) / 3 * 4;
}
510+
511+
/*
 * pg_base64url_dec_len
 *
 * Return an upper bound on the number of bytes produced by decoding srclen
 * characters of base64url input.  src is not examined.
 */
static uint64
pg_base64url_dec_len(const char *src, size_t srclen)
{
	/*
	 * Each 4 characters of input produce at most 3 bytes of output.  Since
	 * base64url input may arrive without padding, round the length up to the
	 * next multiple of 4 first so a trailing partial group is still counted.
	 *
	 * The arithmetic is done in uint64, as in pg_base64_dec_len, so neither
	 * the round-up nor the multiplication can wrap where size_t is narrower
	 * than 64 bits.
	 */
	uint64		adjusted_len = (uint64) srclen;
	uint64		remainder = adjusted_len % 4;

	if (remainder != 0)
		adjusted_len += 4 - remainder;

	return (adjusted_len * 3) / 4;
}
526+
418527
/*
419528
* Escape
420529
* Minimally escape bytea to text.
@@ -606,6 +715,12 @@ static const struct
606715
pg_base64_enc_len, pg_base64_dec_len, pg_base64_encode, pg_base64_decode
607716
}
608717
},
718+
{
719+
"base64url",
720+
{
721+
pg_base64url_enc_len, pg_base64url_dec_len, pg_base64url_encode, pg_base64url_decode
722+
}
723+
},
609724
{
610725
"escape",
611726
{

0 commit comments

Comments
 (0)