Skip to content

Commit 93412c9

Browse files
author
Commitfest Bot
committed
[CF 5538] v14 - SVE enablement for hex-encode and hex-decode
This branch was automatically generated by a robot using patches from an email thread registered at: https://commitfest.postgresql.org/patch/5538 The branch will be overwritten each time a new patch version is posted to the thread, and also periodically to check for bitrot caused by changes on the master branch. Patch(es): https://www.postgresql.org/message-id/aOAzEeuydVXIfWJW@nathan Author(s): Chiranmoy Bhattacharya, Susmitha Devanga
2 parents f8f4afe + 8629bb3 commit 93412c9

File tree

4 files changed

+418
-4
lines changed

4 files changed

+418
-4
lines changed

src/backend/utils/adt/encode.c

Lines changed: 133 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <ctype.h>
1717

1818
#include "mb/pg_wchar.h"
19+
#include "port/simd.h"
1920
#include "utils/builtins.h"
2021
#include "utils/memutils.h"
2122
#include "varatt.h"
@@ -177,8 +178,8 @@ static const int8 hexlookup[128] = {
177178
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
178179
};
179180

180-
uint64
181-
hex_encode(const char *src, size_t len, char *dst)
181+
static inline uint64
182+
hex_encode_scalar(const char *src, size_t len, char *dst)
182183
{
183184
const char *end = src + len;
184185

@@ -193,6 +194,55 @@ hex_encode(const char *src, size_t len, char *dst)
193194
return (uint64) len * 2;
194195
}
195196

197+
uint64
198+
hex_encode(const char *src, size_t len, char *dst)
199+
{
200+
#ifdef USE_NO_SIMD
201+
return hex_encode_scalar(src, len, dst);
202+
#else
203+
const uint64 tail_idx = len & ~(sizeof(Vector8) - 1);
204+
uint64 i;
205+
206+
/*
207+
* This splits the high and low nibbles of each byte into separate
208+
* vectors, adds the vectors to a mask that converts the nibbles to their
209+
* equivalent ASCII bytes, and interleaves those bytes back together to
210+
* form the final hex-encoded string.
211+
*/
212+
for (i = 0; i < tail_idx; i += sizeof(Vector8))
213+
{
214+
Vector8 srcv;
215+
Vector8 lo;
216+
Vector8 hi;
217+
Vector8 mask;
218+
219+
vector8_load(&srcv, (const uint8 *) &src[i]);
220+
221+
lo = vector8_and(srcv, vector8_broadcast(0x0f));
222+
mask = vector8_gt(lo, vector8_broadcast(0x9));
223+
mask = vector8_and(mask, vector8_broadcast('a' - '0' - 10));
224+
mask = vector8_add(mask, vector8_broadcast('0'));
225+
lo = vector8_add(lo, mask);
226+
227+
hi = vector8_and(srcv, vector8_broadcast(0xf0));
228+
hi = vector8_shift_right(hi, 4);
229+
mask = vector8_gt(hi, vector8_broadcast(0x9));
230+
mask = vector8_and(mask, vector8_broadcast('a' - '0' - 10));
231+
mask = vector8_add(mask, vector8_broadcast('0'));
232+
hi = vector8_add(hi, mask);
233+
234+
vector8_store((uint8 *) &dst[i * 2],
235+
vector8_interleave_low(hi, lo));
236+
vector8_store((uint8 *) &dst[i * 2 + sizeof(Vector8)],
237+
vector8_interleave_high(hi, lo));
238+
}
239+
240+
(void) hex_encode_scalar(src + i, len - i, dst + i * 2);
241+
242+
return (uint64) len * 2;
243+
#endif
244+
}
245+
196246
static inline bool
197247
get_hex(const char *cp, char *out)
198248
{
@@ -213,8 +263,8 @@ hex_decode(const char *src, size_t len, char *dst)
213263
return hex_decode_safe(src, len, dst, NULL);
214264
}
215265

216-
uint64
217-
hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext)
266+
static inline uint64
267+
hex_decode_safe_scalar(const char *src, size_t len, char *dst, Node *escontext)
218268
{
219269
const char *s,
220270
*srcend;
@@ -254,6 +304,85 @@ hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext)
254304
return p - dst;
255305
}
256306

307+
/*
308+
* This helper converts each byte to its binary-equivalent nibble by
309+
* subtraction and combines them to form the return bytes (separated by zero
310+
* bytes). Returns false if any input bytes are outside the expected ranges of
311+
* ASCII values. Otherwise, returns true.
312+
*/
313+
#ifndef USE_NO_SIMD
314+
static inline bool
315+
hex_decode_simd_helper(const Vector8 src, Vector8 *dst)
316+
{
317+
Vector8 sub;
318+
Vector8 mask_hi = vector8_interleave_low(vector8_broadcast(0), vector8_broadcast(0x0f));
319+
Vector8 mask_lo = vector8_interleave_low(vector8_broadcast(0x0f), vector8_broadcast(0));
320+
Vector8 tmp;
321+
bool ret;
322+
323+
tmp = vector8_gt(vector8_broadcast('9' + 1), src);
324+
sub = vector8_and(tmp, vector8_broadcast('0'));
325+
326+
tmp = vector8_gt(src, vector8_broadcast('A' - 1));
327+
tmp = vector8_and(tmp, vector8_broadcast('A' - 10));
328+
sub = vector8_add(sub, tmp);
329+
330+
tmp = vector8_gt(src, vector8_broadcast('a' - 1));
331+
tmp = vector8_and(tmp, vector8_broadcast('a' - 'A'));
332+
sub = vector8_add(sub, tmp);
333+
334+
*dst = vector8_issub(src, sub);
335+
ret = !vector8_has_ge(*dst, 0x10);
336+
337+
tmp = vector8_and(*dst, mask_hi);
338+
tmp = vector8_shift_right(tmp, 8);
339+
*dst = vector8_and(*dst, mask_lo);
340+
*dst = vector8_shift_left(*dst, 4);
341+
*dst = vector8_or(*dst, tmp);
342+
return ret;
343+
}
344+
#endif /* ! USE_NO_SIMD */
345+
346+
uint64
347+
hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext)
348+
{
349+
#ifdef USE_NO_SIMD
350+
return hex_decode_safe_scalar(src, len, dst, escontext);
351+
#else
352+
const uint64 tail_idx = len & ~(sizeof(Vector8) * 2 - 1);
353+
uint64 i;
354+
bool success = true;
355+
356+
/*
357+
* We must process 2 vectors at a time since the output will be half the
358+
* length of the input.
359+
*/
360+
for (i = 0; i < tail_idx; i += sizeof(Vector8) * 2)
361+
{
362+
Vector8 srcv;
363+
Vector8 dstv1;
364+
Vector8 dstv2;
365+
366+
vector8_load(&srcv, (const uint8 *) &src[i]);
367+
success &= hex_decode_simd_helper(srcv, &dstv1);
368+
369+
vector8_load(&srcv, (const uint8 *) &src[i + sizeof(Vector8)]);
370+
success &= hex_decode_simd_helper(srcv, &dstv2);
371+
372+
vector8_store((uint8 *) &dst[i / 2], vector8_pack_16(dstv1, dstv2));
373+
}
374+
375+
/*
376+
* If something didn't look right in the vector path, try again in the
377+
* scalar path so that we can handle it correctly.
378+
*/
379+
if (!success)
380+
i = 0;
381+
382+
return i / 2 + hex_decode_safe_scalar(src + i, len - i, dst + i / 2, escontext);
383+
#endif
384+
}
385+
257386
static uint64
258387
hex_enc_len(const char *src, size_t srclen)
259388
{

0 commit comments

Comments
 (0)