1616#include <ctype.h>
1717
1818#include "mb/pg_wchar.h"
19+ #include "port/simd.h"
1920#include "utils/builtins.h"
2021#include "utils/memutils.h"
2122#include "varatt.h"
@@ -177,8 +178,8 @@ static const int8 hexlookup[128] = {
177178 -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 ,
178179};
179180
180- uint64
181- hex_encode (const char * src , size_t len , char * dst )
181+ static inline uint64
182+ hex_encode_scalar (const char * src , size_t len , char * dst )
182183{
183184 const char * end = src + len ;
184185
@@ -193,6 +194,55 @@ hex_encode(const char *src, size_t len, char *dst)
193194 return (uint64 ) len * 2 ;
194195}
195196
197+ uint64
198+ hex_encode (const char * src , size_t len , char * dst )
199+ {
200+ #ifdef USE_NO_SIMD
201+ return hex_encode_scalar (src , len , dst );
202+ #else
203+ const uint64 tail_idx = len & ~(sizeof (Vector8 ) - 1 );
204+ uint64 i ;
205+
206+ /*
207+ * This splits the high and low nibbles of each byte into separate
208+ * vectors, adds the vectors to a mask that converts the nibbles to their
209+ * equivalent ASCII bytes, and interleaves those bytes back together to
210+ * form the final hex-encoded string.
211+ */
212+ for (i = 0 ; i < tail_idx ; i += sizeof (Vector8 ))
213+ {
214+ Vector8 srcv ;
215+ Vector8 lo ;
216+ Vector8 hi ;
217+ Vector8 mask ;
218+
219+ vector8_load (& srcv , (const uint8 * ) & src [i ]);
220+
221+ lo = vector8_and (srcv , vector8_broadcast (0x0f ));
222+ mask = vector8_gt (lo , vector8_broadcast (0x9 ));
223+ mask = vector8_and (mask , vector8_broadcast ('a' - '0' - 10 ));
224+ mask = vector8_add (mask , vector8_broadcast ('0' ));
225+ lo = vector8_add (lo , mask );
226+
227+ hi = vector8_and (srcv , vector8_broadcast (0xf0 ));
228+ hi = vector8_shift_right (hi , 4 );
229+ mask = vector8_gt (hi , vector8_broadcast (0x9 ));
230+ mask = vector8_and (mask , vector8_broadcast ('a' - '0' - 10 ));
231+ mask = vector8_add (mask , vector8_broadcast ('0' ));
232+ hi = vector8_add (hi , mask );
233+
234+ vector8_store ((uint8 * ) & dst [i * 2 ],
235+ vector8_interleave_low (hi , lo ));
236+ vector8_store ((uint8 * ) & dst [i * 2 + sizeof (Vector8 )],
237+ vector8_interleave_high (hi , lo ));
238+ }
239+
240+ (void ) hex_encode_scalar (src + i , len - i , dst + i * 2 );
241+
242+ return (uint64 ) len * 2 ;
243+ #endif
244+ }
245+
196246static inline bool
197247get_hex (const char * cp , char * out )
198248{
@@ -213,8 +263,8 @@ hex_decode(const char *src, size_t len, char *dst)
213263 return hex_decode_safe (src , len , dst , NULL );
214264}
215265
216- uint64
217- hex_decode_safe (const char * src , size_t len , char * dst , Node * escontext )
266+ static inline uint64
267+ hex_decode_safe_scalar (const char * src , size_t len , char * dst , Node * escontext )
218268{
219269 const char * s ,
220270 * srcend ;
@@ -254,6 +304,85 @@ hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext)
254304 return p - dst ;
255305}
256306
307+ /*
308+ * This helper converts each byte to its binary-equivalent nibble by
309+ * subtraction and combines them to form the return bytes (separated by zero
310+ * bytes). Returns false if any input bytes are outside the expected ranges of
311+ * ASCII values. Otherwise, returns true.
312+ */
313+ #ifndef USE_NO_SIMD
314+ static inline bool
315+ hex_decode_simd_helper (const Vector8 src , Vector8 * dst )
316+ {
317+ Vector8 sub ;
318+ Vector8 mask_hi = vector8_interleave_low (vector8_broadcast (0 ), vector8_broadcast (0x0f ));
319+ Vector8 mask_lo = vector8_interleave_low (vector8_broadcast (0x0f ), vector8_broadcast (0 ));
320+ Vector8 tmp ;
321+ bool ret ;
322+
323+ tmp = vector8_gt (vector8_broadcast ('9' + 1 ), src );
324+ sub = vector8_and (tmp , vector8_broadcast ('0' ));
325+
326+ tmp = vector8_gt (src , vector8_broadcast ('A' - 1 ));
327+ tmp = vector8_and (tmp , vector8_broadcast ('A' - 10 ));
328+ sub = vector8_add (sub , tmp );
329+
330+ tmp = vector8_gt (src , vector8_broadcast ('a' - 1 ));
331+ tmp = vector8_and (tmp , vector8_broadcast ('a' - 'A' ));
332+ sub = vector8_add (sub , tmp );
333+
334+ * dst = vector8_issub (src , sub );
335+ ret = !vector8_has_ge (* dst , 0x10 );
336+
337+ tmp = vector8_and (* dst , mask_hi );
338+ tmp = vector8_shift_right (tmp , 8 );
339+ * dst = vector8_and (* dst , mask_lo );
340+ * dst = vector8_shift_left (* dst , 4 );
341+ * dst = vector8_or (* dst , tmp );
342+ return ret ;
343+ }
344+ #endif /* ! USE_NO_SIMD */
345+
346+ uint64
347+ hex_decode_safe (const char * src , size_t len , char * dst , Node * escontext )
348+ {
349+ #ifdef USE_NO_SIMD
350+ return hex_decode_safe_scalar (src , len , dst , escontext );
351+ #else
352+ const uint64 tail_idx = len & ~(sizeof (Vector8 ) * 2 - 1 );
353+ uint64 i ;
354+ bool success = true;
355+
356+ /*
357+ * We must process 2 vectors at a time since the output will be half the
358+ * length of the input.
359+ */
360+ for (i = 0 ; i < tail_idx ; i += sizeof (Vector8 ) * 2 )
361+ {
362+ Vector8 srcv ;
363+ Vector8 dstv1 ;
364+ Vector8 dstv2 ;
365+
366+ vector8_load (& srcv , (const uint8 * ) & src [i ]);
367+ success &= hex_decode_simd_helper (srcv , & dstv1 );
368+
369+ vector8_load (& srcv , (const uint8 * ) & src [i + sizeof (Vector8 )]);
370+ success &= hex_decode_simd_helper (srcv , & dstv2 );
371+
372+ vector8_store ((uint8 * ) & dst [i / 2 ], vector8_pack_16 (dstv1 , dstv2 ));
373+ }
374+
375+ /*
376+ * If something didn't look right in the vector path, try again in the
377+ * scalar path so that we can handle it correctly.
378+ */
379+ if (!success )
380+ i = 0 ;
381+
382+ return i / 2 + hex_decode_safe_scalar (src + i , len - i , dst + i / 2 , escontext );
383+ #endif
384+ }
385+
257386static uint64
258387hex_enc_len (const char * src , size_t srclen )
259388{
0 commit comments