Skip to content

Commit 02b983f

Browse files
committed
Minor changes to hashing and cache aging algorithms.
Hashing was changed slightly to exploit the fact that a significant number of real-world JSON strings / keys will have a high percentage of ASCII characters. Cache aging was modified to use an AIMD (additive increase, multiplicative decrease) policy. When an item is found in the cache, its age is incremented by one using saturating arithmetic. Items are "quasi-randomly" aged using an unsigned right shift; in other words, the item's age is divided in half. Since ages decrease far more quickly than they increase, the cache can quickly adapt and converge on the "hot set".
1 parent 0aff3de commit 02b983f

File tree

1 file changed

+17
-17
lines changed

1 file changed

+17
-17
lines changed

JSONKit.m

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ The code in isValidCodePoint() is derived from the ICU code in
175175
#define JK_CACHE_SLOTS (1UL << JK_CACHE_SLOTS_BITS)
176176
// JK_CACHE_PROBES is the number of probe attempts.
177177
#define JK_CACHE_PROBES (4UL)
178-
// JK_INIT_CACHE_AGE must be (1 << AGE) - 1
178+
// JK_INIT_CACHE_AGE must be < (1 << AGE_BITS) - 1, where AGE_BITS is the number of bits in the type used to store a cache item's age (i.e., sizeof(age type) * 8).
179179
#define JK_INIT_CACHE_AGE (0)
180180

181181
// JK_TOKENBUFFER_SIZE is the default stack size for the temporary buffer used to hold "non-simple" strings (i.e., contains \ escapes)
@@ -609,7 +609,7 @@ - (void)releaseState;
609609

610610
JK_STATIC_INLINE size_t jk_min(size_t a, size_t b);
611611
JK_STATIC_INLINE size_t jk_max(size_t a, size_t b);
612-
JK_STATIC_INLINE JKHash calculateHash(JKHash currentHash, unsigned char c);
612+
JK_STATIC_INLINE JKHash jk_calculateHash(JKHash currentHash, unsigned char c);
613613

614614
// JSONKit v1.4 used both a JKArray : NSArray and JKMutableArray : NSMutableArray, and the same for the dictionary collection type.
615615
// However, Louis Gerbarg (via cocoa-dev) pointed out that Cocoa / Core Foundation actually implements only a single class that inherits from the
@@ -1111,7 +1111,8 @@ - (id)mutableCopyWithZone:(NSZone *)zone
11111111
// Returns the smaller of the two sizes. When a and b are equal, that common value is returned.
JK_STATIC_INLINE size_t jk_min(size_t a, size_t b) {
  if(a < b) { return(a); }
  return(b);
}
11121112
// Returns the larger of the two sizes. When a and b are equal, that common value is returned.
JK_STATIC_INLINE size_t jk_max(size_t a, size_t b) {
  if(a > b) { return(a); }
  return(b);
}
11131113

1114-
// Folds one byte, c, in to a running hash and returns the updated hash: ((currentHash << 5) + currentHash) + c.
// NOTE(review): this equals (currentHash * 33) + c with wrap-around only if JKHash is an unsigned integer type. JKHash is declared outside this view -- confirm.
JK_STATIC_INLINE JKHash calculateHash(JKHash currentHash, unsigned char c) { return(((currentHash << 5) + currentHash) + c); }
1114+
// Folds one byte, c, in to a running hash and returns the updated hash.
// The result is (((currentHash << 5) + currentHash) + (c - 29)) XORed with (currentHash >> 19), which mixes some of the upper bits of the previous hash back in to the result.
// NOTE(review): c is promoted to int, so (c - 29) is negative for c < 29. This presumably relies on JKHash being an unsigned type so that the addition wraps. JKHash is declared outside this view -- confirm.
JK_STATIC_INLINE JKHash jk_calculateHash(JKHash currentHash, unsigned char c) { return((((currentHash << 5) + currentHash) + (c - 29)) ^ (currentHash >> 19)); }
1115+
11151116

11161117
static void jk_error(JKParseState *parseState, NSString *format, ...) {
11171118
NSCParameterAssert((parseState != NULL) && (format != NULL));
@@ -1408,7 +1409,7 @@ JK_STATIC_INLINE int jk_string_add_unicodeCodePoint(JKParseState *parseState, ui
14081409
if((result = ConvertUTF32toUTF8(unicodeCodePoint, &u8s, (parseState->token.tokenBuffer.bytes.ptr + parseState->token.tokenBuffer.bytes.length))) != conversionOK) { if(result == targetExhausted) { return(1); } }
14091410
size_t utf8len = u8s - &parseState->token.tokenBuffer.bytes.ptr[*tokenBufferIdx], nextIdx = (*tokenBufferIdx) + utf8len;
14101411

1411-
while(*tokenBufferIdx < nextIdx) { *stringHash = calculateHash(*stringHash, parseState->token.tokenBuffer.bytes.ptr[(*tokenBufferIdx)++]); }
1412+
while(*tokenBufferIdx < nextIdx) { *stringHash = jk_calculateHash(*stringHash, parseState->token.tokenBuffer.bytes.ptr[(*tokenBufferIdx)++]); }
14121413

14131414
return(0);
14141415
}
@@ -1442,8 +1443,8 @@ static int jk_parse_string(JKParseState *parseState) {
14421443
ConversionResult result;
14431444

14441445
if(JK_EXPECT_F((result = ConvertSingleCodePointInUTF8(atStringCharacter - 1, endOfBuffer, (UTF8 const **)&nextValidCharacter, &u32ch)) != conversionOK)) { goto switchToSlowPath; }
1445-
stringHash = calculateHash(stringHash, currentChar);
1446-
while(atStringCharacter < nextValidCharacter) { NSCParameterAssert(JK_AT_STRING_PTR(parseState) <= JK_END_STRING_PTR(parseState)); stringHash = calculateHash(stringHash, *atStringCharacter++); }
1446+
stringHash = jk_calculateHash(stringHash, currentChar);
1447+
while(atStringCharacter < nextValidCharacter) { NSCParameterAssert(JK_AT_STRING_PTR(parseState) <= JK_END_STRING_PTR(parseState)); stringHash = jk_calculateHash(stringHash, *atStringCharacter++); }
14471448
continue;
14481449
} else {
14491450
if(JK_EXPECT_F(currentChar == (unsigned long)'"')) { stringState = JSONStringStateFinished; goto finishedParsing; }
@@ -1460,7 +1461,7 @@ static int jk_parse_string(JKParseState *parseState) {
14601461

14611462
if(JK_EXPECT_F(currentChar < 0x20UL)) { jk_error(parseState, @"Invalid character < 0x20 found in string: 0x%2.2x.", currentChar); stringState = JSONStringStateError; goto finishedParsing; }
14621463

1463-
stringHash = calculateHash(stringHash, currentChar);
1464+
stringHash = jk_calculateHash(stringHash, currentChar);
14641465
}
14651466
}
14661467

@@ -1478,7 +1479,7 @@ static int jk_parse_string(JKParseState *parseState) {
14781479
if(JK_EXPECT_T(currentChar < (unsigned long)0x80)) { // Not a UTF8 sequence
14791480
if(JK_EXPECT_F(currentChar == (unsigned long)'"')) { stringState = JSONStringStateFinished; atStringCharacter++; goto finishedParsing; }
14801481
if(JK_EXPECT_F(currentChar == (unsigned long)'\\')) { stringState = JSONStringStateEscape; continue; }
1481-
stringHash = calculateHash(stringHash, currentChar);
1482+
stringHash = jk_calculateHash(stringHash, currentChar);
14821483
tokenBuffer[tokenBufferIdx++] = currentChar;
14831484
continue;
14841485
} else { // UTF8 sequence
@@ -1493,7 +1494,7 @@ static int jk_parse_string(JKParseState *parseState) {
14931494
atStringCharacter = nextValidCharacter - 1;
14941495
continue;
14951496
} else {
1496-
while(atStringCharacter < nextValidCharacter) { tokenBuffer[tokenBufferIdx++] = *atStringCharacter; stringHash = calculateHash(stringHash, *atStringCharacter++); }
1497+
while(atStringCharacter < nextValidCharacter) { tokenBuffer[tokenBufferIdx++] = *atStringCharacter; stringHash = jk_calculateHash(stringHash, *atStringCharacter++); }
14971498
atStringCharacter--;
14981499
continue;
14991500
}
@@ -1521,7 +1522,7 @@ static int jk_parse_string(JKParseState *parseState) {
15211522

15221523
parsedEscapedChar:
15231524
stringState = JSONStringStateParsing;
1524-
stringHash = calculateHash(stringHash, escapedChar);
1525+
stringHash = jk_calculateHash(stringHash, escapedChar);
15251526
tokenBuffer[tokenBufferIdx++] = escapedChar;
15261527
break;
15271528

@@ -1709,7 +1710,7 @@ static int jk_parse_number(JKParseState *parseState) {
17091710
if(JK_EXPECT_F(endOfNumber != &numberTempBuf[parseState->token.tokenPtrRange.length]) && JK_EXPECT_F(numberState != JSONNumberStateError)) { numberState = JSONNumberStateError; jk_error(parseState, @"The conversion function did not consume all of the number tokens characters."); }
17101711

17111712
size_t hashIndex = 0UL;
1712-
for(hashIndex = 0UL; hashIndex < parseState->token.value.ptrRange.length; hashIndex++) { parseState->token.value.hash = calculateHash(parseState->token.value.hash, parseState->token.value.ptrRange.ptr[hashIndex]); }
1713+
for(hashIndex = 0UL; hashIndex < parseState->token.value.ptrRange.length; hashIndex++) { parseState->token.value.hash = jk_calculateHash(parseState->token.value.hash, parseState->token.value.ptrRange.ptr[hashIndex]); }
17131714
}
17141715

17151716
if(JK_EXPECT_F(numberState != JSONNumberStateFinished)) { jk_error(parseState, @"Invalid number."); }
@@ -1972,7 +1973,7 @@ static id json_parse_it(JKParseState *parseState) {
19721973
#pragma mark Object cache
19731974

19741975
// This uses a Galois Linear Feedback Shift Register (LFSR) PRNG to pick which item in the cache to age. It has a period of (2^32)-1.
1975-
// NOTE: A LFSR *MUST* be initialized to a non-zero value and must always have a non-zero value.
1976+
// NOTE: A LFSR *MUST* be initialized to a non-zero value and must always have a non-zero value. The LFSR is initialized to 1 in -initWithParseOptions:
19761977
JK_STATIC_INLINE void jk_cache_age(JKParseState *parseState) {
19771978
NSCParameterAssert((parseState != NULL) && (parseState->cache.prng_lfsr != 0U));
19781979
parseState->cache.prng_lfsr = (parseState->cache.prng_lfsr >> 1) ^ ((0U - (parseState->cache.prng_lfsr & 1U)) & 0x80200003U);
@@ -1983,9 +1984,8 @@ JK_STATIC_INLINE void jk_cache_age(JKParseState *parseState) {
19831984
//
19841985
// The hash table is a linear C array of JKTokenCacheItem. The terms "item" and "bucket" are synonymous with the index in to the cache array, i.e. cache.items[bucket].
19851986
//
1986-
// Items in the cache have an age associated with them. The age is the number of rightmost 1 bits, i.e. 0000 = 0, 0001 = 1, 0011 = 2, 0111 = 3, 1111 = 4.
1987-
// This allows us to use left and right shifts to add or subtract from an items age. Add = (age << 1) | 1. Subtract = age >> 0. Subtract is synonymous with "age" (i.e., age an item).
1988-
// The reason for this is it allows us to perform saturated adds and subtractions and is branchless.
1987+
// Items in the cache have an age associated with them. An item's age is incremented using saturating unsigned arithmetic and decremented using unsigned right shifts.
1988+
// Thus, an item's age is managed using an AIMD policy: additive increase, multiplicative decrease. All age calculations and manipulations are branchless.
19891989
// The primitive C type MUST be unsigned. It is currently a "char", which allows (at a minimum and in practice) 8 bits.
19901990
//
19911991
// A "useable bucket" is a bucket that is not in use (never populated), or has an age == 0.
@@ -2000,12 +2000,12 @@ JK_STATIC_INLINE void jk_cache_age(JKParseState *parseState) {
20002000
void *parsedAtom = NULL;
20012001

20022002
if(JK_EXPECT_F(parseState->token.value.ptrRange.length == 0UL) && JK_EXPECT_T(parseState->token.value.type == JKValueTypeString)) { return(@""); }
2003-
2003+
20042004
for(x = 0UL; x < JK_CACHE_PROBES; x++) {
20052005
if(JK_EXPECT_F(parseState->cache.items[bucket].object == NULL)) { setBucket = 1UL; useableBucket = bucket; break; }
20062006

20072007
if((JK_EXPECT_T(parseState->cache.items[bucket].hash == parseState->token.value.hash)) && (JK_EXPECT_T(parseState->cache.items[bucket].size == parseState->token.value.ptrRange.length)) && (JK_EXPECT_T(parseState->cache.items[bucket].type == parseState->token.value.type)) && (JK_EXPECT_T(parseState->cache.items[bucket].bytes != NULL)) && (JK_EXPECT_T(memcmp(parseState->cache.items[bucket].bytes, parseState->token.value.ptrRange.ptr, parseState->token.value.ptrRange.length) == 0U))) {
2008-
parseState->cache.age[bucket] = (parseState->cache.age[bucket] << 1) | 1U;
2008+
parseState->cache.age[bucket] = (((uint32_t)parseState->cache.age[bucket]) + 1U) - (((((uint32_t)parseState->cache.age[bucket]) + 1U) >> 31) ^ 1U);
20092009
parseState->token.value.cacheItem = &parseState->cache.items[bucket];
20102010
NSCParameterAssert(parseState->cache.items[bucket].object != NULL);
20112011
return((void *)CFRetain(parseState->cache.items[bucket].object));

0 commit comments

Comments
 (0)