Skip to content

Commit e8402a5

Browse files
committed
feat(index): collate index files into subdirs
Fixes: #14 Most filesystems see performance degradation if a single directory has too many files in it. For that reason, the entry index now stores index buckets as: `<cache>/index/<bucketKey.slice(0, 2)>/<bucketKey>`. BREAKING CHANGE: Previously-generated index entries are no longer compatible and the index must be regenerated.
1 parent bbc5fca commit e8402a5

File tree

3 files changed

+61
-47
lines changed

3 files changed

+61
-47
lines changed

Diff for: lib/entry-index.js

+42-28
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ const through = require('mississippi').through
1515
module.exports.insert = insert
1616
function insert (cache, key, digest, opts) {
1717
opts = opts || {}
18-
const bucket = indexPath(cache, key)
18+
const bucket = bucketPath(cache, key)
1919
const lock = bucket + '.lock'
2020
return fixOwner.mkdirfix(
2121
path.dirname(bucket), opts.uid, opts.gid
@@ -74,7 +74,7 @@ function insert (cache, key, digest, opts) {
7474

7575
module.exports.find = find
7676
function find (cache, key) {
77-
const bucket = indexPath(cache, key)
77+
const bucket = bucketPath(cache, key)
7878
const stream = fs.createReadStream(bucket)
7979
let ret
8080
return Promise.fromNode(cb => {
@@ -105,37 +105,49 @@ function del (cache, key) {
105105

106106
module.exports.lsStream = lsStream
107107
function lsStream (cache) {
108-
const indexPath = path.join(cache, 'index')
108+
const indexDir = path.join(cache, 'index')
109109
const stream = through.obj()
110-
fs.readdir(indexPath, function (err, files) {
110+
fs.readdir(indexDir, function (err, buckets) {
111111
if (err && err.code === 'ENOENT') {
112112
return stream.end()
113113
} else if (err) {
114114
return stream.emit('error', err)
115115
} else {
116-
asyncMap(files, function (f, cb) {
117-
fs.readFile(path.join(indexPath, f), 'utf8', function (err, data) {
118-
if (err) { return cb(err) }
119-
const entries = {}
120-
data.split('\n').forEach(function (entry) {
121-
let parsed
122-
try {
123-
parsed = JSON.parse(entry)
124-
} catch (e) {
125-
}
126-
// NOTE - it's possible for an entry to be
127-
// incomplete/corrupt. So we just skip it.
128-
// See comment on `insert()` for deets.
129-
if (parsed) {
130-
entries[parsed.key] = formatEntry(cache, parsed)
131-
}
132-
})
133-
Object.keys(entries).forEach(function (k) {
134-
stream.write(entries[k])
135-
})
136-
cb()
116+
asyncMap(buckets, (bucket, cb) => {
117+
fs.readdir(path.join(indexDir, bucket), (err, files) => {
118+
if (err && err.code === 'ENOENT') {
119+
return cb()
120+
} else if (err) {
121+
return cb(err)
122+
} else {
123+
asyncMap(files, function (f, cb) {
124+
fs.readFile(path.join(indexDir, bucket, f), 'utf8', function (err, data) {
125+
if (err) { return cb(err) }
126+
const entries = {}
127+
data.split('\n').forEach(function (entry) {
128+
let parsed
129+
try {
130+
parsed = JSON.parse(entry)
131+
} catch (e) {
132+
}
133+
// NOTE - it's possible for an entry to be
134+
// incomplete/corrupt. So we just skip it.
135+
// See comment on `insert()` for deets.
136+
if (parsed) {
137+
entries[parsed.key] = formatEntry(cache, parsed)
138+
}
139+
})
140+
Object.keys(entries).forEach(function (k) {
141+
stream.write(entries[k])
142+
})
143+
cb()
144+
})
145+
}, function (err) {
146+
cb(err)
147+
})
148+
}
137149
})
138-
}, function (err) {
150+
}, err => {
139151
if (err) { stream.emit('error') }
140152
stream.end()
141153
})
@@ -165,8 +177,10 @@ function notFoundError (cache, key) {
165177
return err
166178
}
167179

168-
function indexPath (cache, key) {
169-
return path.join(cache, 'index', hashKey(key))
180+
module.exports._bucketPath = bucketPath
181+
function bucketPath (cache, key) {
182+
const hashed = hashKey(key)
183+
return path.join(cache, 'index', hashed.slice(0, 2), hashed)
170184
}
171185

172186
module.exports._hashKey = hashKey

Diff for: test/index.insert.js

+9-15
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ const index = require('../lib/entry-index')
1616

1717
const KEY = 'foo'
1818
const KEYHASH = index._hashKey(KEY)
19+
const BUCKET = index._bucketPath(CACHE, KEY)
1920
const DIGEST = 'deadbeef'
2021
const ALGO = 'whatnot'
2122

@@ -31,8 +32,7 @@ test('basic insertion', function (t) {
3132
time: entry.time,
3233
metadata: 'foo'
3334
}, 'formatted entry returned')
34-
const bucket = path.join(CACHE, 'index', KEYHASH)
35-
return fs.readFileAsync(bucket, 'utf8')
35+
return fs.readFileAsync(BUCKET, 'utf8')
3636
}).then(data => {
3737
t.equal(data[0], '{', 'first entry starts with a {, not \\n')
3838
const entry = JSON.parse(data)
@@ -53,8 +53,7 @@ test('inserts additional entries into existing key', function (t) {
5353
).then(() => (
5454
index.insert(CACHE, KEY, DIGEST, {metadata: 2})
5555
)).then(() => {
56-
const bucket = path.join(CACHE, 'index', KEYHASH)
57-
return fs.readFileAsync(bucket, 'utf8')
56+
return fs.readFileAsync(BUCKET, 'utf8')
5857
}).then(data => {
5958
const entries = data.split('\n').map(JSON.parse)
6059
entries.forEach(function (e) { delete e.time })
@@ -84,8 +83,7 @@ test('separates entries even if one is corrupted', function (t) {
8483
return index.insert(
8584
CACHE, KEY, DIGEST
8685
).then(() => {
87-
const bucket = path.join(CACHE, 'index', KEYHASH)
88-
return fs.readFileAsync(bucket, 'utf8')
86+
return fs.readFileAsync(BUCKET, 'utf8')
8987
}).then(data => {
9088
const entry = JSON.parse(data.split('\n')[4])
9189
delete entry.time
@@ -101,8 +99,7 @@ test('optional arbitrary metadata', function (t) {
10199
return index.insert(
102100
CACHE, KEY, DIGEST, { metadata: metadata }
103101
).then(() => {
104-
const bucket = path.join(CACHE, 'index', KEYHASH)
105-
return fs.readFileAsync(bucket, 'utf8')
102+
return fs.readFileAsync(BUCKET, 'utf8')
106103
}).then(data => {
107104
const entry = JSON.parse(data)
108105
delete entry.time
@@ -119,8 +116,7 @@ test('key case-sensitivity', function (t) {
119116
index.insert(CACHE, KEY, DIGEST),
120117
index.insert(CACHE, KEY.toUpperCase(), DIGEST)
121118
).then(() => {
122-
const bucket = path.join(CACHE, 'index', KEYHASH)
123-
return fs.readFileAsync(bucket, 'utf8')
119+
return fs.readFileAsync(BUCKET, 'utf8')
124120
}).then(data => {
125121
const entries = data.split('\n').map(JSON.parse).sort(e => (
126122
e.key === KEY
@@ -148,7 +144,7 @@ test('hash conflict in same bucket', function (t) {
148144
).then(() => (
149145
index.insert(CACHE, CONFLICTING, DIGEST)
150146
)).then(() => {
151-
const bucket = path.join(CACHE, 'index', index._hashKey(NEWKEY))
147+
const bucket = index._bucketPath(CACHE, NEWKEY)
152148
return fs.readFileAsync(bucket, 'utf8')
153149
}).then(data => {
154150
const entries = data.split('\n').map(JSON.parse)
@@ -165,11 +161,10 @@ test('hash conflict in same bucket', function (t) {
165161

166162
test('path-breaking characters', function (t) {
167163
const newKey = ';;!registry\nhttps://registry.npmjs.org/back \\ slash@Cool™?'
168-
const newHash = index._hashKey(newKey)
169164
return index.insert(
170165
CACHE, newKey, DIGEST
171166
).then(() => {
172-
const bucket = path.join(CACHE, 'index', newHash)
167+
const bucket = index._bucketPath(CACHE, newKey)
173168
return fs.readFileAsync(bucket, 'utf8')
174169
}).then(data => {
175170
const entry = JSON.parse(data)
@@ -186,11 +181,10 @@ test('extremely long keys', function (t) {
186181
for (let i = 0; i < 10000; i++) {
187182
newKey += i
188183
}
189-
const newHash = index._hashKey(newKey)
190184
return index.insert(
191185
CACHE, newKey, DIGEST
192186
).then(() => {
193-
const bucket = path.join(CACHE, 'index', newHash)
187+
const bucket = index._bucketPath(CACHE, newKey)
194188
return fs.readFileAsync(bucket, 'utf8')
195189
}).then(data => {
196190
const entry = JSON.parse(data)

Diff for: test/util/cache-index.js

+10-4
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ function CacheIndex (entries) {
1616
Object.keys(entries).forEach(function (k) {
1717
var lines = entries[k]
1818
var hashed = hashKey(k)
19+
var prefix = hashed.slice(0, 2)
1920
var serialised
2021
if (typeof lines === 'string') {
2122
serialised = lines
@@ -25,12 +26,17 @@ function CacheIndex (entries) {
2526
}
2627
serialised = lines.map(JSON.stringify).join('\n')
2728
}
28-
index[hashed] = index[hashed]
29-
? [index[hashed], serialised].join('\n')
29+
index[prefix] = index[prefix] || {}
30+
index[prefix][hashed] = index[prefix][hashed]
31+
? [index[prefix][hashed], serialised].join('\n')
3032
: serialised
3133
})
32-
Object.keys(index).forEach(function (k) {
33-
index[k] = File(index[k])
34+
Object.keys(index).forEach(function (prefix) {
35+
var files = {}
36+
Object.keys(index[prefix]).forEach(key => {
37+
files[key] = File(index[prefix][key])
38+
})
39+
index[prefix] = Dir(files)
3440
})
3541
return Dir(index)
3642
}

0 commit comments

Comments
 (0)