Skip to content

Commit c3644e5

Browse files
chrisdickinson authored and zkat committed
fix(index): segment cache items with another subbucket (#64)
This will further help prevent individual buckets from becoming so big, they start giving filesystems a hard time because of too many inodes in a dir. Or, you know, you can only use cacache in btrfs, that's fine too? 😏

* fix: move hash segmenting to util module
* BREAKING: add extra sub-bucket
* reduce noise: factor out readdir w/ ignored ENOENT
* bump content/index versions
* back out module.exports = { ... }
1 parent e72f290 commit c3644e5

File tree

5 files changed

+79
-66
lines changed

5 files changed

+79
-66
lines changed

Diff for: get.js

+2-2
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ function getData (byDigest, cache, key, opts) {
3535
byDigest ? Promise.resolve(null) : index.find(cache, key, opts)
3636
).then(entry => {
3737
if (!entry && !byDigest) {
38-
throw index.notFoundError(cache, key)
38+
throw new index.NotFoundError(cache, key)
3939
}
4040
return read(cache, byDigest ? key : entry.digest, {
4141
hashAlgorithm: byDigest ? opts.hashAlgorithm : entry.hashAlgorithm,
@@ -73,7 +73,7 @@ function getStream (cache, key, opts) {
7373
index.find(cache, key).then(entry => {
7474
if (!entry) {
7575
return stream.emit(
76-
'error', index.notFoundError(cache, key)
76+
'error', new index.NotFoundError(cache, key)
7777
)
7878
}
7979
let memoStream

Diff for: lib/content/path.js

+3-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
'use strict'
22

33
var contentVer = require('../../package.json')['cache-version'].content
4+
var hashToSegments = require('../util/hash-to-segments')
45
var path = require('path')
56

67
// Current format of content file path:
@@ -11,11 +12,9 @@ module.exports = contentPath
1112
function contentPath (cache, address, hashAlgorithm) {
1213
address = address && address.toLowerCase()
1314
hashAlgorithm = hashAlgorithm ? hashAlgorithm.toLowerCase() : 'sha512'
14-
return path.join(
15+
return path.join.apply(path, [
1516
cache,
1617
`content-v${contentVer}`,
1718
hashAlgorithm,
18-
address.slice(0, 2),
19-
address.slice(2)
20-
)
19+
].concat(hashToSegments(address)))
2120
}

Diff for: lib/entry-index.js

+61-57
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,30 @@
11
'use strict'
22

3-
const asyncMap = require('slide/lib/async-map')
43
const contentPath = require('./content/path')
54
const crypto = require('crypto')
65
const fixOwner = require('./util/fix-owner')
76
const fs = require('graceful-fs')
87
const path = require('path')
98
const Promise = require('bluebird')
10-
const through = require('mississippi').through
9+
const ms = require('mississippi')
10+
const hashToSegments = require('./util/hash-to-segments')
1111

1212
const indexV = require('../package.json')['cache-version'].index
1313

1414
const appendFileAsync = Promise.promisify(fs.appendFile)
1515
const readFileAsync = Promise.promisify(fs.readFile)
16+
const readdirAsync = Promise.promisify(fs.readdir)
17+
const concat = ms.concat
18+
const from = ms.from
19+
20+
module.exports.NotFoundError = class NotFoundError extends Error {
21+
constructor (cache, key) {
22+
super('content not found')
23+
this.code = 'ENOENT'
24+
this.cache = cache
25+
this.key = key
26+
}
27+
}
1628

1729
module.exports.insert = insert
1830
function insert (cache, key, digest, opts) {
@@ -74,71 +86,54 @@ function del (cache, key) {
7486
module.exports.lsStream = lsStream
7587
function lsStream (cache) {
7688
const indexDir = bucketDir(cache)
77-
const stream = through.obj()
78-
fs.readdir(indexDir, function (err, buckets) {
79-
if (err && err.code === 'ENOENT') {
80-
return stream.end()
81-
} else if (err) {
82-
return stream.emit('error', err)
83-
} else {
84-
asyncMap(buckets, (bucket, cb) => {
85-
fs.readdir(path.join(indexDir, bucket), (err, files) => {
86-
if (err && err.code === 'ENOENT') {
87-
return cb()
88-
} else if (err) {
89-
return cb(err)
90-
} else {
91-
asyncMap(files, function (f, cb) {
92-
const bpath = path.join(indexDir, bucket, f)
93-
bucketEntries(cache, bpath).then(_entries => {
94-
const entries = _entries.reduce((acc, entry) => {
95-
acc[entry.key] = entry
96-
return acc
97-
}, {})
98-
Object.keys(entries).forEach(function (k) {
99-
stream.write(formatEntry(cache, entries[k]))
100-
})
101-
cb()
102-
}, err => {
103-
if (err.code === 'ENOENT') {
104-
cb()
105-
} else {
106-
cb(err)
107-
}
108-
})
109-
}, cb)
110-
}
111-
})
112-
}, function (err) {
113-
if (err) { stream.emit('error') }
114-
stream.end()
89+
const stream = from.obj()
90+
91+
// "/cachename/*"
92+
readdirOrEmpty(indexDir).map(bucket => {
93+
const bucketPath = path.join(indexDir, bucket)
94+
95+
// "/cachename/<bucket 0xFF>/*"
96+
return readdirOrEmpty(bucketPath).map(subbucket => {
97+
const subbucketPath = path.join(bucketPath, subbucket)
98+
99+
// "/cachename/<bucket 0xFF>/<bucket 0xFF>/*"
100+
return readdirOrEmpty(subbucketPath).map(entry => {
101+
const getKeyToEntry = bucketEntries(
102+
cache,
103+
path.join(subbucketPath, entry)
104+
).reduce((acc, entry) => {
105+
acc.set(entry.key, entry)
106+
return acc
107+
}, new Map())
108+
109+
return getKeyToEntry.then(reduced => {
110+
return Array.from(reduced.values()).map(
111+
entry => stream.push(formatEntry(cache, entry))
112+
)
113+
}).catch({code: 'ENOENT'}, nop)
115114
})
116-
}
115+
})
116+
}).then(() => {
117+
stream.push(null)
118+
}, err => {
119+
stream.emit('error', err)
117120
})
121+
118122
return stream
119123
}
120124

121125
module.exports.ls = ls
122126
function ls (cache) {
123-
const entries = {}
124127
return Promise.fromNode(cb => {
125-
lsStream(cache).on('finish', function () {
126-
cb(null, entries)
127-
}).on('data', function (d) {
128-
entries[d.key] = d
129-
}).on('error', cb)
128+
lsStream(cache).on('error', cb).pipe(concat(entries => {
129+
cb(null, entries.reduce((acc, xs) => {
130+
acc[xs.key] = xs
131+
return acc
132+
}, {}))
133+
}))
130134
})
131135
}
132136

133-
module.exports.notFoundError = notFoundError
134-
function notFoundError (cache, key) {
135-
const err = new Error('content not found')
136-
err.code = 'ENOENT'
137-
err.cache = cache
138-
err.key = key
139-
return err
140-
}
141-
142137
function bucketEntries (cache, bucket, filter) {
143138
return readFileAsync(
144139
bucket, 'utf8'
@@ -172,7 +167,9 @@ function bucketDir (cache) {
172167
module.exports._bucketPath = bucketPath
173168
function bucketPath (cache, key) {
174169
const hashed = hashKey(key)
175-
return path.join(bucketDir(cache), hashed.slice(0, 2), hashed.slice(2))
170+
return path.join.apply(path, [bucketDir(cache)].concat(
171+
hashToSegments(hashed)
172+
))
176173
}
177174

178175
module.exports._hashKey = hashKey
@@ -193,3 +190,10 @@ function formatEntry (cache, entry) {
193190
metadata: entry.metadata
194191
}
195192
}
193+
194+
function readdirOrEmpty (dir) {
195+
return readdirAsync(dir).catch({code: 'ENOENT'}, () => [])
196+
}
197+
198+
function nop () {
199+
}

Diff for: lib/util/hash-to-segments.js

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
'use strict'
2+
3+
module.exports = hashToSegments
4+
5+
function hashToSegments (hash) {
6+
return [
7+
hash.slice(0, 2),
8+
hash.slice(2, 4),
9+
hash.slice(4)
10+
]
11+
}

Diff for: package.json

+2-3
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
"name": "cacache",
33
"version": "6.0.1",
44
"cache-version": {
5-
"content": "1",
6-
"index": "1"
5+
"content": "2",
6+
"index": "2"
77
},
88
"description": "General content-addressable cache system that maintains a filesystem registry of file data.",
99
"main": "index.js",
@@ -58,7 +58,6 @@
5858
"once": "^1.4.0",
5959
"promise-inflight": "^1.0.1",
6060
"rimraf": "^2.6.1",
61-
"slide": "^1.1.6",
6261
"unique-filename": "^1.1.0"
6362
},
6463
"devDependencies": {

0 commit comments

Comments (0)