
Commit b1d3888

fix(index): who cares about race conditions anyway
PRETTY SURE THIS IS FINE
1 parent 954b1b3 commit b1d3888

3 files changed: +48 -116 lines changed


Diff for: lib/entry-index.js

+22 -72
@@ -5,7 +5,6 @@ const contentPath = require('./content/path')
 const crypto = require('crypto')
 const fixOwner = require('./util/fix-owner')
 const fs = require('graceful-fs')
-const lockfile = require('lockfile')
 const path = require('path')
 const pipe = require('mississippi').pipe
 const Promise = require('bluebird')
@@ -14,64 +13,30 @@ const through = require('mississippi').through

 const indexV = require('../package.json')['cache-version'].index

+const appendFileAsync = Promise.promisify(fs.appendFile)
+
 module.exports.insert = insert
 function insert (cache, key, digest, opts) {
   opts = opts || {}
   const bucket = bucketPath(cache, key)
-  const lock = bucket + '.lock'
   return fixOwner.mkdirfix(
     path.dirname(bucket), opts.uid, opts.gid
-  ).then(() => (
-    Promise.fromNode(_cb => {
-      const cb = (err, entry) => {
-        lockfile.unlock(lock, er => {
-          _cb(err || er, entry)
-        })
-      }
-      lockfile.lock(lock, {
-        stale: 60000,
-        retries: 10,
-        wait: 10000
-      }, function (err) {
-        if (err) { return _cb(err) }
-        fs.stat(bucket, function (err, existing) {
-          if (err && err.code !== 'ENOENT' && err.code !== 'EPERM') {
-            return cb(err)
-          }
-          const entry = {
-            key: key,
-            digest: digest,
-            hashAlgorithm: opts.hashAlgorithm,
-            time: +(new Date()),
-            metadata: opts.metadata
-          }
-          // Because of the way these entries work,
-          // the index is safe from fs.appendFile stopping
-          // mid-write so long as newlines are *prepended*
-          //
-          // That is, if a write fails, it will be ignored
-          // by `find`, and the next successful one will be
-          // used.
-          //
-          // This should be -very rare-, since `fs.appendFile`
-          // will often be atomic on most platforms unless
-          // very large metadata has been included, but caches
-          // like this one tend to last a long time. :)
-          // Most corrupted reads are likely to be from attempting
-          // to read the index while it's being written to --
-          // which is safe, but not guaranteed to be atomic.
-          const e = (existing ? '\n' : '') + JSON.stringify(entry)
-          fs.appendFile(bucket, e, function (err) {
-            cb(err, entry)
-          })
-        })
-      })
-    })
-  )).then(entry => {
-    return fixOwner.chownr(bucket, opts.uid, opts.gid).then(() => {
-      return formatEntry(cache, entry)
-    })
-  })
+  ).then(() => {
+    const entry = {
+      key: key,
+      digest: digest,
+      hashAlgorithm: opts.hashAlgorithm,
+      time: +(new Date()),
+      metadata: opts.metadata
+    }
+    return appendFileAsync(
+      bucket, '\n' + JSON.stringify(entry)
+    ).then(() => entry)
+  }).then(entry => (
+    fixOwner.chownr(bucket, opts.uid, opts.gid).then(() => (
+      formatEntry(cache, entry)
+    ))
+  ))
 }

 module.exports.find = find
@@ -126,7 +91,7 @@ function lsStream (cache) {
       fs.readFile(path.join(indexDir, bucket, f), 'utf8', function (err, data) {
         if (err) { return cb(err) }
         const entries = {}
-        data.split('\n').forEach(function (entry) {
+        data.split('\n').slice(1).forEach(function (entry) {
           let parsed
           try {
             parsed = JSON.parse(entry)
@@ -186,30 +151,15 @@ function bucketDir (cache) {
 module.exports._bucketPath = bucketPath
 function bucketPath (cache, key) {
   const hashed = hashKey(key)
-  return path.join(bucketDir(cache), hashed.slice(0, 2), hashed)
+  return path.join(bucketDir(cache), hashed.slice(0, 2), hashed.slice(2))
 }

 module.exports._hashKey = hashKey
 function hashKey (key) {
-  // NOTE (SECURITY)
-  //
-  // `sha1` conflicts can be generated, but it doesn't matter in this case,
-  // since we intend for there to be regular conflicts anyway. You can have
-  // the entire cache in a single bucket and all that'll do is just make a big
-  // file with a lot of contention, if you can even pull it off in the `key`
-  // string. So whatever. `sha1` is faster and it doesn't trigger the warnings
-  // `md5` tends to (yet?...).
-  //
-  // Not to mention, that in the case of pacote/npm, the amount of control
-  // anyone would have over this key is so minimal that it's incredibly
-  // unlikely that they could intentionally generate a large number of
-  // conflicts just with a package key such that they'd do anything resembling
-  // a hash flood DOS.
   return crypto
-    .createHash('sha1')
-    .update(key.toLowerCase()) // lump case-variant keys into same bucket.
+    .createHash('sha256')
+    .update(key)
     .digest('hex')
-    .slice(0, 7)
 }

 function formatEntry (cache, entry) {
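
Note on the trade this commit makes: with lockfile gone, concurrent insert calls can still append to the same bucket file, but every entry is now written as a single '\n' + JSON.stringify(entry) chunk, so a torn or interleaved write can only garble one line, and readers already skip lines that fail to parse (the slice(1) plus try/JSON.parse in lsStream above). A minimal sketch of that tolerant read path follows; bucketPathFor and readBucketEntries are hypothetical helper names for illustration, not part of this diff, and indexDir stands in for whatever bucketDir(cache) resolves to.

'use strict'

const crypto = require('crypto')
const fs = require('fs')
const path = require('path')

// Hypothetical helper mirroring the new bucket layout: sha256 of the raw
// key, first two hex characters as a subdirectory, the rest as the file name.
function bucketPathFor (indexDir, key) {
  const hashed = crypto.createHash('sha256').update(key).digest('hex')
  return path.join(indexDir, hashed.slice(0, 2), hashed.slice(2))
}

// Hypothetical tolerant reader: entries are appended as '\n' + JSON, so
// split on newlines, drop the leading empty chunk, and ignore any line
// that fails to parse (e.g. an append torn by a concurrent process).
// Later entries for the same key overwrite earlier ones, which is how
// updates work without locking.
function readBucketEntries (indexDir, key) {
  let data
  try {
    data = fs.readFileSync(bucketPathFor(indexDir, key), 'utf8')
  } catch (err) {
    if (err.code === 'ENOENT') { return [] }
    throw err
  }
  const byKey = new Map()
  data.split('\n').slice(1).forEach(line => {
    try {
      const entry = JSON.parse(line)
      byKey.set(entry.key, entry)
    } catch (_) {
      // partial or corrupted line from an interrupted write: skip it
    }
  })
  return Array.from(byKey.values())
}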

Diff for: test/index.insert.js

+25 -43
@@ -34,7 +34,7 @@ test('basic insertion', function (t) {
     }, 'formatted entry returned')
     return fs.readFileAsync(BUCKET, 'utf8')
   }).then(data => {
-    t.equal(data[0], '{', 'first entry starts with a {, not \\n')
+    t.equal(data[0], '\n', 'first entry starts with a \\n')
     const entry = JSON.parse(data)
     t.ok(entry.time, 'entry has a timestamp')
     t.deepEqual(entry, {
@@ -55,7 +55,7 @@ test('inserts additional entries into existing key', function (t) {
   )).then(() => {
     return fs.readFileAsync(BUCKET, 'utf8')
   }).then(data => {
-    const entries = data.split('\n').map(JSON.parse)
+    const entries = data.split('\n').slice(1).map(JSON.parse)
     entries.forEach(function (e) { delete e.time })
     t.deepEqual(entries, [{
       key: KEY,
@@ -112,48 +112,30 @@ test('optional arbitrary metadata', function (t) {
 test('key case-sensitivity', function (t) {
   return Promise.join(
     index.insert(CACHE, KEY, DIGEST),
-    index.insert(CACHE, KEY.toUpperCase(), DIGEST)
+    index.insert(CACHE, KEY.toUpperCase(), DIGEST + 'upper')
   ).then(() => {
-    return fs.readFileAsync(BUCKET, 'utf8')
-  }).then(data => {
-    const entries = data.split('\n').map(JSON.parse).sort(e => (
-      e.key === KEY
-      ? -1
-      : 1
-    ))
-    entries.forEach(function (e) { delete e.time })
-    t.deepEqual(entries, [{
-      key: KEY,
-      digest: DIGEST
-    }, {
-      key: KEY.toUpperCase(),
-      digest: DIGEST
-    }], 'all entries present')
-  })
-})
-
-test('hash conflict in same bucket', function (t) {
-  // NOTE - this test will break if `index._hashKey` changes its algorithm.
-  // Adapt to it accordingly.
-  const NEWKEY = KEY.toUpperCase()
-  const CONFLICTING = KEY.toLowerCase()
-  return index.insert(
-    CACHE, NEWKEY, DIGEST
-  ).then(() => (
-    index.insert(CACHE, CONFLICTING, DIGEST)
-  )).then(() => {
-    const bucket = index._bucketPath(CACHE, NEWKEY)
-    return fs.readFileAsync(bucket, 'utf8')
-  }).then(data => {
-    const entries = data.split('\n').map(JSON.parse)
-    entries.forEach(function (e) { delete e.time })
-    t.deepEqual(entries, [{
-      key: NEWKEY,
-      digest: DIGEST
-    }, {
-      key: CONFLICTING,
-      digest: DIGEST
-    }], 'multiple entries for conflicting keys in the same bucket')
+    return Promise.join(
+      index.find(CACHE, KEY),
+      index.find(CACHE, KEY.toUpperCase()),
+      (entry, upperEntry) => {
+        delete entry.time
+        delete upperEntry.time
+        t.deepEqual({
+          key: entry.key,
+          digest: entry.digest
+        }, {
+          key: KEY,
+          digest: DIGEST
+        }, 'regular entry exists')
+        t.deepEqual({
+          key: upperEntry.key,
+          digest: upperEntry.digest
+        }, {
+          key: KEY.toUpperCase(),
+          digest: DIGEST + 'upper'
+        }, 'case-variant entry intact')
+      }
+    )
   })
 })
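
Why the old 'hash conflict in same bucket' test disappears: the previous hashKey lowercased the key and truncated a sha1 digest to 7 hex characters, so KEY and KEY.toUpperCase() deliberately shared a bucket and the test could exercise conflicting entries in one file. With full sha256 over the raw key, case variants land in separate buckets, which is why the rewritten case-sensitivity test checks each variant through index.find instead of reading a single bucket file. A standalone sketch (illustrative only, not the module's API) contrasting the two behaviours:

'use strict'

const crypto = require('crypto')

// Old behaviour: lowercase the key, sha1, keep the first 7 hex chars,
// so case-variant keys collapse into the same bucket.
function oldHashKey (key) {
  return crypto.createHash('sha1')
    .update(key.toLowerCase())
    .digest('hex')
    .slice(0, 7)
}

// New behaviour: sha256 over the raw key, no truncation, so case
// variants hash to distinct buckets.
function newHashKey (key) {
  return crypto.createHash('sha256')
    .update(key)
    .digest('hex')
}

const KEY = 'my-test-key'
console.log(oldHashKey(KEY) === oldHashKey(KEY.toUpperCase())) // true  -> same bucket
console.log(newHashKey(KEY) === newHashKey(KEY.toUpperCase())) // false -> separate buckets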

Diff for: test/util/cache-index.js

+1 -1
@@ -26,7 +26,7 @@ function CacheIndex (entries, hashAlgorithm) {
      if (typeof lines.length !== 'number') {
        lines = [lines]
      }
-      serialised = lines.map(JSON.stringify).join('\n')
+      serialised = '\n' + lines.map(JSON.stringify).join('\n')
    }
    insertContent(tree, parts, serialised)
  })
