Skip to content

Commit b437443

Browse files
committed
p2p/discover: fix race involving the seed node iterator
nodeDB.querySeeds was not safe for concurrent use but could be called concurrenty on multiple goroutines in the following case: - the table was empty - a timed refresh started - a lookup was started and initiated refresh These conditions are unlikely to coincide during normal use, but are much more likely to occur all at once when the user's machine just woke from sleep. The root cause of the issue is that querySeeds reused the same leveldb iterator until it was exhausted. This commit moves the refresh scheduling logic into its own goroutine (so only one refresh is ever active) and changes querySeeds to not use a persistent iterator. The seed node selection is now more random and ignores nodes that have not been contacted in the last 5 days.
1 parent 7977e87 commit b437443

File tree

5 files changed

+198
-172
lines changed

5 files changed

+198
-172
lines changed

p2p/discover/database.go

Lines changed: 56 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ package discover
2121

2222
import (
2323
"bytes"
24+
"crypto/rand"
2425
"encoding/binary"
2526
"os"
2627
"sync"
@@ -46,11 +47,8 @@ var (
4647

4748
// nodeDB stores all nodes we know about.
4849
type nodeDB struct {
49-
lvl *leveldb.DB // Interface to the database itself
50-
seeder iterator.Iterator // Iterator for fetching possible seed nodes
51-
52-
self NodeID // Own node id to prevent adding it into the database
53-
50+
lvl *leveldb.DB // Interface to the database itself
51+
self NodeID // Own node id to prevent adding it into the database
5452
runner sync.Once // Ensures we can start at most one expirer
5553
quit chan struct{} // Channel to signal the expiring thread to stop
5654
}
@@ -302,52 +300,70 @@ func (db *nodeDB) updateFindFails(id NodeID, fails int) error {
302300
return db.storeInt64(makeKey(id, nodeDBDiscoverFindFails), int64(fails))
303301
}
304302

305-
// querySeeds retrieves a batch of nodes to be used as potential seed servers
306-
// during bootstrapping the node into the network.
307-
//
308-
// Ideal seeds are the most recently seen nodes (highest probability to be still
309-
// alive), but yet untried. However, since leveldb only supports dumb iteration
310-
// we will instead start pulling in potential seeds that haven't been yet pinged
311-
// since the start of the boot procedure.
312-
//
313-
// If the database runs out of potential seeds, we restart the startup counter
314-
// and start iterating over the peers again.
315-
func (db *nodeDB) querySeeds(n int) []*Node {
316-
// Create a new seed iterator if none exists
317-
if db.seeder == nil {
318-
db.seeder = db.lvl.NewIterator(nil, nil)
303+
// querySeeds retrieves random nodes to be used as potential seed nodes
304+
// for bootstrapping.
305+
func (db *nodeDB) querySeeds(n int, maxAge time.Duration) []*Node {
306+
var (
307+
now = time.Now()
308+
nodes = make([]*Node, 0, n)
309+
it = db.lvl.NewIterator(nil, nil)
310+
id NodeID
311+
)
312+
defer it.Release()
313+
314+
seek:
315+
for seeks := 0; len(nodes) < n && seeks < n*5; seeks++ {
316+
// Seek to a random entry. The first byte is incremented by a
317+
// random amount each time in order to increase the likelihood
318+
// of hitting all existing nodes in very small databases.
319+
ctr := id[0]
320+
rand.Read(id[:])
321+
id[0] = ctr + id[0]%16
322+
it.Seek(makeKey(id, nodeDBDiscoverRoot))
323+
324+
n := nextNode(it)
325+
if n == nil {
326+
id[0] = 0
327+
continue seek // iterator exhausted
328+
}
329+
if n.ID == db.self {
330+
continue seek
331+
}
332+
if now.Sub(db.lastPong(n.ID)) > maxAge {
333+
continue seek
334+
}
335+
for i := range nodes {
336+
if nodes[i].ID == n.ID {
337+
continue seek // duplicate
338+
}
339+
}
340+
nodes = append(nodes, n)
319341
}
320-
// Iterate over the nodes and find suitable seeds
321-
nodes := make([]*Node, 0, n)
322-
for len(nodes) < n && db.seeder.Next() {
323-
// Iterate until a discovery node is found
324-
id, field := splitKey(db.seeder.Key())
342+
return nodes
343+
}
344+
345+
// reads the next node record from the iterator, skipping over other
346+
// database entries.
347+
func nextNode(it iterator.Iterator) *Node {
348+
for end := false; !end; end = !it.Next() {
349+
id, field := splitKey(it.Key())
325350
if field != nodeDBDiscoverRoot {
326351
continue
327352
}
328-
// Dump it if its a self reference
329-
if bytes.Compare(id[:], db.self[:]) == 0 {
330-
db.deleteNode(id)
353+
var n Node
354+
if err := rlp.DecodeBytes(it.Value(), &n); err != nil {
355+
if glog.V(logger.Warn) {
356+
glog.Errorf("invalid node %x: %v", id, err)
357+
}
331358
continue
332359
}
333-
// Load it as a potential seed
334-
if node := db.node(id); node != nil {
335-
nodes = append(nodes, node)
336-
}
337-
}
338-
// Release the iterator if we reached the end
339-
if len(nodes) == 0 {
340-
db.seeder.Release()
341-
db.seeder = nil
360+
return &n
342361
}
343-
return nodes
362+
return nil
344363
}
345364

346365
// close flushes and closes the database files.
347366
func (db *nodeDB) close() {
348-
if db.seeder != nil {
349-
db.seeder.Release()
350-
}
351367
close(db.quit)
352368
db.lvl.Close()
353369
}

p2p/discover/database_test.go

Lines changed: 37 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -162,9 +162,33 @@ var nodeDBSeedQueryNodes = []struct {
162162
node *Node
163163
pong time.Time
164164
}{
165+
// This one should not be in the result set because its last
166+
// pong time is too far in the past.
165167
{
166168
node: newNode(
167-
MustHexID("0x01d9d65c4552b5eb43d5ad55a2ee3f56c6cbc1c64a5c8d659f51fcd51bace24351232b8d7821617d2b29b54b81cdefb9b3e9c37d7fd5f63270bcc9e1a6f6a439"),
169+
MustHexID("0x84d9d65c4552b5eb43d5ad55a2ee3f56c6cbc1c64a5c8d659f51fcd51bace24351232b8d7821617d2b29b54b81cdefb9b3e9c37d7fd5f63270bcc9e1a6f6a439"),
170+
net.IP{127, 0, 0, 3},
171+
30303,
172+
30303,
173+
),
174+
pong: time.Now().Add(-3 * time.Hour),
175+
},
176+
// This one shouldn't be in in the result set because its
177+
// nodeID is the local node's ID.
178+
{
179+
node: newNode(
180+
MustHexID("0x57d9d65c4552b5eb43d5ad55a2ee3f56c6cbc1c64a5c8d659f51fcd51bace24351232b8d7821617d2b29b54b81cdefb9b3e9c37d7fd5f63270bcc9e1a6f6a439"),
181+
net.IP{127, 0, 0, 3},
182+
30303,
183+
30303,
184+
),
185+
pong: time.Now().Add(-4 * time.Second),
186+
},
187+
188+
// These should be in the result set.
189+
{
190+
node: newNode(
191+
MustHexID("0x22d9d65c4552b5eb43d5ad55a2ee3f56c6cbc1c64a5c8d659f51fcd51bace24351232b8d7821617d2b29b54b81cdefb9b3e9c37d7fd5f63270bcc9e1a6f6a439"),
168192
net.IP{127, 0, 0, 1},
169193
30303,
170194
30303,
@@ -173,7 +197,7 @@ var nodeDBSeedQueryNodes = []struct {
173197
},
174198
{
175199
node: newNode(
176-
MustHexID("0x02d9d65c4552b5eb43d5ad55a2ee3f56c6cbc1c64a5c8d659f51fcd51bace24351232b8d7821617d2b29b54b81cdefb9b3e9c37d7fd5f63270bcc9e1a6f6a439"),
200+
MustHexID("0x44d9d65c4552b5eb43d5ad55a2ee3f56c6cbc1c64a5c8d659f51fcd51bace24351232b8d7821617d2b29b54b81cdefb9b3e9c37d7fd5f63270bcc9e1a6f6a439"),
177201
net.IP{127, 0, 0, 2},
178202
30303,
179203
30303,
@@ -182,7 +206,7 @@ var nodeDBSeedQueryNodes = []struct {
182206
},
183207
{
184208
node: newNode(
185-
MustHexID("0x03d9d65c4552b5eb43d5ad55a2ee3f56c6cbc1c64a5c8d659f51fcd51bace24351232b8d7821617d2b29b54b81cdefb9b3e9c37d7fd5f63270bcc9e1a6f6a439"),
209+
MustHexID("0xe2d9d65c4552b5eb43d5ad55a2ee3f56c6cbc1c64a5c8d659f51fcd51bace24351232b8d7821617d2b29b54b81cdefb9b3e9c37d7fd5f63270bcc9e1a6f6a439"),
186210
net.IP{127, 0, 0, 3},
187211
30303,
188212
30303,
@@ -192,28 +216,32 @@ var nodeDBSeedQueryNodes = []struct {
192216
}
193217

194218
func TestNodeDBSeedQuery(t *testing.T) {
195-
db, _ := newNodeDB("", Version, NodeID{})
219+
db, _ := newNodeDB("", Version, nodeDBSeedQueryNodes[1].node.ID)
196220
defer db.close()
197221

198222
// Insert a batch of nodes for querying
199223
for i, seed := range nodeDBSeedQueryNodes {
200224
if err := db.updateNode(seed.node); err != nil {
201225
t.Fatalf("node %d: failed to insert: %v", i, err)
202226
}
227+
if err := db.updateLastPong(seed.node.ID, seed.pong); err != nil {
228+
t.Fatalf("node %d: failed to insert lastPong: %v", i, err)
229+
}
203230
}
231+
204232
// Retrieve the entire batch and check for duplicates
205-
seeds := db.querySeeds(2 * len(nodeDBSeedQueryNodes))
206-
if len(seeds) != len(nodeDBSeedQueryNodes) {
207-
t.Errorf("seed count mismatch: have %v, want %v", len(seeds), len(nodeDBSeedQueryNodes))
208-
}
233+
seeds := db.querySeeds(len(nodeDBSeedQueryNodes)*2, time.Hour)
209234
have := make(map[NodeID]struct{})
210235
for _, seed := range seeds {
211236
have[seed.ID] = struct{}{}
212237
}
213238
want := make(map[NodeID]struct{})
214-
for _, seed := range nodeDBSeedQueryNodes {
239+
for _, seed := range nodeDBSeedQueryNodes[2:] {
215240
want[seed.node.ID] = struct{}{}
216241
}
242+
if len(seeds) != len(want) {
243+
t.Errorf("seed count mismatch: have %v, want %v", len(seeds), len(want))
244+
}
217245
for id, _ := range have {
218246
if _, ok := want[id]; !ok {
219247
t.Errorf("extra seed: %v", id)
@@ -224,63 +252,6 @@ func TestNodeDBSeedQuery(t *testing.T) {
224252
t.Errorf("missing seed: %v", id)
225253
}
226254
}
227-
// Make sure the next batch is empty (seed EOF)
228-
seeds = db.querySeeds(2 * len(nodeDBSeedQueryNodes))
229-
if len(seeds) != 0 {
230-
t.Errorf("seed count mismatch: have %v, want %v", len(seeds), 0)
231-
}
232-
}
233-
234-
func TestNodeDBSeedQueryContinuation(t *testing.T) {
235-
db, _ := newNodeDB("", Version, NodeID{})
236-
defer db.close()
237-
238-
// Insert a batch of nodes for querying
239-
for i, seed := range nodeDBSeedQueryNodes {
240-
if err := db.updateNode(seed.node); err != nil {
241-
t.Fatalf("node %d: failed to insert: %v", i, err)
242-
}
243-
}
244-
// Iteratively retrieve the batch, checking for an empty batch on reset
245-
for i := 0; i < len(nodeDBSeedQueryNodes); i++ {
246-
if seeds := db.querySeeds(1); len(seeds) != 1 {
247-
t.Errorf("1st iteration %d: seed count mismatch: have %v, want %v", i, len(seeds), 1)
248-
}
249-
}
250-
if seeds := db.querySeeds(1); len(seeds) != 0 {
251-
t.Errorf("reset: seed count mismatch: have %v, want %v", len(seeds), 0)
252-
}
253-
for i := 0; i < len(nodeDBSeedQueryNodes); i++ {
254-
if seeds := db.querySeeds(1); len(seeds) != 1 {
255-
t.Errorf("2nd iteration %d: seed count mismatch: have %v, want %v", i, len(seeds), 1)
256-
}
257-
}
258-
}
259-
260-
func TestNodeDBSelfSeedQuery(t *testing.T) {
261-
// Assign a node as self to verify evacuation
262-
self := nodeDBSeedQueryNodes[0].node.ID
263-
db, _ := newNodeDB("", Version, self)
264-
defer db.close()
265-
266-
// Insert a batch of nodes for querying
267-
for i, seed := range nodeDBSeedQueryNodes {
268-
if err := db.updateNode(seed.node); err != nil {
269-
t.Fatalf("node %d: failed to insert: %v", i, err)
270-
}
271-
}
272-
// Retrieve the entire batch and check that self was evacuated
273-
seeds := db.querySeeds(2 * len(nodeDBSeedQueryNodes))
274-
if len(seeds) != len(nodeDBSeedQueryNodes)-1 {
275-
t.Errorf("seed count mismatch: have %v, want %v", len(seeds), len(nodeDBSeedQueryNodes)-1)
276-
}
277-
have := make(map[NodeID]struct{})
278-
for _, seed := range seeds {
279-
have[seed.ID] = struct{}{}
280-
}
281-
if _, ok := have[self]; ok {
282-
t.Errorf("self not evacuated")
283-
}
284255
}
285256

286257
func TestNodeDBPersistency(t *testing.T) {

0 commit comments

Comments
 (0)