Skip to content

Commit e3ac56d

Browse files
committed
Merge pull request ethereum#1859 from fjl/fix-discover-refresh-race
p2p/discover: fix race involving the seed node iterator
2 parents 46ad5a5 + 32dda97 commit e3ac56d

File tree

6 files changed

+199
-183
lines changed

6 files changed

+199
-183
lines changed

p2p/discover/database.go

Lines changed: 56 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ package discover
2121

2222
import (
2323
"bytes"
24+
"crypto/rand"
2425
"encoding/binary"
2526
"os"
2627
"sync"
@@ -46,11 +47,8 @@ var (
4647

4748
// nodeDB stores all nodes we know about.
4849
type nodeDB struct {
49-
lvl *leveldb.DB // Interface to the database itself
50-
seeder iterator.Iterator // Iterator for fetching possible seed nodes
51-
52-
self NodeID // Own node id to prevent adding it into the database
53-
50+
lvl *leveldb.DB // Interface to the database itself
51+
self NodeID // Own node id to prevent adding it into the database
5452
runner sync.Once // Ensures we can start at most one expirer
5553
quit chan struct{} // Channel to signal the expiring thread to stop
5654
}
@@ -302,52 +300,70 @@ func (db *nodeDB) updateFindFails(id NodeID, fails int) error {
302300
return db.storeInt64(makeKey(id, nodeDBDiscoverFindFails), int64(fails))
303301
}
304302

305-
// querySeeds retrieves a batch of nodes to be used as potential seed servers
306-
// during bootstrapping the node into the network.
307-
//
308-
// Ideal seeds are the most recently seen nodes (highest probability to be still
309-
// alive), but yet untried. However, since leveldb only supports dumb iteration
310-
// we will instead start pulling in potential seeds that haven't been yet pinged
311-
// since the start of the boot procedure.
312-
//
313-
// If the database runs out of potential seeds, we restart the startup counter
314-
// and start iterating over the peers again.
315-
func (db *nodeDB) querySeeds(n int) []*Node {
316-
// Create a new seed iterator if none exists
317-
if db.seeder == nil {
318-
db.seeder = db.lvl.NewIterator(nil, nil)
303+
// querySeeds retrieves random nodes to be used as potential seed nodes
304+
// for bootstrapping.
305+
func (db *nodeDB) querySeeds(n int, maxAge time.Duration) []*Node {
306+
var (
307+
now = time.Now()
308+
nodes = make([]*Node, 0, n)
309+
it = db.lvl.NewIterator(nil, nil)
310+
id NodeID
311+
)
312+
defer it.Release()
313+
314+
seek:
315+
for seeks := 0; len(nodes) < n && seeks < n*5; seeks++ {
316+
// Seek to a random entry. The first byte is incremented by a
317+
// random amount each time in order to increase the likelihood
318+
// of hitting all existing nodes in very small databases.
319+
ctr := id[0]
320+
rand.Read(id[:])
321+
id[0] = ctr + id[0]%16
322+
it.Seek(makeKey(id, nodeDBDiscoverRoot))
323+
324+
n := nextNode(it)
325+
if n == nil {
326+
id[0] = 0
327+
continue seek // iterator exhausted
328+
}
329+
if n.ID == db.self {
330+
continue seek
331+
}
332+
if now.Sub(db.lastPong(n.ID)) > maxAge {
333+
continue seek
334+
}
335+
for i := range nodes {
336+
if nodes[i].ID == n.ID {
337+
continue seek // duplicate
338+
}
339+
}
340+
nodes = append(nodes, n)
319341
}
320-
// Iterate over the nodes and find suitable seeds
321-
nodes := make([]*Node, 0, n)
322-
for len(nodes) < n && db.seeder.Next() {
323-
// Iterate until a discovery node is found
324-
id, field := splitKey(db.seeder.Key())
342+
return nodes
343+
}
344+
345+
// reads the next node record from the iterator, skipping over other
346+
// database entries.
347+
func nextNode(it iterator.Iterator) *Node {
348+
for end := false; !end; end = !it.Next() {
349+
id, field := splitKey(it.Key())
325350
if field != nodeDBDiscoverRoot {
326351
continue
327352
}
328-
// Dump it if its a self reference
329-
if bytes.Compare(id[:], db.self[:]) == 0 {
330-
db.deleteNode(id)
353+
var n Node
354+
if err := rlp.DecodeBytes(it.Value(), &n); err != nil {
355+
if glog.V(logger.Warn) {
356+
glog.Errorf("invalid node %x: %v", id, err)
357+
}
331358
continue
332359
}
333-
// Load it as a potential seed
334-
if node := db.node(id); node != nil {
335-
nodes = append(nodes, node)
336-
}
337-
}
338-
// Release the iterator if we reached the end
339-
if len(nodes) == 0 {
340-
db.seeder.Release()
341-
db.seeder = nil
360+
return &n
342361
}
343-
return nodes
362+
return nil
344363
}
345364

346365
// close flushes and closes the database files.
347366
func (db *nodeDB) close() {
348-
if db.seeder != nil {
349-
db.seeder.Release()
350-
}
351367
close(db.quit)
352368
db.lvl.Close()
353369
}

p2p/discover/database_test.go

Lines changed: 37 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -162,9 +162,33 @@ var nodeDBSeedQueryNodes = []struct {
162162
node *Node
163163
pong time.Time
164164
}{
165+
// This one should not be in the result set because its last
166+
// pong time is too far in the past.
165167
{
166168
node: newNode(
167-
MustHexID("0x01d9d65c4552b5eb43d5ad55a2ee3f56c6cbc1c64a5c8d659f51fcd51bace24351232b8d7821617d2b29b54b81cdefb9b3e9c37d7fd5f63270bcc9e1a6f6a439"),
169+
MustHexID("0x84d9d65c4552b5eb43d5ad55a2ee3f56c6cbc1c64a5c8d659f51fcd51bace24351232b8d7821617d2b29b54b81cdefb9b3e9c37d7fd5f63270bcc9e1a6f6a439"),
170+
net.IP{127, 0, 0, 3},
171+
30303,
172+
30303,
173+
),
174+
pong: time.Now().Add(-3 * time.Hour),
175+
},
176+
// This one shouldn't be in in the result set because its
177+
// nodeID is the local node's ID.
178+
{
179+
node: newNode(
180+
MustHexID("0x57d9d65c4552b5eb43d5ad55a2ee3f56c6cbc1c64a5c8d659f51fcd51bace24351232b8d7821617d2b29b54b81cdefb9b3e9c37d7fd5f63270bcc9e1a6f6a439"),
181+
net.IP{127, 0, 0, 3},
182+
30303,
183+
30303,
184+
),
185+
pong: time.Now().Add(-4 * time.Second),
186+
},
187+
188+
// These should be in the result set.
189+
{
190+
node: newNode(
191+
MustHexID("0x22d9d65c4552b5eb43d5ad55a2ee3f56c6cbc1c64a5c8d659f51fcd51bace24351232b8d7821617d2b29b54b81cdefb9b3e9c37d7fd5f63270bcc9e1a6f6a439"),
168192
net.IP{127, 0, 0, 1},
169193
30303,
170194
30303,
@@ -173,7 +197,7 @@ var nodeDBSeedQueryNodes = []struct {
173197
},
174198
{
175199
node: newNode(
176-
MustHexID("0x02d9d65c4552b5eb43d5ad55a2ee3f56c6cbc1c64a5c8d659f51fcd51bace24351232b8d7821617d2b29b54b81cdefb9b3e9c37d7fd5f63270bcc9e1a6f6a439"),
200+
MustHexID("0x44d9d65c4552b5eb43d5ad55a2ee3f56c6cbc1c64a5c8d659f51fcd51bace24351232b8d7821617d2b29b54b81cdefb9b3e9c37d7fd5f63270bcc9e1a6f6a439"),
177201
net.IP{127, 0, 0, 2},
178202
30303,
179203
30303,
@@ -182,7 +206,7 @@ var nodeDBSeedQueryNodes = []struct {
182206
},
183207
{
184208
node: newNode(
185-
MustHexID("0x03d9d65c4552b5eb43d5ad55a2ee3f56c6cbc1c64a5c8d659f51fcd51bace24351232b8d7821617d2b29b54b81cdefb9b3e9c37d7fd5f63270bcc9e1a6f6a439"),
209+
MustHexID("0xe2d9d65c4552b5eb43d5ad55a2ee3f56c6cbc1c64a5c8d659f51fcd51bace24351232b8d7821617d2b29b54b81cdefb9b3e9c37d7fd5f63270bcc9e1a6f6a439"),
186210
net.IP{127, 0, 0, 3},
187211
30303,
188212
30303,
@@ -192,28 +216,32 @@ var nodeDBSeedQueryNodes = []struct {
192216
}
193217

194218
func TestNodeDBSeedQuery(t *testing.T) {
195-
db, _ := newNodeDB("", Version, NodeID{})
219+
db, _ := newNodeDB("", Version, nodeDBSeedQueryNodes[1].node.ID)
196220
defer db.close()
197221

198222
// Insert a batch of nodes for querying
199223
for i, seed := range nodeDBSeedQueryNodes {
200224
if err := db.updateNode(seed.node); err != nil {
201225
t.Fatalf("node %d: failed to insert: %v", i, err)
202226
}
227+
if err := db.updateLastPong(seed.node.ID, seed.pong); err != nil {
228+
t.Fatalf("node %d: failed to insert lastPong: %v", i, err)
229+
}
203230
}
231+
204232
// Retrieve the entire batch and check for duplicates
205-
seeds := db.querySeeds(2 * len(nodeDBSeedQueryNodes))
206-
if len(seeds) != len(nodeDBSeedQueryNodes) {
207-
t.Errorf("seed count mismatch: have %v, want %v", len(seeds), len(nodeDBSeedQueryNodes))
208-
}
233+
seeds := db.querySeeds(len(nodeDBSeedQueryNodes)*2, time.Hour)
209234
have := make(map[NodeID]struct{})
210235
for _, seed := range seeds {
211236
have[seed.ID] = struct{}{}
212237
}
213238
want := make(map[NodeID]struct{})
214-
for _, seed := range nodeDBSeedQueryNodes {
239+
for _, seed := range nodeDBSeedQueryNodes[2:] {
215240
want[seed.node.ID] = struct{}{}
216241
}
242+
if len(seeds) != len(want) {
243+
t.Errorf("seed count mismatch: have %v, want %v", len(seeds), len(want))
244+
}
217245
for id, _ := range have {
218246
if _, ok := want[id]; !ok {
219247
t.Errorf("extra seed: %v", id)
@@ -224,63 +252,6 @@ func TestNodeDBSeedQuery(t *testing.T) {
224252
t.Errorf("missing seed: %v", id)
225253
}
226254
}
227-
// Make sure the next batch is empty (seed EOF)
228-
seeds = db.querySeeds(2 * len(nodeDBSeedQueryNodes))
229-
if len(seeds) != 0 {
230-
t.Errorf("seed count mismatch: have %v, want %v", len(seeds), 0)
231-
}
232-
}
233-
234-
func TestNodeDBSeedQueryContinuation(t *testing.T) {
235-
db, _ := newNodeDB("", Version, NodeID{})
236-
defer db.close()
237-
238-
// Insert a batch of nodes for querying
239-
for i, seed := range nodeDBSeedQueryNodes {
240-
if err := db.updateNode(seed.node); err != nil {
241-
t.Fatalf("node %d: failed to insert: %v", i, err)
242-
}
243-
}
244-
// Iteratively retrieve the batch, checking for an empty batch on reset
245-
for i := 0; i < len(nodeDBSeedQueryNodes); i++ {
246-
if seeds := db.querySeeds(1); len(seeds) != 1 {
247-
t.Errorf("1st iteration %d: seed count mismatch: have %v, want %v", i, len(seeds), 1)
248-
}
249-
}
250-
if seeds := db.querySeeds(1); len(seeds) != 0 {
251-
t.Errorf("reset: seed count mismatch: have %v, want %v", len(seeds), 0)
252-
}
253-
for i := 0; i < len(nodeDBSeedQueryNodes); i++ {
254-
if seeds := db.querySeeds(1); len(seeds) != 1 {
255-
t.Errorf("2nd iteration %d: seed count mismatch: have %v, want %v", i, len(seeds), 1)
256-
}
257-
}
258-
}
259-
260-
func TestNodeDBSelfSeedQuery(t *testing.T) {
261-
// Assign a node as self to verify evacuation
262-
self := nodeDBSeedQueryNodes[0].node.ID
263-
db, _ := newNodeDB("", Version, self)
264-
defer db.close()
265-
266-
// Insert a batch of nodes for querying
267-
for i, seed := range nodeDBSeedQueryNodes {
268-
if err := db.updateNode(seed.node); err != nil {
269-
t.Fatalf("node %d: failed to insert: %v", i, err)
270-
}
271-
}
272-
// Retrieve the entire batch and check that self was evacuated
273-
seeds := db.querySeeds(2 * len(nodeDBSeedQueryNodes))
274-
if len(seeds) != len(nodeDBSeedQueryNodes)-1 {
275-
t.Errorf("seed count mismatch: have %v, want %v", len(seeds), len(nodeDBSeedQueryNodes)-1)
276-
}
277-
have := make(map[NodeID]struct{})
278-
for _, seed := range seeds {
279-
have[seed.ID] = struct{}{}
280-
}
281-
if _, ok := have[self]; ok {
282-
t.Errorf("self not evacuated")
283-
}
284255
}
285256

286257
func TestNodeDBPersistency(t *testing.T) {

0 commit comments

Comments
 (0)