1
1
//! Data structures and helpers for writing subgraph changes to the store
2
- use std:: { collections:: HashSet , sync:: Arc } ;
2
+ use std:: {
3
+ collections:: { HashMap , HashSet } ,
4
+ sync:: Arc ,
5
+ } ;
3
6
4
7
use crate :: {
5
8
blockchain:: { block_stream:: FirehoseCursor , BlockPtr , BlockTime } ,
@@ -8,6 +11,7 @@ use crate::{
8
11
data:: { store:: Id , subgraph:: schema:: SubgraphError } ,
9
12
data_source:: CausalityRegion ,
10
13
derive:: CacheWeight ,
14
+ env:: ENV_VARS ,
11
15
internal_error,
12
16
util:: cache_weight:: CacheWeight ,
13
17
} ;
@@ -154,9 +158,10 @@ impl EntityModification {
154
158
}
155
159
156
160
pub fn creates_entity ( & self ) -> bool {
161
+ use EntityModification :: * ;
157
162
match self {
158
- EntityModification :: Insert { .. } => true ,
159
- EntityModification :: Overwrite { .. } | EntityModification :: Remove { .. } => false ,
163
+ Insert { .. } => true ,
164
+ Overwrite { .. } | Remove { .. } => false ,
160
165
}
161
166
}
162
167
@@ -310,6 +315,10 @@ pub struct RowGroup {
310
315
rows : Vec < EntityModification > ,
311
316
312
317
immutable : bool ,
318
+
319
+ /// Map the `key.entity_id` of all entries in `rows` to the index with
320
+ /// the most recent entry for that id to speed up lookups
321
+ last_mod : HashMap < Id , usize > ,
313
322
}
314
323
315
324
impl RowGroup {
@@ -318,6 +327,7 @@ impl RowGroup {
318
327
entity_type,
319
328
rows : Vec :: new ( ) ,
320
329
immutable,
330
+ last_mod : HashMap :: new ( ) ,
321
331
}
322
332
}
323
333
@@ -374,6 +384,21 @@ impl RowGroup {
374
384
}
375
385
376
386
pub fn last_op ( & self , key : & EntityKey , at : BlockNumber ) -> Option < EntityOp < ' _ > > {
387
+ if ENV_VARS . store . write_batch_memoize {
388
+ let idx = * self . last_mod . get ( & key. entity_id ) ?;
389
+ if let Some ( op) = self . rows . get ( idx) . and_then ( |emod| {
390
+ if emod. block ( ) <= at {
391
+ Some ( emod. as_entity_op ( at) )
392
+ } else {
393
+ None
394
+ }
395
+ } ) {
396
+ return Some ( op) ;
397
+ }
398
+ }
399
+ // We are looking for the change at a block `at` that is before the
400
+ // change we remember in `last_mod`, and therefore have to scan
401
+ // through all changes
377
402
self . rows
378
403
. iter ( )
379
404
// We are scanning backwards, i.e., in descending order of
@@ -383,7 +408,14 @@ impl RowGroup {
383
408
. map ( |emod| emod. as_entity_op ( at) )
384
409
}
385
410
411
+ /// Return an iterator over all changes that are effective at `at`. That
412
+ /// makes it possible to construct the state that the deployment will
413
+ /// have once all changes for block `at` have been written.
386
414
pub fn effective_ops ( & self , at : BlockNumber ) -> impl Iterator < Item = EntityOp < ' _ > > {
415
+ // We don't use `self.last_mod` here, because we need to return
416
+ // operations for all entities that have pending changes at block
417
+ // `at`, and there is no guarantee that `self.last_mod` is visible
418
+ // at `at` since the change in `self.last_mod` might come after `at`
387
419
let mut seen = HashSet :: new ( ) ;
388
420
self . rows
389
421
. iter ( )
@@ -400,7 +432,12 @@ impl RowGroup {
400
432
401
433
/// Find the most recent entry for `id`
402
434
fn prev_row_mut ( & mut self , id : & Id ) -> Option < & mut EntityModification > {
403
- self . rows . iter_mut ( ) . rfind ( |emod| emod. id ( ) == id)
435
+ if ENV_VARS . store . write_batch_memoize {
436
+ let idx = * self . last_mod . get ( id) ?;
437
+ self . rows . get_mut ( idx)
438
+ } else {
439
+ self . rows . iter_mut ( ) . rfind ( |emod| emod. id ( ) == id)
440
+ }
404
441
}
405
442
406
443
/// Append `row` to `self.rows` by combining it with a previously
@@ -433,6 +470,14 @@ impl RowGroup {
433
470
) ) ;
434
471
}
435
472
473
+ if row. id ( ) != prev_row. id ( ) {
474
+ return Err ( internal_error ! (
475
+ "last_mod map is corrupted: got id {} looking up id {}" ,
476
+ prev_row. id( ) ,
477
+ row. id( )
478
+ ) ) ;
479
+ }
480
+
436
481
// The heart of the matter: depending on what `row` is, clamp
437
482
// `prev_row` and either ignore `row` since it is not needed, or
438
483
// turn it into an `Insert`, which also does not require
@@ -460,25 +505,31 @@ impl RowGroup {
460
505
Insert { .. } ,
461
506
) => {
462
507
// prev_row was deleted
463
- self . rows . push ( row) ;
508
+ self . push_row ( row) ;
464
509
}
465
510
(
466
511
Insert { end : None , .. } | Overwrite { end : None , .. } ,
467
512
Overwrite { block, .. } ,
468
513
) => {
469
514
prev_row. clamp ( * block) ?;
470
- self . rows . push ( row. as_insert ( & self . entity_type ) ?) ;
515
+ let row = row. as_insert ( & self . entity_type ) ?;
516
+ self . push_row ( row) ;
471
517
}
472
518
( Insert { end : None , .. } | Overwrite { end : None , .. } , Remove { block, .. } ) => {
473
519
prev_row. clamp ( * block) ?;
474
520
}
475
521
}
476
522
} else {
477
- self . rows . push ( row) ;
523
+ self . push_row ( row) ;
478
524
}
479
525
Ok ( ( ) )
480
526
}
481
527
528
+ fn push_row ( & mut self , row : EntityModification ) {
529
+ self . last_mod . insert ( row. id ( ) . clone ( ) , self . rows . len ( ) ) ;
530
+ self . rows . push ( row) ;
531
+ }
532
+
482
533
fn append ( & mut self , group : RowGroup ) -> Result < ( ) , StoreError > {
483
534
if self . entity_type != group. entity_type {
484
535
return Err ( internal_error ! (
@@ -659,7 +710,7 @@ pub struct Batch {
659
710
pub first_block : BlockNumber ,
660
711
/// The firehose cursor corresponding to `block_ptr`
661
712
pub firehose_cursor : FirehoseCursor ,
662
- mods : RowGroups ,
713
+ pub mods : RowGroups ,
663
714
/// New data sources
664
715
pub data_sources : DataSources ,
665
716
pub deterministic_errors : Vec < SubgraphError > ,
@@ -924,6 +975,7 @@ impl<'a> Iterator for WriteChunkIter<'a> {
924
975
925
976
#[ cfg( test) ]
926
977
mod test {
978
+ use std:: collections:: HashMap ;
927
979
use std:: sync:: Arc ;
928
980
929
981
use crate :: {
@@ -947,18 +999,27 @@ mod test {
947
999
948
1000
assert_eq ! ( values. len( ) , blocks. len( ) ) ;
949
1001
950
- let rows = values
1002
+ let rows: Vec < _ > = values
951
1003
. iter ( )
952
1004
. zip ( blocks. iter ( ) )
953
1005
. map ( |( value, block) | EntityModification :: Remove {
954
1006
key : ROW_GROUP_TYPE . key ( Id :: String ( Word :: from ( value. to_string ( ) ) ) ) ,
955
1007
block : * block,
956
1008
} )
957
1009
. collect ( ) ;
1010
+ let last_mod = rows
1011
+ . iter ( )
1012
+ . enumerate ( )
1013
+ . fold ( HashMap :: new ( ) , |mut map, ( idx, emod) | {
1014
+ map. insert ( emod. id ( ) . clone ( ) , idx) ;
1015
+ map
1016
+ } ) ;
1017
+
958
1018
let group = RowGroup {
959
1019
entity_type : ENTRY_TYPE . clone ( ) ,
960
1020
rows,
961
1021
immutable : false ,
1022
+ last_mod,
962
1023
} ;
963
1024
let act = group
964
1025
. clamps_by_block ( )
0 commit comments