66from enum import Enum
77from contextlib import contextmanager
88from operator import methodcaller
9- from typing import Dict , Set , Tuple , Iterator , Optional
9+ from typing import Dict , Set , List , Tuple , Iterator , Optional
1010from concurrent .futures import ThreadPoolExecutor , as_completed
1111
1212import attrs
@@ -28,6 +28,7 @@ class Algorithm(Enum):
2828
2929
3030DiffResult = Iterator [Tuple [str , tuple ]] # Iterator[Tuple[Literal["+", "-"], tuple]]
31+ DiffResultList = Iterator [List [Tuple [str , tuple ]]] # like DiffResult, but each item is a list of such tuples
3132
3233
3334@attrs .define (frozen = False )
@@ -187,6 +188,7 @@ class TableDiffer(ThreadBase, ABC):
187188 ignored_columns1 : Set [str ] = attrs .field (factory = set )
188189 ignored_columns2 : Set [str ] = attrs .field (factory = set )
189190 _ignored_columns_lock : threading .Lock = attrs .field (factory = threading .Lock , init = False )
191+ yield_list : bool = False # passed as the 2nd argument to ThreadedYielder in _bisect_and_diff_tables; presumably selects list-batched output (DiffResultList) - confirm
190192
191193 def diff_tables (self , table1 : TableSegment , table2 : TableSegment , info_tree : InfoTree = None ) -> DiffResultWrapper :
192194 """Diff the given tables.
@@ -255,7 +257,9 @@ def _diff_tables_wrapper(self, table1: TableSegment, table2: TableSegment, info_
def _validate_and_adjust_columns(self, table1: TableSegment, table2: TableSegment) -> None:
    """Do nothing with the two table segments and return ``None``.

    NOTE(review): presumably an override point for subclasses that need to
    check or adjust columns before diffing - confirm against the subclasses.
    """
257259
def _diff_tables_root(
    self, table1: TableSegment, table2: TableSegment, info_tree: InfoTree
) -> "DiffResult | DiffResultList":
    """Diff two table segments by delegating to ``_bisect_and_diff_tables``.

    Args:
        table1: First table segment, forwarded unchanged.
        table2: Second table segment, forwarded unchanged.
        info_tree: Info tree, forwarded unchanged.

    Returns:
        Whatever ``self._bisect_and_diff_tables(table1, table2, info_tree)``
        returns.
    """
    # The return annotation is a string on purpose: applying ``|`` to
    # ``typing`` aliases such as ``Iterator[...]`` raises TypeError at
    # definition time on Python < 3.10 unless annotations are postponed.
    return self._bisect_and_diff_tables(table1, table2, info_tree)
260264
261265 @abstractmethod
@@ -300,9 +304,9 @@ def _bisect_and_diff_tables(self, table1: TableSegment, table2: TableSegment, in
300304 f"size: table1 <= { btable1 .approximate_size ()} , table2 <= { btable2 .approximate_size ()} "
301305 )
302306
303- ti = ThreadedYielder (self .max_threadpool_size )
307+ ti = ThreadedYielder (self .max_threadpool_size , self . yield_list )
304308 # Bisect (split) the table into segments, and diff them recursively.
305- ti .submit (self ._bisect_and_diff_segments , ti , btable1 , btable2 , info_tree )
309+ ti .submit (self ._bisect_and_diff_segments , ti , btable1 , btable2 , info_tree , priority = 999 )
306310
307311 # Now we check for the second min-max, to diff the portions we "missed".
308312 # This is achieved by subtracting the table ranges, and dividing the resulting space into aligned boxes.
@@ -326,7 +330,7 @@ def _bisect_and_diff_tables(self, table1: TableSegment, table2: TableSegment, in
326330
327331 for p1 , p2 in new_regions :
328332 extra_tables = [t .new_key_bounds (min_key = p1 , max_key = p2 ) for t in (table1 , table2 )]
329- ti .submit (self ._bisect_and_diff_segments , ti , * extra_tables , info_tree )
333+ ti .submit (self ._bisect_and_diff_segments , ti , * extra_tables , info_tree , priority = 999 )
330334
331335 return ti
332336
0 commit comments