diff --git a/data_diff/abcs/database_types.py b/data_diff/abcs/database_types.py index b6912b4a..e5aa5fab 100644 --- a/data_diff/abcs/database_types.py +++ b/data_diff/abcs/database_types.py @@ -182,6 +182,8 @@ def python_type(self) -> type: "Return the equivalent Python type of the key" def make_value(self, value): + if isinstance(value, self.python_type): + return value return self.python_type(value) @@ -217,7 +219,14 @@ class Native_UUID(ColType_UUID): @attrs.define(frozen=True) class String_UUID(ColType_UUID, StringType): - pass + # Case is important for UUIDs stored as regular string, not native UUIDs stored as numbers. + # We slice them internally as numbers, but render them back to SQL as lower/upper case. + # None means we do not know for sure, behave as with False, but it might be unreliable. + lowercase: Optional[bool] = None + uppercase: Optional[bool] = None + + def make_value(self, v: str) -> ArithUUID: + return self.python_type(v, lowercase=self.lowercase, uppercase=self.uppercase) @attrs.define(frozen=True) @@ -230,9 +239,6 @@ def test_value(value: str) -> bool: except ValueError: return False - def make_value(self, value): - return self.python_type(value) - @attrs.define(frozen=True) class String_VaryingAlphanum(String_Alphanum): @@ -244,6 +250,8 @@ class String_FixedAlphanum(String_Alphanum): length: int def make_value(self, value): + if isinstance(value, self.python_type): + return value if len(value) != self.length: raise ValueError(f"Expected alphanumeric value of length {self.length}, but got '{value}'.") return self.python_type(value, max_len=self.length) diff --git a/data_diff/databases/base.py b/data_diff/databases/base.py index d6738e7e..21c8d0e6 100644 --- a/data_diff/databases/base.py +++ b/data_diff/databases/base.py @@ -20,7 +20,7 @@ from data_diff.abcs.compiler import AbstractCompiler, Compilable from data_diff.queries.extras import ApplyFuncAndNormalizeAsString, Checksum, NormalizeAsString from data_diff.schema import RawColumnInfo -from data_diff.utils import ArithString, is_uuid, join_iter, safezip +from data_diff.utils import ArithString, ArithUUID, is_uuid, join_iter, safezip from data_diff.queries.api import Expr, table, Select, SKIP, Explain, Code, this from data_diff.queries.ast_classes import ( Alias, @@ -248,6 +248,9 @@ def _compile(self, compiler: Compiler, elem) -> str: return self.timestamp_value(elem) elif isinstance(elem, bytes): return f"b'{elem.decode()}'" + elif isinstance(elem, ArithUUID): + s = f"'{elem.uuid}'" + return s.upper() if elem.uppercase else s.lower() if elem.lowercase else s elif isinstance(elem, ArithString): return f"'{elem}'" assert False, elem @@ -681,8 +684,10 @@ def _constant_value(self, v): return f"'{v}'" elif isinstance(v, datetime): return self.timestamp_value(v) - elif isinstance(v, UUID): + elif isinstance(v, UUID): # probably unused anymore in favour of ArithUUID return f"'{v}'" + elif isinstance(v, ArithUUID): + return f"'{v.uuid}'" elif isinstance(v, decimal.Decimal): return str(v) elif isinstance(v, bytearray): @@ -1110,7 +1115,10 @@ def _refine_coltypes( ) else: assert col_name in col_dict - col_dict[col_name] = String_UUID() + col_dict[col_name] = String_UUID( + lowercase=all(s == s.lower() for s in uuid_samples), + uppercase=all(s == s.upper() for s in uuid_samples), + ) continue if self.SUPPORTS_ALPHANUMS: # Anything but MySQL (so far) diff --git a/data_diff/diff_tables.py b/data_diff/diff_tables.py index 4aa09da2..5ec1f71b 100644 --- a/data_diff/diff_tables.py +++ b/data_diff/diff_tables.py @@ -300,7 +300,8 @@ def _bisect_and_diff_tables(self, table1: TableSegment, table2: TableSegment, in # Start with the first completed value, so we don't waste time waiting min_key1, max_key1 = self._parse_key_range_result(key_types1, next(key_ranges)) - btable1, btable2 = [t.new_key_bounds(min_key=min_key1, max_key=max_key1) for t in (table1, table2)] + btable1 = table1.new_key_bounds(min_key=min_key1, max_key=max_key1, key_types=key_types1) + btable2 = table2.new_key_bounds(min_key=min_key1, max_key=max_key1, key_types=key_types2) logger.info( f"Diffing segments at key-range: {btable1.min_key}..{btable2.max_key}. " @@ -324,7 +325,8 @@ def _bisect_and_diff_tables(self, table1: TableSegment, table2: TableSegment, in # └──┴──────┴──┘ # Overall, the max number of new regions in this 2nd pass is 3^|k| - 1 - min_key2, max_key2 = self._parse_key_range_result(key_types1, next(key_ranges)) + # Note: python types can be the same, but the rendering parameters (e.g. casing) can differ. + min_key2, max_key2 = self._parse_key_range_result(key_types2, next(key_ranges)) points = [list(sorted(p)) for p in safezip(min_key1, min_key2, max_key1, max_key2)] box_mesh = create_mesh_from_points(*points) @@ -332,8 +334,9 @@ def _bisect_and_diff_tables(self, table1: TableSegment, table2: TableSegment, in new_regions = [(p1, p2) for p1, p2 in box_mesh if p1 < p2 and not (p1 >= min_key1 and p2 <= max_key1)] for p1, p2 in new_regions: - extra_tables = [t.new_key_bounds(min_key=p1, max_key=p2) for t in (table1, table2)] - ti.submit(self._bisect_and_diff_segments, ti, *extra_tables, info_tree, priority=999) + extra_table1 = table1.new_key_bounds(min_key=p1, max_key=p2, key_types=key_types1) + extra_table2 = table2.new_key_bounds(min_key=p1, max_key=p2, key_types=key_types2) + ti.submit(self._bisect_and_diff_segments, ti, extra_table1, extra_table2, info_tree, priority=999) return ti diff --git a/data_diff/table_segment.py b/data_diff/table_segment.py index 73b12909..924271ba 100644 --- a/data_diff/table_segment.py +++ b/data_diff/table_segment.py @@ -1,5 +1,5 @@ import time -from typing import Container, Dict, List, Optional, Tuple +from typing import Container, Dict, List, Optional, Sequence, Tuple import logging from itertools import product @@ -9,7 +9,7 @@ from data_diff.utils import safezip, Vector from data_diff.utils import ArithString, split_space from data_diff.databases.base import Database -from data_diff.abcs.database_types import DbPath, DbKey, DbTime +from data_diff.abcs.database_types import DbPath, DbKey, DbTime, IKey from data_diff.schema import RawColumnInfo, Schema, create_schema from data_diff.queries.extras import Checksum from data_diff.queries.api import Count, SKIP, table, this, Expr, min_, max_, Code @@ -205,7 +205,7 @@ def new(self, **kwargs) -> Self: """Creates a copy of the instance using 'replace()'""" return attrs.evolve(self, **kwargs) - def new_key_bounds(self, min_key: Vector, max_key: Vector) -> Self: + def new_key_bounds(self, min_key: Vector, max_key: Vector, *, key_types: Optional[Sequence[IKey]] = None) -> Self: if self.min_key is not None: assert self.min_key <= min_key, (self.min_key, min_key) assert self.min_key < max_key @@ -214,6 +214,13 @@ def new_key_bounds(self, min_key: Vector, max_key: Vector) -> Self: assert min_key < self.max_key assert max_key <= self.max_key + # If asked, enforce the PKs to proper types, mainly to meta-params of the relevant side, + # so that we do not leak e.g. casing of UUIDs from side A to side B and vice versa. + # If not asked, keep the meta-params of the keys as is (assume them already casted). + if key_types is not None: + min_key = Vector(type.make_value(val) for type, val in safezip(key_types, min_key)) + max_key = Vector(type.make_value(val) for type, val in safezip(key_types, max_key)) + return attrs.evolve(self, min_key=min_key, max_key=max_key) @property diff --git a/data_diff/utils.py b/data_diff/utils.py index b9045cc1..e16110e1 100644 --- a/data_diff/utils.py +++ b/data_diff/utils.py @@ -43,7 +43,14 @@ def safezip(*args): return zip(*args) -def is_uuid(u): +UUID_PATTERN = re.compile(r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", re.I) + + +def is_uuid(u: str) -> bool: + # E.g., hashlib.md5(b'hello') is a 32-letter hex number, but not an UUID. + # It would fail UUID-like comparison (< & >) because of casing and dashes. + if not UUID_PATTERN.fullmatch(u): + return False try: UUID(u) except ValueError: @@ -128,23 +135,75 @@ def range(self, other: "ArithString", count: int) -> List[Self]: return [self.new(int=i) for i in checkpoints] -# @attrs.define # not as long as it inherits from UUID -class ArithUUID(UUID, ArithString): +def _any_to_uuid(v: Union[str, int, UUID, "ArithUUID"]) -> UUID: + if isinstance(v, ArithUUID): + return v.uuid + elif isinstance(v, UUID): + return v + elif isinstance(v, str): + return UUID(v) + elif isinstance(v, int): + return UUID(int=v) + else: + raise ValueError(f"Cannot convert a value to UUID: {v!r}") + + +@attrs.define(frozen=True, eq=False, order=False) +class ArithUUID(ArithString): "A UUID that supports basic arithmetic (add, sub)" + uuid: UUID = attrs.field(converter=_any_to_uuid) + lowercase: Optional[bool] = None + uppercase: Optional[bool] = None + + def range(self, other: "ArithUUID", count: int) -> List[Self]: + assert isinstance(other, ArithUUID) + checkpoints = split_space(self.uuid.int, other.uuid.int, count) + return [attrs.evolve(self, uuid=i) for i in checkpoints] + def __int__(self): - return self.int + return self.uuid.int def __add__(self, other: int) -> Self: if isinstance(other, int): - return self.new(int=self.int + other) + return attrs.evolve(self, uuid=self.uuid.int + other) return NotImplemented - def __sub__(self, other: Union[UUID, int]): + def __sub__(self, other: Union["ArithUUID", int]): if isinstance(other, int): - return self.new(int=self.int - other) - elif isinstance(other, UUID): - return self.int - other.int + return attrs.evolve(self, uuid=self.uuid.int - other) + elif isinstance(other, ArithUUID): + return self.uuid.int - other.uuid.int + return NotImplemented + + def __eq__(self, other: object) -> bool: + if isinstance(other, ArithUUID): + return self.uuid == other.uuid + return NotImplemented + + def __ne__(self, other: object) -> bool: + if isinstance(other, ArithUUID): + return self.uuid != other.uuid + return NotImplemented + + def __gt__(self, other: object) -> bool: + if isinstance(other, ArithUUID): + return self.uuid > other.uuid + return NotImplemented + + def __lt__(self, other: object) -> bool: + if isinstance(other, ArithUUID): + return self.uuid < other.uuid + return NotImplemented + + def __ge__(self, other: object) -> bool: + if isinstance(other, ArithUUID): + return self.uuid >= other.uuid + return NotImplemented + + def __le__(self, other: object) -> bool: + if isinstance(other, ArithUUID): + return self.uuid <= other.uuid return NotImplemented diff --git a/poetry.lock b/poetry.lock index e07c2402..073367c0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -324,6 +324,7 @@ python-versions = ">=3.7, <4" files = [ {file = "clickhouse-driver-0.2.6.tar.gz", hash = "sha256:028baf4d65a0b3f9e0ac5df248cab20657b51adbfce6c5427aa6c16a7318dda1"}, {file = "clickhouse_driver-0.2.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e61d975081b74cae9efe7a64b1de1a8aec5643affb81b57487dcae7d195f250f"}, + {file = "clickhouse_driver-0.2.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ee0395b49bd8c0cd3dca6b3a4b9db347c1d300de83ee7b4f482a9d48b6c7af54"}, {file = "clickhouse_driver-0.2.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1d8aff4d3f0d78fd4b11e28ef344a5ee71d6850fef4a79e3265e0728b4d1d89"}, {file = "clickhouse_driver-0.2.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2b086bd658889af10205cb8307b714c8202bdfd05a4833fc7f4f82df2d88a963"}, {file = "clickhouse_driver-0.2.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:79a1b6815d56a03398110c7f602a87ad767ecfd7a0869e61f2d8bfa0779dce2b"}, @@ -337,6 +338,7 @@ files = [ {file = "clickhouse_driver-0.2.6-cp310-cp310-win32.whl", hash = "sha256:1960244de84d7888598180e69689d1ba7ec6c9c99cd2c080a76315a7a29a5cab"}, {file = "clickhouse_driver-0.2.6-cp310-cp310-win_amd64.whl", hash = "sha256:5a6bdfde4e2fb81414200303950ba75c3f7ee9249e4a997854ce18e1cb4beea9"}, {file = "clickhouse_driver-0.2.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ef0a9453a972ec32399cc93a510aec33fa4b9b1f0c5050a3a40e5d298a89a7aa"}, + {file = "clickhouse_driver-0.2.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:95c13374741a8749980436603922ad7c476ae3b5e17850c50faba3879db66bdb"}, {file = "clickhouse_driver-0.2.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d715d392eaadb43ded1c104354aebbc233f69bbf3919aa61beb7cc6ecdaa950a"}, {file = "clickhouse_driver-0.2.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91d75d50265616a2779d0b2acaebf7253783e2b8ad0df3efa6d23f0db1c9bf50"}, {file = "clickhouse_driver-0.2.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:32f0e02c28e9a6f1c1f116d1aa14772e73beb7efd4f30490d9f171d39b40551a"}, @@ -349,6 +351,20 @@ files = [ {file = "clickhouse_driver-0.2.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3bdff826074af1b339fe9bff17844f6b8117080f895b8601f536b13a9d04f82a"}, {file = "clickhouse_driver-0.2.6-cp311-cp311-win32.whl", hash = "sha256:c8c02606eabe4288045bbba497088b7fe976c34330c1066db9744fa09fef4a2a"}, {file = "clickhouse_driver-0.2.6-cp311-cp311-win_amd64.whl", hash = "sha256:44df94940739a72a02716bb14ac8b683aef84b54b05783d96201ff334bcd88fb"}, + {file = "clickhouse_driver-0.2.6-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:079708ac620343736c2c8dace6663178156f4ded47bf25245b56147498d0d7de"}, + {file = "clickhouse_driver-0.2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e13369cf516df6c33c156fe66cfff502f66fc25f2a515c761ed1480fc83b3aa9"}, + {file = "clickhouse_driver-0.2.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cbc0bf957fc6d0163ee06ac02275bdb2f40d109fc225366e387358e78d968a43"}, + {file = "clickhouse_driver-0.2.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f58b0ffb434fefe99b7419e09d6071a49773e9eb49c5ebeedf7c3180b40c2330"}, + {file = "clickhouse_driver-0.2.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c0746dac9aa5cf2c275187aef16b67ae922ef257c82671948a6be86e19ee9cb2"}, + {file = "clickhouse_driver-0.2.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f1ce40c9a2715ea44be9a5c33cb5b08048c1ef5595a6739443473e4ba23fedf"}, + {file = "clickhouse_driver-0.2.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9499a2b2d5e856c7e8efd28da479df8a962e2497c70bf5e2d9a25875d520465"}, + {file = "clickhouse_driver-0.2.6-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:8b2e849bb7102365a480d9d1083ed203a244f0c02a0fc973eab6078b3d14638d"}, + {file = "clickhouse_driver-0.2.6-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:5846c50e2dfe0ce2f300275955a20f82422b1128b09ab5a9ea4d8a00d4ba8438"}, + {file = "clickhouse_driver-0.2.6-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:a12990b54b92b2a2598f144388e766d6261492408f2434738fe649423371894b"}, + {file = "clickhouse_driver-0.2.6-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:af14a5699fea890a1f8f022c624ca9f61994e15913cfaf4e0e58b1e4ac99540a"}, + {file = "clickhouse_driver-0.2.6-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:965fb8370eb7ee8a20cdf54d7c2fe024f587da692bd15e94dd2eee93a3c88f4b"}, + {file = "clickhouse_driver-0.2.6-cp312-cp312-win32.whl", hash = "sha256:9c552205d2b6125a99121080417c5c7bbc47af81ed15bb5ff9be464fed96bb68"}, + {file = "clickhouse_driver-0.2.6-cp312-cp312-win_amd64.whl", hash = "sha256:a58fb8b12a32d58ce0c72839293ec5bacc7904f3db36a82bb963f394dbb5f230"}, {file = "clickhouse_driver-0.2.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:f2a9abb8b1464985f7a480f956744736e611970ffc8ffd3eb0b46343a3a691e6"}, {file = "clickhouse_driver-0.2.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2e01696c450a2de41d586689dbaed0893d4de7469811abd3bf831a0483e723a"}, {file = "clickhouse_driver-0.2.6-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a0bb85760dabbef493aec985ad94612132ddeb5b81569cf0a7222f6cb7278eda"}, @@ -363,6 +379,7 @@ files = [ {file = "clickhouse_driver-0.2.6-cp37-cp37m-win32.whl", hash = "sha256:48f47694d5e54af192a4aa2a24f947795c362ab40a253d088593880fede97568"}, {file = "clickhouse_driver-0.2.6-cp37-cp37m-win_amd64.whl", hash = "sha256:b783e5d3d12947c73d991bceb6b8765231512ab0ac6363823cdcd2c283c67a99"}, {file = "clickhouse_driver-0.2.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3d24e0acf8fef1d787851ae048e0168b2fb10297c3235cbb87974f78db37d3d3"}, + {file = "clickhouse_driver-0.2.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:27dc025f10a930aed453eec5ed9a0404e7b2db671da4a253109facf5c1ad1b4c"}, {file = "clickhouse_driver-0.2.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d534c744b1b211241f8c58d2ad5fcfc465a0503011d9b9073c00e25507abcbf3"}, {file = "clickhouse_driver-0.2.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:858b8039a1241591b63f368de9dbdef6c4e6466b6bf0e01d53d36f7091af7569"}, {file = "clickhouse_driver-0.2.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:21eb62e1de7d2d5483d121d1447e857030bf866d4f23572b0dedc515f9359cd0"}, @@ -376,6 +393,7 @@ files = [ {file = "clickhouse_driver-0.2.6-cp38-cp38-win32.whl", hash = "sha256:b9b775f70371a7333ac828fe2bbd9473c94e18728ac6b70b2865cdee1f0d551f"}, {file = "clickhouse_driver-0.2.6-cp38-cp38-win_amd64.whl", hash = "sha256:d13fe44620750abcd4c93c067d6e44c8a1ea050856c4c27a5633ad8ff197a689"}, {file = "clickhouse_driver-0.2.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e17995752eef4f742976abab03ff3f5b81edb9b9218b151abaf3534055fcf2b8"}, + {file = "clickhouse_driver-0.2.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8c23baf4b4185b3ee13332c05c201e242600e35deb8b0b0d95211e71d5eb3f59"}, {file = "clickhouse_driver-0.2.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbd7e3e33d2bc5f32da2557e97299340a722f948790494a2e9efaed4635ff499"}, {file = "clickhouse_driver-0.2.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0c872e9fee17d278816fc30b4df4b10bedd8eec9efaa614c71725f147b00b30d"}, {file = "clickhouse_driver-0.2.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3c8af2761676cd306962a86cc87a4187efcfdaf253a0d908c8f8ef791277a7fe"}, @@ -999,6 +1017,16 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -1448,8 +1476,6 @@ files = [ {file = "psycopg2-2.9.9-cp310-cp310-win_amd64.whl", hash = "sha256:426f9f29bde126913a20a96ff8ce7d73fd8a216cfb323b1f04da402d452853c3"}, {file = "psycopg2-2.9.9-cp311-cp311-win32.whl", hash = "sha256:ade01303ccf7ae12c356a5e10911c9e1c51136003a9a1d92f7aa9d010fb98372"}, {file = "psycopg2-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:121081ea2e76729acfb0673ff33755e8703d45e926e416cb59bae3a86c6a4981"}, - {file = "psycopg2-2.9.9-cp312-cp312-win32.whl", hash = "sha256:d735786acc7dd25815e89cc4ad529a43af779db2e25aa7c626de864127e5a024"}, - {file = "psycopg2-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:a7653d00b732afb6fc597e29c50ad28087dcb4fbfb28e86092277a559ae4e693"}, {file = "psycopg2-2.9.9-cp37-cp37m-win32.whl", hash = "sha256:5e0d98cade4f0e0304d7d6f25bbfbc5bd186e07b38eac65379309c4ca3193efa"}, {file = "psycopg2-2.9.9-cp37-cp37m-win_amd64.whl", hash = "sha256:7e2dacf8b009a1c1e843b5213a87f7c544b2b042476ed7755be813eaf4e8347a"}, {file = "psycopg2-2.9.9-cp38-cp38-win32.whl", hash = "sha256:ff432630e510709564c01dafdbe996cb552e0b9f3f065eb89bdce5bd31fabf4c"}, @@ -1956,13 +1982,13 @@ jeepney = ">=0.6" [[package]] name = "setuptools" -version = "69.0.2" +version = "69.0.3" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "setuptools-69.0.2-py3-none-any.whl", hash = "sha256:1e8fdff6797d3865f37397be788a4e3cba233608e9b509382a2777d25ebde7f2"}, - {file = "setuptools-69.0.2.tar.gz", hash = "sha256:735896e78a4742605974de002ac60562d286fa8051a7e2299445e8e8fbb01aa6"}, + {file = "setuptools-69.0.3-py3-none-any.whl", hash = "sha256:385eb4edd9c9d5c17540511303e39a147ce2fc04bc55289c322b9e5904fe2c05"}, + {file = "setuptools-69.0.3.tar.gz", hash = "sha256:be1af57fc409f93647f2e8e4573a142ed38724b8cdd389706a867bb4efcf1e78"}, ] [package.extras] diff --git a/tests/test_mesh.py b/tests/test_mesh.py new file mode 100644 index 00000000..c0ee7c55 --- /dev/null +++ b/tests/test_mesh.py @@ -0,0 +1,132 @@ +import uuid + +from data_diff.abcs.database_types import String_UUID +from data_diff.databases import MySQL +from data_diff.table_segment import create_mesh_from_points +from data_diff.utils import ArithUUID, safezip +from tests.common import DiffTestCase, table_segment + + +# We do not need real tables, just any reference to them for proper object creation. +class TestDiffMesh(DiffTestCase): + db_cls = MySQL + + def test_meta_parameters_passed_from_coltypes_to_values(self): + key_types1 = [String_UUID(lowercase=True, uppercase=False)] + key_types2 = [String_UUID(lowercase=False, uppercase=True)] + + # side B is wider than side A to ensure there are "outer" regions. + min_uuid1 = uuid.UUID("11111111-1111-1111-1111-111111111111") + max_uuid1 = uuid.UUID("EEEEEEEE-EEEE-EEEE-EEEE-EEEEEEEEEEEE") + min_uuid2 = uuid.UUID("00000000-0000-0000-0000-000000000000") + max_uuid2 = uuid.UUID("FFFFFFFF-FFFF-FFFF-FFFF-FFFFFFFFFFFF") + min_key1 = (key_types1[0].make_value(min_uuid1),) + max_key1 = (key_types1[0].make_value(max_uuid1),) + min_key2 = (key_types2[0].make_value(min_uuid2),) + max_key2 = (key_types2[0].make_value(max_uuid2),) + + # Verify that we pass the meta-parameters from col types to values: + assert isinstance(min_key1[0], ArithUUID) + assert isinstance(max_key1[0], ArithUUID) + assert isinstance(min_key2[0], ArithUUID) + assert isinstance(max_key2[0], ArithUUID) + assert min_key1[0].uuid == min_uuid1 + assert min_key1[0].lowercase == True + assert min_key1[0].uppercase == False + assert max_key1[0].uuid == max_uuid1 + assert max_key1[0].lowercase == True + assert max_key1[0].uppercase == False + assert min_key2[0].uuid == min_uuid2 + assert min_key2[0].lowercase == False + assert min_key2[0].uppercase == True + assert max_key2[0].uuid == max_uuid2 + assert max_key2[0].lowercase == False + assert max_key2[0].uppercase == True + + def test_meta_parameters_left_as_is_if_not_casted(self): + table1 = table_segment(self.connection, self.table_src_path, "id", "timestamp", case_sensitive=False) + key_types1 = [String_UUID(lowercase=True, uppercase=False)] + + min_uuid1 = uuid.UUID("11111111-1111-1111-1111-111111111111") + max_uuid1 = uuid.UUID("EEEEEEEE-EEEE-EEEE-EEEE-EEEEEEEEEEEE") + min_key1 = (key_types1[0].make_value(min_uuid1),) + max_key1 = (key_types1[0].make_value(max_uuid1),) + + btable1 = table1.new_key_bounds(min_key=min_key1, max_key=max_key1) + assert btable1.min_key[0] is min_key1[0] # by identity, not by equality + assert btable1.max_key[0] is max_key1[0] # by identity, not by equality + + def test_mesh_keys_meta_parameters_preserved(self): + table1 = table_segment(self.connection, self.table_src_path, "id", "timestamp", case_sensitive=False) + table2 = table_segment(self.connection, self.table_src_path, "id", "timestamp", case_sensitive=False) + key_types1 = [String_UUID(lowercase=True, uppercase=False)] + key_types2 = [String_UUID(lowercase=False, uppercase=True)] + + # side B is wider than side A to ensure there are "outer" regions. + min_uuid1 = uuid.UUID("11111111-1111-1111-1111-111111111111") + max_uuid1 = uuid.UUID("EEEEEEEE-EEEE-EEEE-EEEE-EEEEEEEEEEEE") + min_uuid2 = uuid.UUID("00000000-0000-0000-0000-000000000000") + max_uuid2 = uuid.UUID("FFFFFFFF-FFFF-FFFF-FFFF-FFFFFFFFFFFF") + min_key1 = (key_types1[0].make_value(min_uuid1),) + max_key1 = (key_types1[0].make_value(max_uuid1),) + min_key2 = (key_types2[0].make_value(min_uuid2),) + max_key2 = (key_types2[0].make_value(max_uuid2),) + + # This is what TableDiffer._bisect_and_diff_tables() does, precisely (yes, using key1!): + btable1 = table1.new_key_bounds(min_key=min_key1, max_key=max_key1, key_types=key_types1) + btable2 = table2.new_key_bounds(min_key=min_key1, max_key=max_key1, key_types=key_types2) + + # Verify that both sides have proper (the side-specific) pk meta-parameters: + assert btable1.min_key[0].uuid == min_uuid1 + assert btable1.min_key[0].lowercase == True + assert btable1.min_key[0].uppercase == False + assert btable1.max_key[0].uuid == max_uuid1 + assert btable1.max_key[0].lowercase == True + assert btable1.max_key[0].uppercase == False + assert btable2.min_key[0].uuid == min_uuid1 + assert btable2.min_key[0].lowercase == False + assert btable2.min_key[0].uppercase == True + assert btable2.max_key[0].uuid == max_uuid1 + assert btable2.max_key[0].lowercase == False + assert btable2.max_key[0].uppercase == True + + # This is what TableDiffer._bisect_and_diff_tables() does, precisely: + points = [list(sorted(p)) for p in safezip(min_key1, min_key2, max_key1, max_key2)] + box_mesh = create_mesh_from_points(*points) + new_regions = [(p1, p2) for p1, p2 in box_mesh if p1 < p2 and not (p1 >= min_key1 and p2 <= max_key1)] + extra_tables = [ + ( + table1.new_key_bounds(min_key=p1, max_key=p2, key_types=key_types1), + table2.new_key_bounds(min_key=p1, max_key=p2, key_types=key_types2), + ) + for p1, p2 in new_regions + ] + + # Verify that extra ("outer") segments have the proper pk meta-parameters: + assert len(extra_tables) == 2 + + assert extra_tables[0][0].min_key[0].uuid == min_uuid2 + assert extra_tables[0][0].min_key[0].lowercase == True + assert extra_tables[0][0].min_key[0].uppercase == False + assert extra_tables[0][0].max_key[0].uuid == min_uuid1 + assert extra_tables[0][0].max_key[0].lowercase == True + assert extra_tables[0][0].max_key[0].uppercase == False + assert extra_tables[0][1].min_key[0].uuid == min_uuid2 + assert extra_tables[0][1].min_key[0].lowercase == False + assert extra_tables[0][1].min_key[0].uppercase == True + assert extra_tables[0][1].max_key[0].uuid == min_uuid1 + assert extra_tables[0][1].max_key[0].lowercase == False + assert extra_tables[0][1].max_key[0].uppercase == True + + assert extra_tables[1][0].min_key[0].uuid == max_uuid1 + assert extra_tables[1][0].min_key[0].lowercase == True + assert extra_tables[1][0].min_key[0].uppercase == False + assert extra_tables[1][0].max_key[0].uuid == max_uuid2 + assert extra_tables[1][0].max_key[0].lowercase == True + assert extra_tables[1][0].max_key[0].uppercase == False + assert extra_tables[1][1].min_key[0].uuid == max_uuid1 + assert extra_tables[1][1].min_key[0].lowercase == False + assert extra_tables[1][1].min_key[0].uppercase == True + assert extra_tables[1][1].max_key[0].uuid == max_uuid2 + assert extra_tables[1][1].max_key[0].lowercase == False + assert extra_tables[1][1].max_key[0].uppercase == True