diff --git a/data_diff/databases/mssql.py b/data_diff/databases/mssql.py index 834ed9cd..bd0d8e7b 100644 --- a/data_diff/databases/mssql.py +++ b/data_diff/databases/mssql.py @@ -69,7 +69,7 @@ class Dialect(BaseDialect): "varbinary": Text, "xml": Text, # UUID - "uniqueidentifier": Native_UUID, + # "uniqueidentifier": Native_UUID, # It is not supported yet # Bool "bit": Boolean, # JSON @@ -94,7 +94,7 @@ def current_schema(self) -> str: WHERE name = CURRENT_USER""" def to_string(self, s: str): - return f"CONVERT(varchar, {s})" + return f"CONVERT(varchar(4000), {s})" def type_repr(self, t) -> str: try: @@ -102,6 +102,10 @@ def type_repr(self, t) -> str: except KeyError: return super().type_repr(t) + def _convert_db_precision_to_digits(self, p: int) -> int: + # Subtracting 2 due to weird precision issues in PostgreSQL + return super()._convert_db_precision_to_digits(p) - 2 + def random(self) -> str: return "rand()" @@ -134,21 +138,27 @@ def constant_values(self, rows) -> str: return f"VALUES {values}" def normalize_timestamp(self, value: str, coltype: TemporalType) -> str: - if coltype.precision > 0: - formatted_value = ( - f"FORMAT({value}, 'yyyy-MM-dd HH:mm:ss') + '.' 
+ " - f"SUBSTRING(FORMAT({value}, 'fffffff'), 1, {coltype.precision})" - ) - else: - formatted_value = f"FORMAT({value}, 'yyyy-MM-dd HH:mm:ss')" + if coltype.rounds: + value = f""" + DATEADD( + nanosecond, + CAST( + CAST( + CAST( + DATEDIFF(nanosecond , FORMAT(col, 'yyyy-MM-dd HH:mm:ss'), col) AS DECIMAL(38, 9) + ) / 1000000000 AS DECIMAL(38, {coltype.precision}) + ) * 1000000000 AS INT + ), + CAST(FORMAT(col, 'yyyy-MM-dd HH:mm:ss') AS DATETIME2(6)) + ) + """ + + formatted_value = f"FORMAT({value}, 'yyyy-MM-dd HH:mm:ss.ffffff')" return formatted_value def normalize_number(self, value: str, coltype: NumericType) -> str: - if coltype.precision == 0: - return f"CAST(FLOOR({value}) AS VARCHAR)" - - return f"FORMAT({value}, 'N{coltype.precision}')" + return f"FORMAT(CAST({value} AS DECIMAL(38, {coltype.precision})), 'N{coltype.precision}')" def md5_as_int(self, s: str) -> str: return f"convert(bigint, convert(varbinary, '0x' + RIGHT(CONVERT(NVARCHAR(32), HashBytes('MD5', {s}), 2), {CHECKSUM_HEXDIGITS}), 1)) - {CHECKSUM_OFFSET}" @@ -156,6 +166,9 @@ def md5_as_int(self, s: str) -> str: def md5_as_hex(self, s: str) -> str: return f"HashBytes('MD5', {s})" + def normalize_uuid(self, value: str, coltype) -> str: + return f"CONVERT(VARCHAR(36), TRIM(CONVERT(varchar(4000), {value})))" + @attrs.define(frozen=False, init=False, kw_only=True) class MsSQL(ThreadedDatabase): diff --git a/tests/test_database_types.py b/tests/test_database_types.py index e97ca484..d30b634c 100644 --- a/tests/test_database_types.py +++ b/tests/test_database_types.py @@ -353,7 +353,7 @@ def init_conns(): "int": ["INT", "BIGINT"], "datetime": ["datetime2(6)"], "float": ["DECIMAL(6, 2)", "FLOAT", "REAL"], - "uuid": ["VARCHAR(100)", "CHAR(100)", "UNIQUEIDENTIFIER"], + "uuid": ["VARCHAR(100)", "CHAR(100)"], "boolean": [ "BIT", ],