@@ -131,6 +131,10 @@ def __post_init__(self):
131131class  UnknownColType (ColType ):
132132    text : str 
133133
134+     def  __post_init__ (self ):
135+         logger .warn (f"Column of type '{ self .text }  ' has no compatibility handling. " 
136+                      "If encoding/formatting differs between databases, it may result in false positives." )
137+ 
134138
135139class  AbstractDatabase (ABC ):
136140    @abstractmethod  
@@ -173,16 +177,24 @@ def close(self):
173177        "Close connection(s) to the database instance. Querying will stop functioning." 
174178        ...
175179
180+ 
176181    @abstractmethod  
177-     def  normalize_value_by_type ( value : str , coltype : ColType ) ->  str :
178-         """Creates an SQL expression, that converts 'value' to a normalized representation . 
182+     def  normalize_timestamp ( self ,  value : str , coltype : ColType ) ->  str :
183+         """Creates an SQL expression, that converts 'value' to a normalized timestamp . 
179184
180-         The returned expression must accept any SQL value, and return a string. 
185+         The returned expression must accept any SQL datetime/timestamp, and return a string. 
186+ 
187+         Date format: "YYYY-MM-DD HH:mm:SS.FFFFFF" 
188+ 
189+         Precision of dates should be rounded up/down according to coltype.rounds 
190+         """ 
191+         ...
181192
182-         - Dates are expected in the format: 
183-             "YYYY-MM-DD HH:mm:SS.FFFFFF" 
193+     @abstractmethod  
194+     def  normalize_number (self , value : str , coltype : ColType ) ->  str :
195+         """Creates an SQL expression, that converts 'value' to a normalized number. 
184196
185-             Rounded up/down according to coltype.rounds  
197+         The returned expression must accept any SQL int/numeric/float, and return a string.  
186198
187199        - Floats/Decimals are expected in the format 
188200            "I.P" 
@@ -191,14 +203,31 @@ def normalize_value_by_type(value: str, coltype: ColType) -> str:
191203            and must be at least one digit (0). 
192204            P is the fractional digits, the amount of which is specified with 
193205            coltype.precision. Trailing zeroes may be necessary. 
206+             If P is 0, the dot is omitted. 
194207
195208            Note: This precision is different than the one used by databases. For decimals, 
196-             it's the same as "numeric_scale", and for floats, who use binary precision, 
197-             it can be calculated as log10(2**p) 
209+             it's the same as ``numeric_scale``, and for floats, who use binary precision, 
210+             it can be calculated as ``log10(2**numeric_precision)``. 
211+         """ 
212+         ...
213+ 
214+     def  normalize_value_by_type (self , value : str , coltype : ColType ) ->  str :
215+         """Creates an SQL expression, that converts 'value' to a normalized representation. 
216+ 
217+         The returned expression must accept any SQL value, and return a string. 
218+ 
219+         The default implementation dispatches to a method according to ``coltype``: 
198220
221+             TemporalType -> normalize_timestamp() 
222+             NumericType  -> normalize_number() 
223+             -else-       -> to_string() 
199224
200225        """ 
201-         ...
226+         if  isinstance (coltype , TemporalType ):
227+             return  self .normalize_timestamp (value , coltype )
228+         elif  isinstance (coltype , NumericType ):
229+             return  self .normalize_number (value , coltype )
230+         return  self .to_string (f"{ value }  " )
202231
203232
204233class  Database (AbstractDatabase ):
@@ -404,27 +433,16 @@ def md5_to_int(self, s: str) -> str:
404433    def  to_string (self , s : str ):
405434        return  f"{ s }  ::varchar" 
406435
407-     def  normalize_value_by_type (self , value : str , coltype : ColType ) ->  str :
408-         if  isinstance (coltype , TemporalType ):
409-             # if coltype.precision == 0: 
410-             #     return f"to_char({value}::timestamp(0), 'YYYY-mm-dd HH24:MI:SS')" 
411-             # if coltype.precision == 3: 
412-             #     return f"to_char({value}, 'YYYY-mm-dd HH24:MI:SS.US')" 
413-             # elif coltype.precision == 6: 
414-             # return f"to_char({value}::timestamp({coltype.precision}), 'YYYY-mm-dd HH24:MI:SS.US')" 
415-             # else: 
416-             #     # Postgres/Redshift doesn't support arbitrary precision 
417-             #     raise TypeError(f"Bad precision for {type(self).__name__}: {coltype})") 
418-             if  coltype .rounds :
419-                 return  f"to_char({ value }  ::timestamp({ coltype .precision }  ), 'YYYY-mm-dd HH24:MI:SS.US')" 
420-             else :
421-                 timestamp6  =  f"to_char({ value }  ::timestamp(6), 'YYYY-mm-dd HH24:MI:SS.US')" 
422-                 return  f"RPAD(LEFT({ timestamp6 }  , { TIMESTAMP_PRECISION_POS + coltype .precision }  ), { TIMESTAMP_PRECISION_POS + 6 }  , '0')" 
423436
424-         elif  isinstance (coltype , NumericType ):
425-             value  =  f"{ value }  ::decimal(38, { coltype .precision }  )" 
437+     def  normalize_timestamp (self , value : str , coltype : ColType ) ->  str :
438+         if  coltype .rounds :
439+             return  f"to_char({ value }  ::timestamp({ coltype .precision }  ), 'YYYY-mm-dd HH24:MI:SS.US')" 
426440
427-         return  self .to_string (f"{ value }  " )
441+         timestamp6  =  f"to_char({ value }  ::timestamp(6), 'YYYY-mm-dd HH24:MI:SS.US')" 
442+         return  f"RPAD(LEFT({ timestamp6 }  , { TIMESTAMP_PRECISION_POS + coltype .precision }  ), { TIMESTAMP_PRECISION_POS + 6 }  , '0')" 
443+ 
444+     def  normalize_number (self , value : str , coltype : ColType ) ->  str :
445+         return  self .to_string (f"{ value }  ::decimal(38, { coltype .precision }  )" )
428446
429447
430448class  Presto (Database ):
@@ -463,25 +481,19 @@ def _query(self, sql_code: str) -> list:
463481    def  close (self ):
464482        self ._conn .close ()
465483
466-     def  normalize_value_by_type (self , value : str , coltype : ColType ) ->  str :
467-         if  isinstance (coltype , TemporalType ):
468-             if  coltype .rounds :
469-                 if  coltype .precision  >  3 :
470-                     pass 
471-                 s  =  f"date_format(cast({ value }   as timestamp(6)), '%Y-%m-%d %H:%i:%S.%f')" 
472-             else :
473-                 s  =  f"date_format(cast({ value }   as timestamp(6)), '%Y-%m-%d %H:%i:%S.%f')" 
474-                 # datetime = f"date_format(cast({value} as timestamp(6), '%Y-%m-%d %H:%i:%S.%f'))" 
475-                 # datetime = self.to_string(f"cast({value} as datetime(6))") 
484+     def  normalize_timestamp (self , value : str , coltype : ColType ) ->  str :
485+         # TODO 
486+         if  coltype .rounds :
487+             s  =  f"date_format(cast({ value }   as timestamp(6)), '%Y-%m-%d %H:%i:%S.%f')" 
488+         else :
489+             s  =  f"date_format(cast({ value }   as timestamp(6)), '%Y-%m-%d %H:%i:%S.%f')" 
476490
477-             return  (
478-                 f"RPAD(RPAD({ s }  , { TIMESTAMP_PRECISION_POS + coltype .precision }  , '.'), { TIMESTAMP_PRECISION_POS + 6 }  , '0')" 
479-             )
480- 
481-         elif  isinstance (coltype , NumericType ):
482-             value  =  f"cast({ value }   as decimal(38,{ coltype .precision }  ))" 
491+         return  (
492+             f"RPAD(RPAD({ s }  , { TIMESTAMP_PRECISION_POS + coltype .precision }  , '.'), { TIMESTAMP_PRECISION_POS + 6 }  , '0')" 
493+         )
483494
484-         return  self .to_string (value )
495+     def  normalize_number (self , value : str , coltype : ColType ) ->  str :
496+         return  self .to_string (f"cast({ value }   as decimal(38,{ coltype .precision }  ))" )
485497
486498    def  select_table_schema (self , path : DbPath ) ->  str :
487499        schema , table  =  self ._normalize_table_path (path )
@@ -566,18 +578,16 @@ def md5_to_int(self, s: str) -> str:
566578    def  to_string (self , s : str ):
567579        return  f"cast({ s }   as char)" 
568580
569-     def  normalize_value_by_type (self , value : str , coltype : ColType ) ->  str :
570-         if  isinstance (coltype , TemporalType ):
571-             if  coltype .rounds :
572-                 return  self .to_string (f"cast( cast({ value }   as datetime({ coltype .precision }  )) as datetime(6))" )
573-             else :
574-                 s  =  self .to_string (f"cast({ value }   as datetime(6))" )
575-                 return  f"RPAD(RPAD({ s }  , { TIMESTAMP_PRECISION_POS + coltype .precision }  , '.'), { TIMESTAMP_PRECISION_POS + 6 }  , '0')" 
581+     def  normalize_timestamp (self , value : str , coltype : ColType ) ->  str :
582+         if  coltype .rounds :
583+             return  self .to_string (f"cast( cast({ value }   as datetime({ coltype .precision }  )) as datetime(6))" )
576584
577-         elif  isinstance (coltype , NumericType ):
578-             value  =  f"cast({ value }   as decimal(38,{ coltype .precision }  ))" 
585+         s  =  self .to_string (f"cast({ value }   as datetime(6))" )
586+         return  f"RPAD(RPAD({ s }  , { TIMESTAMP_PRECISION_POS + coltype .precision }  , '.'), { TIMESTAMP_PRECISION_POS + 6 }  , '0')" 
587+ 
588+     def  normalize_number (self , value : str , coltype : ColType ) ->  str :
589+         return  self .to_string (f"cast({ value }   as decimal(38, { coltype .precision }  ))" )
579590
580-         return  self .to_string (f"{ value }  " )
581591
582592
583593class  Oracle (ThreadedDatabase ):
@@ -622,16 +632,15 @@ def select_table_schema(self, path: DbPath) -> str:
622632            f" FROM USER_TAB_COLUMNS WHERE table_name = '{ table .upper ()}  '" 
623633        )
624634
625-     def  normalize_value_by_type (self , value : str , coltype : ColType ) ->  str :
626-         if  isinstance (coltype , TemporalType ):
627-             return  f"to_char(cast({ value }   as timestamp({ coltype .precision }  )), 'YYYY-MM-DD HH24:MI:SS.FF6')" 
628-         elif  isinstance (coltype , NumericType ):
629-             # FM999.9990 
630-             format_str  =  "FM"  +  "9"  *  (38  -  coltype .precision )
631-             if  coltype .precision :
632-                 format_str  +=  "0."  +  "9"  *  (coltype .precision  -  1 ) +  "0" 
633-             return  f"to_char({ value }  , '{ format_str }  ')" 
634-         return  self .to_string (f"{ value }  " )
635+     def  normalize_timestamp (self , value : str , coltype : ColType ) ->  str :
636+         return  f"to_char(cast({ value }   as timestamp({ coltype .precision }  )), 'YYYY-MM-DD HH24:MI:SS.FF6')" 
637+ 
638+     def  normalize_number (self , value : str , coltype : ColType ) ->  str :
639+         # FM999.9990 
640+         format_str  =  "FM"  +  "9"  *  (38  -  coltype .precision )
641+         if  coltype .precision :
642+             format_str  +=  "0."  +  "9"  *  (coltype .precision  -  1 ) +  "0" 
643+         return  f"to_char({ value }  , '{ format_str }  ')" 
635644
636645    def  _parse_type (
637646        self , type_repr : str , datetime_precision : int  =  None , numeric_precision : int  =  None , numeric_scale : int  =  None 
@@ -682,27 +691,25 @@ def _convert_db_precision_to_digits(self, p: int) -> int:
682691    def  md5_to_int (self , s : str ) ->  str :
683692        return  f"strtol(substring(md5({ s }  ), { 1 + MD5_HEXDIGITS - CHECKSUM_HEXDIGITS }  ), 16)::decimal(38)" 
684693
685-     def  normalize_value_by_type (self , value : str , coltype : ColType ) ->  str :
686-         if  isinstance (coltype , TemporalType ):
687-             if  coltype .rounds :
688-                 timestamp  =  f"{ value }  ::timestamp(6)" 
689-                 # Get seconds since epoch. Redshift doesn't support milli- or micro-seconds. 
690-                 secs  =  f"timestamp 'epoch' + round(extract(epoch from { timestamp }  )::decimal(38)" 
691-                 # Get the milliseconds from timestamp. 
692-                 ms  =  f"extract(ms from { timestamp }  )" 
693-                 # Get the microseconds from timestamp, without the milliseconds! 
694-                 us  =  f"extract(us from { timestamp }  )" 
695-                 # epoch = Total time since epoch in microseconds. 
696-                 epoch  =  f"{ secs }  *1000000 + { ms }  *1000 + { us }  " 
697-                 timestamp6  =  f"to_char({ epoch }  , -6+{ coltype .precision }  ) * interval '0.000001 seconds', 'YYYY-mm-dd HH24:MI:SS.US')" 
698-             else :
699-                 timestamp6  =  f"to_char({ value }  ::timestamp(6), 'YYYY-mm-dd HH24:MI:SS.US')" 
700-             return  f"RPAD(LEFT({ timestamp6 }  , { TIMESTAMP_PRECISION_POS + coltype .precision }  ), { TIMESTAMP_PRECISION_POS + 6 }  , '0')" 
694+     def  normalize_timestamp (self , value : str , coltype : ColType ) ->  str :
695+         if  coltype .rounds :
696+             timestamp  =  f"{ value }  ::timestamp(6)" 
697+             # Get seconds since epoch. Redshift doesn't support milli- or micro-seconds. 
698+             secs  =  f"timestamp 'epoch' + round(extract(epoch from { timestamp }  )::decimal(38)" 
699+             # Get the milliseconds from timestamp. 
700+             ms  =  f"extract(ms from { timestamp }  )" 
701+             # Get the microseconds from timestamp, without the milliseconds! 
702+             us  =  f"extract(us from { timestamp }  )" 
703+             # epoch = Total time since epoch in microseconds. 
704+             epoch  =  f"{ secs }  *1000000 + { ms }  *1000 + { us }  " 
705+             timestamp6  =  f"to_char({ epoch }  , -6+{ coltype .precision }  ) * interval '0.000001 seconds', 'YYYY-mm-dd HH24:MI:SS.US')" 
706+         else :
707+             timestamp6  =  f"to_char({ value }  ::timestamp(6), 'YYYY-mm-dd HH24:MI:SS.US')" 
708+         return  f"RPAD(LEFT({ timestamp6 }  , { TIMESTAMP_PRECISION_POS + coltype .precision }  ), { TIMESTAMP_PRECISION_POS + 6 }  , '0')" 
701709
702-          elif   isinstance ( coltype ,  NumericType ) :
703-              value   =   f"{ value }  ::decimal(38,{ coltype .precision }  )" 
710+     def   normalize_number ( self ,  value :  str ,  coltype :  ColType )  ->   str :
711+         return   self . to_string ( f"{ value }  ::decimal(38,{ coltype .precision }  )" ) 
704712
705-         return  self .to_string (f"{ value }  " )
706713
707714
708715class  MsSQL (ThreadedDatabase ):
@@ -794,25 +801,22 @@ def select_table_schema(self, path: DbPath) -> str:
794801            f"WHERE table_name = '{ table }  ' AND table_schema = '{ schema }  '" 
795802        )
796803
797-     def  normalize_value_by_type (self , value : str , coltype : ColType ) ->  str :
798-         if  isinstance (coltype , TemporalType ):
799-             if  coltype .rounds :
800-                 timestamp  =  f"timestamp_micros(cast(round(unix_micros(cast({ value }   as timestamp))/1000000, { coltype .precision }  )*1000000 as int))" 
801-                 return  f"FORMAT_TIMESTAMP('%F %H:%M:%E6S', { timestamp }  )" 
802-             else :
803-                 if  coltype .precision  ==  0 :
804-                     return  f"FORMAT_TIMESTAMP('%F %H:%M:%S.000000, { value }  )" 
805-                 elif  coltype .precision  ==  6 :
806-                     return  f"FORMAT_TIMESTAMP('%F %H:%M:%E6S', { value }  )" 
804+     def  normalize_timestamp (self , value : str , coltype : ColType ) ->  str :
805+         if  coltype .rounds :
806+             timestamp  =  f"timestamp_micros(cast(round(unix_micros(cast({ value }   as timestamp))/1000000, { coltype .precision }  )*1000000 as int))" 
807+             return  f"FORMAT_TIMESTAMP('%F %H:%M:%E6S', { timestamp }  )" 
807808
808-                 timestamp6  =  f"FORMAT_TIMESTAMP('%F %H:%M:%E6S', { value }  )" 
809-                 return  f"RPAD(LEFT({ timestamp6 }  , { TIMESTAMP_PRECISION_POS + coltype .precision }  ), { TIMESTAMP_PRECISION_POS + 6 }  , '0')" 
809+         if  coltype .precision  ==  0 :
810+             return  f"FORMAT_TIMESTAMP('%F %H:%M:%S.000000, { value }  )" 
811+         elif  coltype .precision  ==  6 :
812+             return  f"FORMAT_TIMESTAMP('%F %H:%M:%E6S', { value }  )" 
810813
811-         elif  isinstance (coltype , NumericType ):
812-             # value = f"cast({value} as decimal)" 
813-             return  f"format('%.{ coltype .precision }  f', cast({ value }   as decimal))" 
814+         timestamp6  =  f"FORMAT_TIMESTAMP('%F %H:%M:%E6S', { value }  )" 
815+         return  f"RPAD(LEFT({ timestamp6 }  , { TIMESTAMP_PRECISION_POS + coltype .precision }  ), { TIMESTAMP_PRECISION_POS + 6 }  , '0')" 
814816
815-         return  self .to_string (f"{ value }  " )
817+     def  normalize_number (self , value : str , coltype : ColType ) ->  str :
818+         # value = f"cast({value} as decimal)" 
819+         return  f"format('%.{ coltype .precision }  f', cast({ value }   as decimal))" 
816820
817821    def  parse_table_name (self , name : str ) ->  DbPath :
818822        path  =  parse_table_name (name )
@@ -886,19 +890,16 @@ def select_table_schema(self, path: DbPath) -> str:
886890        schema , table  =  self ._normalize_table_path (path )
887891        return  super ().select_table_schema ((schema , table ))
888892
889-     def  normalize_value_by_type (self , value : str , coltype : ColType ) ->  str :
890-         if  isinstance (coltype , TemporalType ):
891-             if  coltype .rounds :
892-                 timestamp  =  f"to_timestamp(round(date_part(epoch_nanosecond, { value }  ::timestamp(9))/1000000000, { coltype .precision }  ))" 
893-             else :
894-                 timestamp  =  f"cast({ value }   as timestamp({ coltype .precision }  ))" 
893+     def  normalize_timestamp (self , value : str , coltype : ColType ) ->  str :
894+         if  coltype .rounds :
895+             timestamp  =  f"to_timestamp(round(date_part(epoch_nanosecond, { value }  ::timestamp(9))/1000000000, { coltype .precision }  ))" 
896+         else :
897+             timestamp  =  f"cast({ value }   as timestamp({ coltype .precision }  ))" 
895898
896-              return  f"to_char({ timestamp }  , 'YYYY-MM-DD HH24:MI:SS.FF6')" 
899+         return  f"to_char({ timestamp }  , 'YYYY-MM-DD HH24:MI:SS.FF6')" 
897900
898-         elif  isinstance (coltype , NumericType ):
899-             value  =  f"cast({ value }   as decimal(38, { coltype .precision }  ))" 
900- 
901-         return  self .to_string (f"{ value }  " )
901+     def  normalize_number (self , value : str , coltype : ColType ) ->  str :
902+         return  self .to_string (f"cast({ value }   as decimal(38, { coltype .precision }  ))" )
902903
903904
904905@dataclass  
0 commit comments