From 683b247015f2c91d139431191bee3de583eec78b Mon Sep 17 00:00:00 2001
From: Erez Shinan <erezshin+git@gmail.com>
Date: Wed, 2 Nov 2022 16:14:54 -0300
Subject: [PATCH 1/5] Refactor test_database_types

---
 tests/test_database_types.py | 46 ++++++++++++++++++++++++------------
 1 file changed, 31 insertions(+), 15 deletions(-)

diff --git a/tests/test_database_types.py b/tests/test_database_types.py
index c8ed69aa..07b8891b 100644
--- a/tests/test_database_types.py
+++ b/tests/test_database_types.py
@@ -418,22 +418,38 @@ def __iter__(self):
     "uuid": UUID_Faker(N_SAMPLES),
 }
 
+def _get_test_db_pairs(full):
+    if full:
+        for source_db in DATABASE_TYPES:
+            for target_db in DATABASE_TYPES:
+                yield source_db, target_db
+    else:
+        for db_cls in DATABASE_TYPES:
+            yield db_cls, db.PostgreSQL
+            yield db.PostgreSQL, db_cls
+            yield db_cls, db.Snowflake
+            yield db.Snowflake, db_cls
+
+def get_test_db_pairs(full=True):
+    active_pairs = {(db1, db2) for db1, db2 in _get_test_db_pairs(full) if db1 in CONN_STRINGS and db2 in CONN_STRINGS}
+    for db1, db2 in active_pairs:
+        yield db1, DATABASE_TYPES[db1], db2, DATABASE_TYPES[db2]
+
+
 type_pairs = []
-for source_db, source_type_categories in DATABASE_TYPES.items():
-    for target_db, target_type_categories in DATABASE_TYPES.items():
-        if CONN_STRINGS.get(source_db, False) and CONN_STRINGS.get(target_db, False):
-            for type_category, source_types in source_type_categories.items():  # int, datetime, ..
-                for source_type in source_types:
-                    for target_type in target_type_categories[type_category]:
-                        type_pairs.append(
-                            (
-                                source_db,
-                                target_db,
-                                source_type,
-                                target_type,
-                                type_category,
-                            )
-                        )
+for source_db, source_type_categories, target_db, target_type_categories in get_test_db_pairs():
+    for type_category, source_types in source_type_categories.items():  # int, datetime, ..
+        for source_type in source_types:
+            for target_type in target_type_categories[type_category]:
+                type_pairs.append(
+                    (
+                        source_db,
+                        target_db,
+                        source_type,
+                        target_type,
+                        type_category,
+                    )
+                )
 
 
 def sanitize(name):

From abf3d51ec0d9541d5c8d487a798b4ecb6f7c298f Mon Sep 17 00:00:00 2001
From: Erez Shinan <erezshin+git@gmail.com>
Date: Wed, 2 Nov 2022 16:15:26 -0300
Subject: [PATCH 2/5] Tests: Fix MySQL tests (timezone issues)

---
 data_diff/databases/base.py | 3 +++
 tests/test_cli.py           | 5 ++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/data_diff/databases/base.py b/data_diff/databases/base.py
index 0a5a2fa6..672c4e0b 100644
--- a/data_diff/databases/base.py
+++ b/data_diff/databases/base.py
@@ -267,6 +267,9 @@ def query(self, sql_ast: Union[Expr, Generator], res_type: type = list):
             if res is None:  # May happen due to sum() of 0 items
                 return None
             return int(res)
+        elif res_type is datetime:
+            res = _one(_one(res))
+            return res  # XXX parse timestamp?
         elif res_type is tuple:
             assert len(res) == 1, (sql_code, res)
             return res[0]
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 1c131f54..b63b1c7f 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -37,7 +37,10 @@ def setUp(self) -> None:
 
         src_table = table(table_src_name, schema={"id": int, "datetime": datetime, "text_comment": str})
         self.conn.query(src_table.create())
-        self.now = now = arrow.get(datetime.now())
+
+        self.conn.query("SET @@session.time_zone='+00:00'")
+        db_time = self.conn.query("select now()", datetime)
+        self.now = now = arrow.get(db_time)
 
         rows = [
             (now, "now"),

From cc126592f0a4b518ffeed1d40c260c7e8d740b44 Mon Sep 17 00:00:00 2001
From: Erez Shinan <erezshin+git@gmail.com>
Date: Wed, 2 Nov 2022 17:07:28 -0300
Subject: [PATCH 3/5] Split tests into FULL_TESTS and regular

---
 .github/workflows/ci.yml          |  1 +
 .github/workflows/ci_full.yml     | 50 +++++++++++++++++++++++++++++++
 data_diff/databases/clickhouse.py |  1 -
 data_diff/databases/oracle.py     |  4 +--
 tests/common.py                   |  3 ++
 tests/test_database_types.py      |  5 +++-
 6 files changed, 59 insertions(+), 5 deletions(-)
 create mode 100644 .github/workflows/ci_full.yml

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b5f8d548..21115c45 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -46,6 +46,7 @@ jobs:
         env:
             DATADIFF_SNOWFLAKE_URI: '${{ secrets.DATADIFF_SNOWFLAKE_URI }}'
             DATADIFF_PRESTO_URI: '${{ secrets.DATADIFF_PRESTO_URI }}'
+            DATADIFF_TRINO_URI: '${{ secrets.DATADIFF_TRINO_URI }}'
             DATADIFF_CLICKHOUSE_URI: 'clickhouse://clickhouse:Password1@localhost:9000/clickhouse'
             DATADIFF_VERTICA_URI: 'vertica://vertica:Password1@localhost:5433/vertica'
         run: |
diff --git a/.github/workflows/ci_full.yml b/.github/workflows/ci_full.yml
new file mode 100644
index 00000000..42b1e5fb
--- /dev/null
+++ b/.github/workflows/ci_full.yml
@@ -0,0 +1,50 @@
+name: CI
+
+on:
+  push:
+    paths:
+      - '**.py'
+      - '.github/workflows/**'
+      - '!dev/**'
+  pull_request:
+    branches: [ master ]
+
+  workflow_dispatch:
+
+jobs:
+  unit_tests:
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]
+        python-version:
+          - "3.10"
+
+    name: Check Python ${{ matrix.python-version }} on ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v3
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Build the stack
+        run: docker-compose up -d mysql postgres presto trino clickhouse vertica
+
+      - name: Install Poetry
+        run: pip install poetry
+
+      - name: Install package
+        run: "poetry install"
+
+      - name: Run unit tests
+        env:
+            DATADIFF_SNOWFLAKE_URI: '${{ secrets.DATADIFF_SNOWFLAKE_URI }}'
+            DATADIFF_PRESTO_URI: '${{ secrets.DATADIFF_PRESTO_URI }}'
+            DATADIFF_CLICKHOUSE_URI: 'clickhouse://clickhouse:Password1@localhost:9000/clickhouse'
+            DATADIFF_VERTICA_URI: 'vertica://vertica:Password1@localhost:5433/vertica'
+        run: |
+          chmod +x tests/waiting_for_stack_up.sh
+          ./tests/waiting_for_stack_up.sh && FULL_TESTS=1 poetry run unittest-parallel -j 16
diff --git a/data_diff/databases/clickhouse.py b/data_diff/databases/clickhouse.py
index 3d6f9d34..b5f2f577 100644
--- a/data_diff/databases/clickhouse.py
+++ b/data_diff/databases/clickhouse.py
@@ -55,7 +55,6 @@ class Dialect(BaseDialect):
         "DateTime64": Timestamp,
     }
 
-
     def normalize_number(self, value: str, coltype: FractionalType) -> str:
         # If a decimal value has trailing zeros in a fractional part, when casting to string they are dropped.
         # For example:
diff --git a/data_diff/databases/oracle.py b/data_diff/databases/oracle.py
index b68b216a..64127e9a 100644
--- a/data_diff/databases/oracle.py
+++ b/data_diff/databases/oracle.py
@@ -127,9 +127,7 @@ def parse_type(
             precision = int(m.group(1))
             return t_cls(precision=precision, rounds=self.ROUNDS_ON_PREC_LOSS)
 
-        return super().parse_type(
-            table_path, col_name, type_repr, datetime_precision, numeric_precision, numeric_scale
-        )
+        return super().parse_type(table_path, col_name, type_repr, datetime_precision, numeric_precision, numeric_scale)
 
 
 class Oracle(ThreadedDatabase):
diff --git a/tests/common.py b/tests/common.py
index fccd5ddc..aa10f64d 100644
--- a/tests/common.py
+++ b/tests/common.py
@@ -36,6 +36,7 @@
 N_SAMPLES = int(os.environ.get("N_SAMPLES", DEFAULT_N_SAMPLES))
 BENCHMARK = os.environ.get("BENCHMARK", False)
 N_THREADS = int(os.environ.get("N_THREADS", 1))
+FULL_TESTS = bool(os.environ.get("FULL_TESTS", False))      # Should we run the full db<->db test suite?
 
 
 def get_git_revision_short_hash() -> str:
@@ -94,6 +95,8 @@ def _print_used_dbs():
     logging.info(f"Testing databases: {', '.join(used)}")
     if unused:
         logging.info(f"Connection not configured; skipping tests for: {', '.join(unused)}")
+    if FULL_TESTS:
+        logging.info("Full tests enabled (every db<->db). May take very long when many dbs are involved.")
 
 
 _print_used_dbs()
diff --git a/tests/test_database_types.py b/tests/test_database_types.py
index 07b8891b..d2f65162 100644
--- a/tests/test_database_types.py
+++ b/tests/test_database_types.py
@@ -25,6 +25,7 @@
     N_THREADS,
     BENCHMARK,
     GIT_REVISION,
+    FULL_TESTS,
     get_conn,
     random_table_suffix,
 )
@@ -418,6 +419,7 @@ def __iter__(self):
     "uuid": UUID_Faker(N_SAMPLES),
 }
 
+
 def _get_test_db_pairs(full):
     if full:
         for source_db in DATABASE_TYPES:
@@ -430,6 +432,7 @@ def _get_test_db_pairs(full):
             yield db_cls, db.Snowflake
             yield db.Snowflake, db_cls
 
+
 def get_test_db_pairs(full=True):
     active_pairs = {(db1, db2) for db1, db2 in _get_test_db_pairs(full) if db1 in CONN_STRINGS and db2 in CONN_STRINGS}
     for db1, db2 in active_pairs:
@@ -437,7 +440,7 @@ def get_test_db_pairs(full=True):
 
 
 type_pairs = []
-for source_db, source_type_categories, target_db, target_type_categories in get_test_db_pairs():
+for source_db, source_type_categories, target_db, target_type_categories in get_test_db_pairs(FULL_TESTS):
     for type_category, source_types in source_type_categories.items():  # int, datetime, ..
         for source_type in source_types:
             for target_type in target_type_categories[type_category]:

From 32a491447df03e21aecbb97f32a7540433779a86 Mon Sep 17 00:00:00 2001
From: Erez Shinan <erezshin+git@gmail.com>
Date: Wed, 2 Nov 2022 17:41:51 -0300
Subject: [PATCH 4/5] Tests: Even better balance using TEST_ACROSS_ALL_DBS

---
 .github/workflows/ci.yml      |  2 +-
 .github/workflows/ci_full.yml |  2 +-
 tests/common.py               |  8 +++++---
 tests/test_database_types.py  | 16 +++++++++-------
 4 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 21115c45..0aa3642d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -51,4 +51,4 @@ jobs:
             DATADIFF_VERTICA_URI: 'vertica://vertica:Password1@localhost:5433/vertica'
         run: |
           chmod +x tests/waiting_for_stack_up.sh
-          ./tests/waiting_for_stack_up.sh && poetry run unittest-parallel -j 16
+          ./tests/waiting_for_stack_up.sh && TEST_ACROSS_ALL_DBS=0 poetry run unittest-parallel -j 16
diff --git a/.github/workflows/ci_full.yml b/.github/workflows/ci_full.yml
index 42b1e5fb..a6a75806 100644
--- a/.github/workflows/ci_full.yml
+++ b/.github/workflows/ci_full.yml
@@ -47,4 +47,4 @@ jobs:
             DATADIFF_VERTICA_URI: 'vertica://vertica:Password1@localhost:5433/vertica'
         run: |
           chmod +x tests/waiting_for_stack_up.sh
-          ./tests/waiting_for_stack_up.sh && FULL_TESTS=1 poetry run unittest-parallel -j 16
+          ./tests/waiting_for_stack_up.sh && TEST_ACROSS_ALL_DBS=full poetry run unittest-parallel -j 16
diff --git a/tests/common.py b/tests/common.py
index aa10f64d..c1ae30a0 100644
--- a/tests/common.py
+++ b/tests/common.py
@@ -36,7 +36,7 @@
 N_SAMPLES = int(os.environ.get("N_SAMPLES", DEFAULT_N_SAMPLES))
 BENCHMARK = os.environ.get("BENCHMARK", False)
 N_THREADS = int(os.environ.get("N_THREADS", 1))
-FULL_TESTS = bool(os.environ.get("FULL_TESTS", False))      # Should we run the full db<->db test suite?
+TEST_ACROSS_ALL_DBS = os.environ.get("TEST_ACROSS_ALL_DBS", True)  # Should we run the full db<->db test suite?
 
 
 def get_git_revision_short_hash() -> str:
@@ -95,8 +95,10 @@ def _print_used_dbs():
     logging.info(f"Testing databases: {', '.join(used)}")
     if unused:
         logging.info(f"Connection not configured; skipping tests for: {', '.join(unused)}")
-    if FULL_TESTS:
-        logging.info("Full tests enabled (every db<->db). May take very long when many dbs are involved.")
+    if TEST_ACROSS_ALL_DBS:
+        logging.info(
+            f"Full tests enabled (every db<->db). May take very long when many dbs are involved. ={TEST_ACROSS_ALL_DBS}"
+        )
 
 
 _print_used_dbs()
diff --git a/tests/test_database_types.py b/tests/test_database_types.py
index d2f65162..250b4537 100644
--- a/tests/test_database_types.py
+++ b/tests/test_database_types.py
@@ -25,7 +25,7 @@
     N_THREADS,
     BENCHMARK,
     GIT_REVISION,
-    FULL_TESTS,
+    TEST_ACROSS_ALL_DBS,
     get_conn,
     random_table_suffix,
 )
@@ -420,27 +420,29 @@ def __iter__(self):
 }
 
 
-def _get_test_db_pairs(full):
-    if full:
+def _get_test_db_pairs():
+    if str(TEST_ACROSS_ALL_DBS).lower() == "full":
         for source_db in DATABASE_TYPES:
             for target_db in DATABASE_TYPES:
                 yield source_db, target_db
-    else:
+    elif int(TEST_ACROSS_ALL_DBS):
         for db_cls in DATABASE_TYPES:
             yield db_cls, db.PostgreSQL
             yield db.PostgreSQL, db_cls
             yield db_cls, db.Snowflake
             yield db.Snowflake, db_cls
+    else:
+        yield db.PostgreSQL, db.PostgreSQL
 
 
-def get_test_db_pairs(full=True):
-    active_pairs = {(db1, db2) for db1, db2 in _get_test_db_pairs(full) if db1 in CONN_STRINGS and db2 in CONN_STRINGS}
+def get_test_db_pairs():
+    active_pairs = {(db1, db2) for db1, db2 in _get_test_db_pairs() if db1 in CONN_STRINGS and db2 in CONN_STRINGS}
     for db1, db2 in active_pairs:
         yield db1, DATABASE_TYPES[db1], db2, DATABASE_TYPES[db2]
 
 
 type_pairs = []
-for source_db, source_type_categories, target_db, target_type_categories in get_test_db_pairs(FULL_TESTS):
+for source_db, source_type_categories, target_db, target_type_categories in get_test_db_pairs():
     for type_category, source_types in source_type_categories.items():  # int, datetime, ..
         for source_type in source_types:
             for target_type in target_type_categories[type_category]:

From f216ecb887959534a9da8b406c968f2ee4b36cd7 Mon Sep 17 00:00:00 2001
From: Erez Shinan <erezshin+git@gmail.com>
Date: Wed, 2 Nov 2022 17:51:27 -0300
Subject: [PATCH 5/5] Better names for CI

---
 .github/workflows/ci.yml      | 2 +-
 .github/workflows/ci_full.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0aa3642d..7c42e356 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,4 +1,4 @@
-name: CI
+name: CI-COVER-VERSIONS
 
 on:
   push:
diff --git a/.github/workflows/ci_full.yml b/.github/workflows/ci_full.yml
index a6a75806..0de7da52 100644
--- a/.github/workflows/ci_full.yml
+++ b/.github/workflows/ci_full.yml
@@ -1,4 +1,4 @@
-name: CI
+name: CI-COVER-DATABASES
 
 on:
   push: