1313# limitations under the License.
1414
1515import os
16+ import time
1617import uuid
1718
1819import google .cloud .bigquery
3637BIGQUERY_TABLE_ID = "dlp_test_table" + UNIQUE_STRING
3738BIGQUERY_HARMFUL_TABLE_ID = "harmful" + UNIQUE_STRING
3839
40+ TIMEOUT = 30
41+
3942
4043# Create new custom topic/subscription
41- @pytest .fixture (scope = "module" )
44+ # We sometimes observe every test in this file failing at once. On the
45+ # hypothesis that the DLP service somehow loses its connection to the
46+ # topic, the Pub/Sub fixtures below now use function scope.
47+ @pytest .fixture (scope = "function" )
4248def topic_id ():
4349 # Creates a pubsub topic, and tears it down.
4450 publisher = google .cloud .pubsub .PublisherClient ()
@@ -53,7 +59,7 @@ def topic_id():
5359 publisher .delete_topic (topic_path )
5460
5561
56- @pytest .fixture (scope = "module " )
62+ @pytest .fixture (scope = "function " )
5763def subscription_id (topic_id ):
5864 # Subscribes to a topic.
5965 subscriber = google .cloud .pubsub .SubscriberClient ()
@@ -160,7 +166,22 @@ def bigquery_project():
160166 bigquery_client .delete_dataset (dataset_ref , delete_contents = True )
161167
162168
163- @pytest .mark .flaky
def delay(err, *args):
    """Rerun filter for the flaky tests: pause, then allow the retry.

    Sleeps for up to 20 minutes before letting a failed test run again.
    That sounds like a long delay, but we occasionally observe
    consecutive blocks of time in which operations are slow, which makes
    the tests fail. Those periods tend to heal themselves within 20
    minutes or so, hence this strategy.

    There are 10 tests, so the delay must not be paid in full by every
    one of them. The pause is taken out of the shared budget held in
    ``pytest.MAX_FLAKY_WAIT`` (seconds; initialized outside this block).
    Once that budget is exhausted the pause is zero and the test is
    retried immediately.

    Args:
        err: Failure information passed by the rerun machinery; unused.
        *args: Any further positional arguments; unused.

    Returns:
        Always ``True``, i.e. the failed test should be rerun.
    """
    per_retry_cap = 60 * 20
    pause = min(pytest.MAX_FLAKY_WAIT, per_retry_cap)
    # Charge the pause to the shared budget before sleeping.
    pytest.MAX_FLAKY_WAIT = pytest.MAX_FLAKY_WAIT - pause
    time.sleep(pause)
    return True
182+
183+
184+ @pytest .mark .flaky (max_runs = 2 , min_passes = 1 , rerun_filter = delay )
164185def test_numerical_risk_analysis (
165186 topic_id , subscription_id , bigquery_project , capsys
166187):
@@ -172,13 +193,14 @@ def test_numerical_risk_analysis(
172193 NUMERIC_FIELD ,
173194 topic_id ,
174195 subscription_id ,
196+ timeout = TIMEOUT ,
175197 )
176198
177199 out , _ = capsys .readouterr ()
178200 assert "Value Range:" in out
179201
180202
181- @pytest .mark .flaky
203+ @pytest .mark .flaky ( max_runs = 2 , min_passes = 1 , rerun_filter = delay )
182204def test_categorical_risk_analysis_on_string_field (
183205 topic_id , subscription_id , bigquery_project , capsys
184206):
@@ -190,14 +212,14 @@ def test_categorical_risk_analysis_on_string_field(
190212 UNIQUE_FIELD ,
191213 topic_id ,
192214 subscription_id ,
193- timeout = 180 ,
215+ timeout = TIMEOUT ,
194216 )
195217
196218 out , _ = capsys .readouterr ()
197219 assert "Most common value occurs" in out
198220
199221
200- @pytest .mark .flaky
222+ @pytest .mark .flaky ( max_runs = 2 , min_passes = 1 , rerun_filter = delay )
201223def test_categorical_risk_analysis_on_number_field (
202224 topic_id , subscription_id , bigquery_project , capsys
203225):
@@ -209,13 +231,14 @@ def test_categorical_risk_analysis_on_number_field(
209231 NUMERIC_FIELD ,
210232 topic_id ,
211233 subscription_id ,
234+ timeout = TIMEOUT ,
212235 )
213236
214237 out , _ = capsys .readouterr ()
215238 assert "Most common value occurs" in out
216239
217240
218- @pytest .mark .flaky
241+ @pytest .mark .flaky ( max_runs = 2 , min_passes = 1 , rerun_filter = delay )
219242def test_k_anonymity_analysis_single_field (
220243 topic_id , subscription_id , bigquery_project , capsys
221244):
@@ -227,14 +250,15 @@ def test_k_anonymity_analysis_single_field(
227250 topic_id ,
228251 subscription_id ,
229252 [NUMERIC_FIELD ],
253+ timeout = TIMEOUT ,
230254 )
231255
232256 out , _ = capsys .readouterr ()
233257 assert "Quasi-ID values:" in out
234258 assert "Class size:" in out
235259
236260
237- @pytest .mark .flaky (max_runs = 3 , min_passes = 1 )
261+ @pytest .mark .flaky (max_runs = 2 , min_passes = 1 , rerun_filter = delay )
238262def test_k_anonymity_analysis_multiple_fields (
239263 topic_id , subscription_id , bigquery_project , capsys
240264):
@@ -246,14 +270,15 @@ def test_k_anonymity_analysis_multiple_fields(
246270 topic_id ,
247271 subscription_id ,
248272 [NUMERIC_FIELD , REPEATED_FIELD ],
273+ timeout = TIMEOUT ,
249274 )
250275
251276 out , _ = capsys .readouterr ()
252277 assert "Quasi-ID values:" in out
253278 assert "Class size:" in out
254279
255280
256- @pytest .mark .flaky
281+ @pytest .mark .flaky ( max_runs = 2 , min_passes = 1 , rerun_filter = delay )
257282def test_l_diversity_analysis_single_field (
258283 topic_id , subscription_id , bigquery_project , capsys
259284):
@@ -266,6 +291,7 @@ def test_l_diversity_analysis_single_field(
266291 subscription_id ,
267292 UNIQUE_FIELD ,
268293 [NUMERIC_FIELD ],
294+ timeout = TIMEOUT ,
269295 )
270296
271297 out , _ = capsys .readouterr ()
@@ -274,7 +300,7 @@ def test_l_diversity_analysis_single_field(
274300 assert "Sensitive value" in out
275301
276302
277- @pytest .mark .flaky (max_runs = 3 , min_passes = 1 )
303+ @pytest .mark .flaky (max_runs = 2 , min_passes = 1 , rerun_filter = delay )
278304def test_l_diversity_analysis_multiple_field (
279305 topic_id , subscription_id , bigquery_project , capsys
280306):
@@ -287,6 +313,7 @@ def test_l_diversity_analysis_multiple_field(
287313 subscription_id ,
288314 UNIQUE_FIELD ,
289315 [NUMERIC_FIELD , REPEATED_FIELD ],
316+ timeout = TIMEOUT ,
290317 )
291318
292319 out , _ = capsys .readouterr ()
@@ -295,7 +322,7 @@ def test_l_diversity_analysis_multiple_field(
295322 assert "Sensitive value" in out
296323
297324
298- @pytest .mark .flaky
325+ @pytest .mark .flaky ( max_runs = 2 , min_passes = 1 , rerun_filter = delay )
299326def test_k_map_estimate_analysis_single_field (
300327 topic_id , subscription_id , bigquery_project , capsys
301328):
@@ -308,6 +335,7 @@ def test_k_map_estimate_analysis_single_field(
308335 subscription_id ,
309336 [NUMERIC_FIELD ],
310337 ["AGE" ],
338+ timeout = TIMEOUT ,
311339 )
312340
313341 out , _ = capsys .readouterr ()
@@ -316,7 +344,7 @@ def test_k_map_estimate_analysis_single_field(
316344 assert "Values" in out
317345
318346
319- @pytest .mark .flaky (max_runs = 3 , min_passes = 1 )
347+ @pytest .mark .flaky (max_runs = 2 , min_passes = 1 , rerun_filter = delay )
320348def test_k_map_estimate_analysis_multiple_field (
321349 topic_id , subscription_id , bigquery_project , capsys
322350):
@@ -329,6 +357,7 @@ def test_k_map_estimate_analysis_multiple_field(
329357 subscription_id ,
330358 [NUMERIC_FIELD , STRING_BOOLEAN_FIELD ],
331359 ["AGE" , "GENDER" ],
360+ timeout = TIMEOUT ,
332361 )
333362
334363 out , _ = capsys .readouterr ()
@@ -337,7 +366,7 @@ def test_k_map_estimate_analysis_multiple_field(
337366 assert "Values" in out
338367
339368
340- @pytest .mark .flaky
369+ @pytest .mark .flaky ( max_runs = 2 , min_passes = 1 , rerun_filter = delay )
341370def test_k_map_estimate_analysis_quasi_ids_info_types_equal (
342371 topic_id , subscription_id , bigquery_project
343372):
@@ -351,4 +380,5 @@ def test_k_map_estimate_analysis_quasi_ids_info_types_equal(
351380 subscription_id ,
352381 [NUMERIC_FIELD , STRING_BOOLEAN_FIELD ],
353382 ["AGE" ],
383+ timeout = TIMEOUT ,
354384 )
0 commit comments