@@ -303,6 +303,103 @@ def inspect_table(
303303
304304# [END dlp_inspect_table]
305305
306+
307+ # [START dlp_inspect_column_values_w_custom_hotwords]
308+ from typing import List # noqa: E402, I100
309+
310+ import google .cloud .dlp # noqa: F811, E402
311+
312+
def inspect_column_values_w_custom_hotwords(
    project: str,
    table_header: List[str],
    table_rows: List[List[str]],
    info_types: List[str],
    custom_hotword: str,
) -> None:
    """Inspects table data with built-in infoType detectors, suppressing
    findings that a custom hot-word rule applies to.

    The hot-word rule pins the likelihood of affected findings to
    VERY_UNLIKELY. Because the inspect config asks for a minimum likelihood
    of POSSIBLE, those findings are expected to be dropped by the service.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        table_header: List of strings representing table field names.
        table_rows: List of rows representing table values.
        info_types: Names of the infoTypes to look for; the hot-word rule
            is attached to these same infoTypes.
        custom_hotword: The custom regular expression used as the hot-word
            pattern.

    Returns:
        None. Findings (or "No findings.") are printed to stdout.
    """

    # Instantiate a client.
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Construct the `table`. For more details on the table schema, please see
    # https://cloud.google.com/dlp/docs/reference/rest/v2/ContentItem#Table
    headers = [{"name": val} for val in table_header]
    rows = [
        {"values": [{"string_value": cell_val} for cell_val in row]}
        for row in table_rows
    ]
    table = {"headers": headers, "rows": rows}

    # Construct the `item` for the table to be inspected.
    item = {"table": table}

    # Convert the infoType names into the dictionary form the API expects.
    # A separate name is used so the `info_types` argument keeps its
    # annotated type (list of strings).
    info_type_dicts = [{"name": info_type} for info_type in info_types]

    # Construct a hot-word rule from the caller-provided pattern. Findings
    # the rule applies to get their likelihood fixed at VERY_UNLIKELY.
    # NOTE(review): for table items, a `window_before` of 1 is presumably
    # what lets a matching column header affect the whole column -- confirm
    # against the DLP hot-word rule documentation.
    hotword_rule = {
        "hotword_regex": {"pattern": custom_hotword},
        "likelihood_adjustment": {
            "fixed_likelihood": google.cloud.dlp_v2.Likelihood.VERY_UNLIKELY
        },
        "proximity": {"window_before": 1},
    }

    rule_set = [
        {
            "info_types": info_type_dicts,
            "rules": [{"hotword_rule": hotword_rule}],
        }
    ]

    # Construct the configuration dictionary, which defines the entire
    # inspect content task.
    inspect_config = {
        "info_types": info_type_dicts,
        "rule_set": rule_set,
        "min_likelihood": google.cloud.dlp_v2.Likelihood.POSSIBLE,
        "include_quote": True,
    }

    # Convert the project id into a full resource id. The path must not
    # contain any whitespace.
    parent = f"projects/{project}/locations/global"

    # Call the API.
    response = dlp.inspect_content(
        request={
            "parent": parent,
            "inspect_config": inspect_config,
            "item": item,
        }
    )

    # Print out the results.
    findings = response.result.findings
    if not findings:
        print("No findings.")
        return

    for finding in findings:
        # Best effort: skip the quote line when the finding carries no quote.
        quote = getattr(finding, "quote", "")
        if quote:
            print(f"Quote: {quote}")
        print(f"Info type: {finding.info_type.name}")
        print(f"Likelihood: {finding.likelihood}")
399+
400+ # [END dlp_inspect_column_values_w_custom_hotwords]
401+
402+
306403# [START dlp_inspect_file]
307404import mimetypes # noqa: I100, E402
308405from typing import Optional # noqa: I100, E402
@@ -969,6 +1066,65 @@ def inspect_image_file_all_infotypes(
9691066# [END dlp_inspect_image_all_infotypes]
9701067
9711068
1069+ # [START dlp_inspect_image_file]
1070+ import google .cloud .dlp # noqa: F811, E402, I100
1071+
1072+
def inspect_image_file(
    project: str,
    filename: str,
    include_quote: bool = True,
) -> None:
    """Uses the Data Loss Prevention API to inspect an image file for
    phone numbers, email addresses and credit card numbers.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        filename: The path to the image file to inspect.
        include_quote: Boolean for whether to display a quote of the detected
            information in the results.

    Returns:
        None. Findings (or "No findings.") are printed to stdout.
    """
    # Instantiate a client.
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Fixed set of infoTypes this sample looks for, converted into the
    # dictionary form the API expects.
    info_type_names = ["PHONE_NUMBER", "EMAIL_ADDRESS", "CREDIT_CARD_NUMBER"]
    info_types = [{"name": name} for name in info_type_names]

    # Construct the configuration for the Inspect request.
    inspect_config = {
        "info_types": info_types,
        "include_quote": include_quote,
    }

    # Construct the byte_item, containing the image file's byte data.
    with open(filename, mode="rb") as f:
        byte_item = {"type_": "IMAGE", "data": f.read()}

    # Convert the project id into a full resource id. The path must not
    # contain any whitespace.
    parent = f"projects/{project}/locations/global"

    # Call the API.
    response = dlp.inspect_content(
        request={
            "parent": parent,
            "inspect_config": inspect_config,
            "item": {"byte_item": byte_item},
        }
    )

    # Parse the response and process results.
    findings = response.result.findings
    if not findings:
        print("No findings.")
        return

    for finding in findings:
        # Only show the quote when the caller asked for quotes; otherwise
        # the line would just read "Quote: " with nothing after it.
        if include_quote:
            print(f"Quote: {finding.quote}")
        print(f"Info type: {finding.info_type.name}")
        print(f"Likelihood: {finding.likelihood}")
1124+
1125+ # [END dlp_inspect_image_file]
1126+
1127+
9721128# [START dlp_inspect_image_listed_infotypes]
9731129import google .cloud .dlp # noqa: F811, E402
9741130
@@ -1539,6 +1695,42 @@ def inspect_data_to_hybrid_job_trigger(
15391695 default = True ,
15401696 )
15411697
1698+ parser_table_hotword = subparsers .add_parser (
1699+ "table_w_custom_hotword" ,
1700+ help = "Inspect a table and exclude column values when matched "
1701+ "with custom hot-word." ,
1702+ )
1703+ parser_table_hotword .add_argument (
1704+ "--project" ,
1705+ help = "The Google Cloud project id to use as a parent resource." ,
1706+ default = default_project ,
1707+ )
1708+ parser_table_hotword .add_argument (
1709+ "--table_header" ,
1710+ help = "List of strings representing table field names."
1711+ "Example include '['Fake_Email_Address', 'Real_Email_Address]'. "
1712+ "The method can be used to exclude matches from entire column"
1713+ '"Fake_Email_Address".' ,
1714+ )
1715+ parser_table_hotword .add_argument (
1716+ "--table_rows" ,
1717+ help = "List of rows representing table values."
1718+ 'Example: '
1719+ 1720+ 1721+ )
1722+ parser_table_hotword .add_argument (
1723+ "--info_types" ,
1724+ action = "append" ,
1725+ help = "Strings representing info types to look for. A full list of "
1726+ "info categories and types is available from the API. Examples "
1727+ 'include "FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS". '
1728+ )
1729+ parser_table_hotword .add_argument (
1730+ "custom_hotword" ,
1731+ help = "The custom regular expression used for likelihood boosting." ,
1732+ )
1733+
15421734 parser_file = subparsers .add_parser ("file" , help = "Inspect a local file." )
15431735 parser_file .add_argument ("filename" , help = "The path to the file to inspect." )
15441736 parser_file .add_argument (
@@ -1904,6 +2096,22 @@ def inspect_data_to_hybrid_job_trigger(
19042096 default = True ,
19052097 )
19062098
2099+ parser_image_default_infotypes = subparsers .add_parser (
2100+ "image_default_infotypes" , help = "Inspect a local file with default info types."
2101+ )
2102+ parser_image_default_infotypes .add_argument (
2103+ "--project" ,
2104+ help = "The Google Cloud project id to use as a parent resource." ,
2105+ default = default_project ,
2106+ )
2107+ parser_image_default_infotypes .add_argument ("filename" , help = "The path to the file to inspect." )
2108+ parser_image_default_infotypes .add_argument (
2109+ "--include_quote" ,
2110+ help = "A Boolean for whether to display a quote of the detected"
2111+ "information in the results." ,
2112+ default = True ,
2113+ )
2114+
19072115 parser_image_infotypes = subparsers .add_parser (
19082116 "image_listed_infotypes" , help = "Inspect a local file with listed info types."
19092117 )
@@ -2039,6 +2247,14 @@ def inspect_data_to_hybrid_job_trigger(
20392247 max_findings = args .max_findings ,
20402248 include_quote = args .include_quote ,
20412249 )
2250+ elif args .content == "table_w_custom_hotword" :
2251+ inspect_column_values_w_custom_hotwords (
2252+ args .project ,
2253+ args .table_header ,
2254+ args .table_rows ,
2255+ args .info_types ,
2256+ args .custom_hotword ,
2257+ )
20422258 elif args .content == "file" :
20432259 inspect_file (
20442260 args .project ,
@@ -2124,6 +2340,12 @@ def inspect_data_to_hybrid_job_trigger(
21242340 args .filename ,
21252341 include_quote = args .include_quote ,
21262342 )
2343+ elif args .content == "image_default_infotypes" :
2344+ inspect_image_file (
2345+ args .project ,
2346+ args .filename ,
2347+ include_quote = args .include_quote ,
2348+ )
21272349 elif args .content == "image_listed_infotypes" :
21282350 inspect_image_file_listed_infotypes (
21292351 args .project ,
0 commit comments