Skip to content

Commit 12c4dfc

Browse files
authored
feat(dlp): sample for dlp De-identify (GoogleCloudPlatform#1858)
1 parent 99bdd75 commit 12c4dfc

10 files changed

+874
-0
lines changed
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
<?php
2+
/**
3+
* Copyright 2023 Google Inc.
4+
*
5+
* Licensed under the Apache License, Version 2.0 (the "License");
6+
* you may not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
/**
19+
* For instructions on how to run the samples:
20+
*
21+
* @see https://github.com/GoogleCloudPlatform/php-docs-samples/tree/main/dlp/README.md
22+
*/
23+
24+
namespace Google\Cloud\Samples\Dlp;
25+
26+
# [START dlp_deidentify_dictionary_replacement]
27+
use Google\Cloud\Dlp\V2\ContentItem;
28+
use Google\Cloud\Dlp\V2\DlpServiceClient;
29+
use Google\Cloud\Dlp\V2\CustomInfoType\Dictionary\WordList;
30+
use Google\Cloud\Dlp\V2\InfoType;
31+
use Google\Cloud\Dlp\V2\DeidentifyConfig;
32+
use Google\Cloud\Dlp\V2\InspectConfig;
33+
use Google\Cloud\Dlp\V2\PrimitiveTransformation;
34+
use Google\Cloud\Dlp\V2\InfoTypeTransformations;
35+
use Google\Cloud\Dlp\V2\InfoTypeTransformations\InfoTypeTransformation;
36+
use Google\Cloud\Dlp\V2\ReplaceDictionaryConfig;
37+
38+
/**
 * De-identify text using dictionary replacement.
 *
 * Dictionary replacement (ReplaceDictionaryConfig) replaces each piece of detected sensitive data with a
 * value that Cloud DLP randomly selects from a list of words that you provide. This transformation method
 * is useful if you want to use realistic surrogate values. In this sample Cloud DLP detects email addresses
 * and replaces each detected value with one of three surrogate email addresses.
 *
 * NOTE(review): the e-mail address in the default value of $textToDeIdentify looks like it was redacted to
 * a placeholder; pass text that contains a real address for the EMAIL_ADDRESS info type to match - confirm
 * against the upstream sample.
 *
 * @param string $callingProjectId The project ID to run the API call under.
 * @param string $textToDeIdentify The string you want the service to de-identify.
 */
function deidentify_dictionary_replacement(
    // TODO(developer): Replace sample parameters before running the code.
    string $callingProjectId,
    string $textToDeIdentify = 'My name is Charlie and email address is [email protected].'
): void {
    // Instantiate a client.
    $dlp = new DlpServiceClient();

    // Specify what content you want the service to de-identify.
    $contentItem = (new ContentItem())
        ->setValue($textToDeIdentify);

    // Specify the type of info the inspection will look for.
    // See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types
    $emailAddress = (new InfoType())
        ->setName('EMAIL_ADDRESS');

    $inspectConfig = (new InspectConfig())
        ->setInfoTypes([$emailAddress]);

    // Specify the list of values which will randomly replace identified email addresses.
    // The list must not be left empty: the service needs at least one word to choose from.
    $surrogateEmails = (new WordList())
        ->setWords(['izumi@example.com', 'alex@example.com', 'tal@example.com']);

    // Define type of de-identification as replacement with items from dictionary.
    $primitiveTransformation = (new PrimitiveTransformation())
        ->setReplaceDictionaryConfig(
            // Specify the dictionary to use for selecting replacement values for the finding.
            (new ReplaceDictionaryConfig())
                ->setWordList($surrogateEmails)
        );

    // Apply the transformation only to findings of the EMAIL_ADDRESS info type.
    $transformation = (new InfoTypeTransformation())
        ->setInfoTypes([$emailAddress])
        ->setPrimitiveTransformation($primitiveTransformation);

    // Construct the configuration for the de-identification request and list all desired transformations.
    $deidentifyConfig = (new DeidentifyConfig())
        ->setInfoTypeTransformations(
            (new InfoTypeTransformations())
                ->setTransformations([$transformation])
        );

    // Send the request and receive response from the service.
    $parent = "projects/$callingProjectId/locations/global";
    $response = $dlp->deidentifyContent([
        'parent' => $parent,
        'deidentifyConfig' => $deidentifyConfig,
        'inspectConfig' => $inspectConfig,
        'item' => $contentItem
    ]);

    // Print the results.
    printf('Text after replace with infotype config: %s', $response->getItem()->getValue());
}
103+
# [END dlp_deidentify_dictionary_replacement]
104+
105+
// The following 2 lines are only needed to run the samples: they load the shared
// helper script and call execute_sample() with this file's path, this file's
// namespace and the command-line arguments.
106+
require_once __DIR__ . '/../../testing/sample_helpers.php';
107+
\Google\Cloud\Samples\execute_sample(__FILE__, __NAMESPACE__, $argv);
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
<?php
2+
/**
3+
* Copyright 2023 Google Inc.
4+
*
5+
* Licensed under the Apache License, Version 2.0 (the "License");
6+
* you may not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
/**
19+
* For instructions on how to run the samples:
20+
*
21+
* @see https://github.com/GoogleCloudPlatform/php-docs-samples/tree/main/dlp/README.md
22+
*/
23+
24+
namespace Google\Cloud\Samples\Dlp;
25+
26+
# [START dlp_deidentify_table_primitive_bucketing]
27+
28+
use Google\Cloud\Dlp\V2\BucketingConfig;
29+
use Google\Cloud\Dlp\V2\BucketingConfig\Bucket;
30+
use Google\Cloud\Dlp\V2\DlpServiceClient;
31+
use Google\Cloud\Dlp\V2\ContentItem;
32+
use Google\Cloud\Dlp\V2\DeidentifyConfig;
33+
use Google\Cloud\Dlp\V2\FieldId;
34+
use Google\Cloud\Dlp\V2\FieldTransformation;
35+
use Google\Cloud\Dlp\V2\PrimitiveTransformation;
36+
use Google\Cloud\Dlp\V2\RecordTransformations;
37+
use Google\Cloud\Dlp\V2\Table;
38+
use Google\Cloud\Dlp\V2\Table\Row;
39+
use Google\Cloud\Dlp\V2\Value;
40+
41+
/**
 * De-identify data using primitive bucketing.
 * https://cloud.google.com/dlp/docs/concepts-bucketing#bucketing_scenario_1
 *
 * Reads a CSV file, sends it to the service as a table, replaces every value of the
 * "score" column with the label of the bucket it falls into (LOW / Medium / High)
 * and writes the de-identified table to another CSV file.
 *
 * @param string $callingProjectId The Google Cloud project id to use as a parent resource.
 * @param string $inputCsvFile     The input file (csv) path to de-identify.
 * @param string $outputCsvFile    The output file path to save the de-identified content.
 *
 * @throws \RuntimeException If the input file cannot be read or is empty, or if the
 *                           output file cannot be opened for writing.
 */
function deidentify_table_primitive_bucketing(
    // TODO(developer): Replace sample parameters before running the code.
    string $callingProjectId,
    string $inputCsvFile = './test/data/table4.csv',
    string $outputCsvFile = './test/data/deidentify_table_primitive_bucketing_output.csv'
): void {
    // Read the CSV file first, so that unusable input fails before a client is created.
    if (!is_readable($inputCsvFile)) {
        throw new \RuntimeException(sprintf('Unable to read input CSV file: %s', $inputCsvFile));
    }
    // Skip blank lines: a trailing empty line would otherwise become a one-cell row
    // that does not match the table headers.
    $csvLines = file($inputCsvFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    if ($csvLines === false || count($csvLines) === 0) {
        throw new \RuntimeException(sprintf('Input CSV file is empty or unreadable: %s', $inputCsvFile));
    }
    $csvHeaders = explode(',', $csvLines[0]);
    $csvRows = array_slice($csvLines, 1);

    // Instantiate a client.
    $dlp = new DlpServiceClient();

    // Convert CSV file into protobuf objects.
    $tableHeaders = array_map(function ($csvHeader) {
        return (new FieldId())->setName($csvHeader);
    }, $csvHeaders);

    $tableRows = array_map(function ($csvRow) {
        $rowValues = array_map(function ($csvValue) {
            return (new Value())
                ->setStringValue($csvValue);
        }, explode(',', $csvRow));
        return (new Row())
            ->setValues($rowValues);
    }, $csvRows);

    // Construct the table object.
    $tableToDeIdentify = (new Table())
        ->setHeaders($tableHeaders)
        ->setRows($tableRows);

    // Specify what content you want the service to de-identify.
    $contentItem = (new ContentItem())
        ->setTable($tableToDeIdentify);

    // Specify how the content should be de-identified: three consecutive buckets
    // (0-25, 25-75, 75-100), each with its own replacement label.
    $buckets = [
        (new Bucket())
            ->setMin((new Value())
                ->setIntegerValue(0))
            ->setMax((new Value())
                ->setIntegerValue(25))
            ->setReplacementValue((new Value())
                ->setStringValue('LOW')),
        (new Bucket())
            ->setMin((new Value())
                ->setIntegerValue(25))
            ->setMax((new Value())
                ->setIntegerValue(75))
            ->setReplacementValue((new Value())
                ->setStringValue('Medium')),
        (new Bucket())
            ->setMin((new Value())
                ->setIntegerValue(75))
            ->setMax((new Value())
                ->setIntegerValue(100))
            ->setReplacementValue((new Value())
                ->setStringValue('High')),
    ];

    $bucketingConfig = (new BucketingConfig())
        ->setBuckets($buckets);

    $primitiveTransformation = (new PrimitiveTransformation())
        ->setBucketingConfig($bucketingConfig);

    // Specify the field of the table to be de-identified.
    $fieldId = (new FieldId())
        ->setName('score');

    $fieldTransformation = (new FieldTransformation())
        ->setPrimitiveTransformation($primitiveTransformation)
        ->setFields([$fieldId]);

    $recordTransformations = (new RecordTransformations())
        ->setFieldTransformations([$fieldTransformation]);

    // Create the deidentification configuration object.
    $deidentifyConfig = (new DeidentifyConfig())
        ->setRecordTransformations($recordTransformations);

    $parent = "projects/$callingProjectId/locations/global";

    // Send the request and receive response from the service.
    $response = $dlp->deidentifyContent([
        'parent' => $parent,
        'deidentifyConfig' => $deidentifyConfig,
        'item' => $contentItem
    ]);

    // Write the de-identified table to the output file.
    $csvRef = fopen($outputCsvFile, 'w');
    if ($csvRef === false) {
        throw new \RuntimeException(sprintf('Unable to open output CSV file for writing: %s', $outputCsvFile));
    }
    try {
        fputcsv($csvRef, $csvHeaders);
        foreach ($response->getItem()->getTable()->getRows() as $tableRow) {
            $values = array_map(function ($tableValue) {
                return $tableValue->getStringValue();
            }, iterator_to_array($tableRow->getValues()));
            fputcsv($csvRef, $values);
        }
    } finally {
        // Always release the file handle, even if writing a row fails.
        fclose($csvRef);
    }

    // Print the results.
    printf('Table after deidentify (File Location): %s', $outputCsvFile);
}
153+
# [END dlp_deidentify_table_primitive_bucketing]
154+
155+
// The following 2 lines are only needed to run the samples: they load the shared
// helper script and call execute_sample() with this file's path, this file's
// namespace and the command-line arguments.
156+
require_once __DIR__ . '/../../testing/sample_helpers.php';
157+
\Google\Cloud\Samples\execute_sample(__FILE__, __NAMESPACE__, $argv);

0 commit comments

Comments
 (0)