Skip to content

Commit 8fa91d0

Browse files
authored
feat(dlp): de-identify sensitive data with a simple word list (GoogleCloudPlatform#1794)
1 parent 1dab3cb commit 8fa91d0

File tree

2 files changed

+117
-0
lines changed

2 files changed

+117
-0
lines changed
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
<?php
2+
3+
/**
4+
* Copyright 2023 Google Inc.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
/**
20+
* For instructions on how to run the samples:
21+
*
22+
* @see https://github.com/GoogleCloudPlatform/php-docs-samples/tree/main/dlp/README.md
23+
*/
24+
25+
namespace Google\Cloud\Samples\Dlp;
26+
27+
# [START dlp_deidentify_simple_word_list]
28+
use Google\Cloud\Dlp\V2\DlpServiceClient;
29+
use Google\Cloud\Dlp\V2\PrimitiveTransformation;
30+
use Google\Cloud\Dlp\V2\InfoType;
31+
use Google\Cloud\Dlp\V2\DeidentifyConfig;
32+
use Google\Cloud\Dlp\V2\InfoTypeTransformations\InfoTypeTransformation;
33+
use Google\Cloud\Dlp\V2\InfoTypeTransformations;
34+
use Google\Cloud\Dlp\V2\ContentItem;
35+
use Google\Cloud\Dlp\V2\InspectConfig;
36+
use Google\Cloud\Dlp\V2\ReplaceWithInfoTypeConfig;
37+
use Google\Cloud\Dlp\V2\CustomInfoType;
38+
use Google\Cloud\Dlp\V2\CustomInfoType\Dictionary;
39+
use Google\Cloud\Dlp\V2\CustomInfoType\Dictionary\WordList;
40+
41+
/**
 * De-identify sensitive data with a simple word list.
 *
 * Builds a custom dictionary infoType named CUSTOM_ROOM_ID from a fixed list
 * of room identifiers, sends the input text to the DLP deidentifyContent
 * endpoint with a "replace with infoType" transformation, and prints the
 * de-identified text returned by the service.
 *
 * @param string $callingProjectId The Google Cloud project id to use as a parent resource.
 * @param string $string           The string to deidentify (will be treated as text).
 */
function deidentify_simple_word_list(
    // TODO(developer): Replace sample parameters before running the code.
    string $callingProjectId,
    string $string = 'Patient was seen in RM-YELLOW then transferred to rm green.'
): void {
    // Instantiate a client.
    $dlp = new DlpServiceClient();

    try {
        $parent = "projects/$callingProjectId/locations/global";

        // Wrap the raw input so the API treats it as plain text.
        $content = (new ContentItem())
            ->setValue($string);

        // Construct the word list to be detected.
        $wordList = (new Dictionary())
            ->setWordList((new WordList())
                ->setWords(['RM-GREEN', 'RM-YELLOW', 'RM-ORANGE']));

        // The custom infoType that dictionary matches are reported as.
        $customRoomIdInfoType = (new InfoType())
            ->setName('CUSTOM_ROOM_ID');
        $customInfoType = (new CustomInfoType())
            ->setInfoType($customRoomIdInfoType)
            ->setDictionary($wordList);

        // Create the inspection configuration object.
        $inspectConfig = (new InspectConfig())
            ->setCustomInfoTypes([$customInfoType]);

        // Create the information transform configuration objects.
        $primitiveTransformation = (new PrimitiveTransformation())
            ->setReplaceWithInfoTypeConfig(new ReplaceWithInfoTypeConfig());

        // Apply the transformation only to findings of the custom infoType.
        $infoTypeTransformation = (new InfoTypeTransformation())
            ->setPrimitiveTransformation($primitiveTransformation)
            ->setInfoTypes([$customRoomIdInfoType]);

        $infoTypeTransformations = (new InfoTypeTransformations())
            ->setTransformations([$infoTypeTransformation]);

        // Create the deidentification configuration object.
        $deidentifyConfig = (new DeidentifyConfig())
            ->setInfoTypeTransformations($infoTypeTransformations);

        // Run request.
        $response = $dlp->deidentifyContent([
            'parent' => $parent,
            'deidentifyConfig' => $deidentifyConfig,
            'item' => $content,
            'inspectConfig' => $inspectConfig,
        ]);

        // Print the results.
        printf('Deidentified content: %s', $response->getItem()->getValue());
    } finally {
        // Release the client's transport even when the request throws.
        $dlp->close();
    }
}
104+
# [END dlp_deidentify_simple_word_list]
105+
106+
// The following 2 lines are only needed to run the samples.
// They load testing/sample_helpers.php and pass this file's path, its
// namespace, and $argv to execute_sample().
107+
require_once __DIR__ . '/../../testing/sample_helpers.php';
108+
\Google\Cloud\Samples\execute_sample(__FILE__, __NAMESPACE__, $argv);

dlp/test/dlpTest.php

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,15 @@ public function testJobs()
259259
$this->assertStringContainsString('Successfully deleted job ' . $jobId, $output);
260260
}
261261

262+
public function testDeidentifySimpleWordList()
{
    // Input text passed to the snippet as its second argument.
    $text = 'Patient was seen in RM-YELLOW then transferred to rm green.';
    $snippetArgs = [self::$projectId, $text];

    $output = $this->runFunctionSnippet('deidentify_simple_word_list', $snippetArgs);

    // The printed result must contain the custom infoType placeholder.
    $this->assertStringContainsString('[CUSTOM_ROOM_ID]', $output);
}
270+
262271
public function testInspectStringWithoutOverlap()
263272
{
264273
$output = $this->runFunctionSnippet('inspect_string_without_overlap', [

0 commit comments

Comments
 (0)