Skip to content

Commit 1dab3cb

Browse files
authored
feat(dlp): inspect a string for sensitive data, omitting overlapping matches on domain and email (GoogleCloudPlatform#1805)
1 parent e06a4fd commit 1dab3cb

File tree

2 files changed

+137
-0
lines changed

2 files changed

+137
-0
lines changed
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
<?php
2+
3+
/**
4+
* Copyright 2023 Google LLC.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
/**
20+
* For instructions on how to run the full sample:
21+
*
22+
* @see https://github.com/GoogleCloudPlatform/php-docs-samples/tree/main/dlp/README.md
23+
*/
24+
25+
namespace Google\Cloud\Samples\Dlp;
26+
27+
// [START dlp_inspect_string_without_overlap]
28+
use Google\Cloud\Dlp\V2\DlpServiceClient;
29+
use Google\Cloud\Dlp\V2\ContentItem;
30+
use Google\Cloud\Dlp\V2\CustomInfoType;
31+
use Google\Cloud\Dlp\V2\CustomInfoType\ExclusionType;
32+
use Google\Cloud\Dlp\V2\ExcludeInfoTypes;
33+
use Google\Cloud\Dlp\V2\ExclusionRule;
34+
use Google\Cloud\Dlp\V2\InfoType;
35+
use Google\Cloud\Dlp\V2\InspectConfig;
36+
use Google\Cloud\Dlp\V2\InspectionRule;
37+
use Google\Cloud\Dlp\V2\InspectionRuleSet;
38+
use Google\Cloud\Dlp\V2\Likelihood;
39+
use Google\Cloud\Dlp\V2\MatchingType;
40+
41+
/**
 * Inspect a string for sensitive data, omitting overlapping matches on domain and email.
 *
 * The request scans for the DOMAIN_NAME and EMAIL_ADDRESS info types and attaches an
 * exclusion rule to DOMAIN_NAME so that a domain which is part of an email address
 * is not reported as a separate finding. EMAIL_ADDRESS is additionally declared as a
 * custom info type with EXCLUSION_TYPE_EXCLUDE.
 *
 * Prints "No findings." when the response contains no findings; otherwise prints the
 * quote, info type and likelihood of every finding.
 *
 * @param string $projectId     The Google Cloud project id to use as a parent resource.
 * @param string $textToInspect The string to inspect. The default contains both a bare
 *                              domain and an email address so the overlap is visible.
 */
function inspect_string_without_overlap(
    // TODO(developer): Replace sample parameters before running the code.
    string $projectId,
    string $textToInspect = 'example.com is a domain, test@example.org is an email.'
): void {
    // Instantiate a client.
    $dlp = new DlpServiceClient();

    $parent = "projects/$projectId/locations/global";

    // Specify what content you want the service to inspect.
    $item = (new ContentItem())
        ->setValue($textToInspect);

    // Specify the type of info the inspection will look for.
    $domainName = (new InfoType())
        ->setName('DOMAIN_NAME');
    $emailAddress = (new InfoType())
        ->setName('EMAIL_ADDRESS');
    $infoTypes = [$domainName, $emailAddress];

    // Define a custom info type to exclude email addresses.
    $customInfoType = (new CustomInfoType())
        ->setInfoType($emailAddress)
        ->setExclusionType(ExclusionType::EXCLUSION_TYPE_EXCLUDE);

    // Exclude DOMAIN_NAME findings that partially match an EMAIL_ADDRESS finding.
    $matchingType = MatchingType::MATCHING_TYPE_PARTIAL_MATCH;

    $exclusionRule = (new ExclusionRule())
        ->setMatchingType($matchingType)
        ->setExcludeInfoTypes(
            (new ExcludeInfoTypes())
                ->setInfoTypes([$customInfoType->getInfoType()])
        );

    // Construct a ruleset that applies the exclusion rule to the DOMAIN_NAME infotype.
    // If a DOMAIN_NAME match is part of an EMAIL_ADDRESS match, the DOMAIN_NAME match
    // will be excluded.
    $inspectionRuleSet = (new InspectionRuleSet())
        ->setInfoTypes([$domainName])
        ->setRules([
            (new InspectionRule())
                ->setExclusionRule($exclusionRule),
        ]);

    // Construct the configuration for the Inspect request, including the ruleset.
    $inspectConfig = (new InspectConfig())
        ->setInfoTypes($infoTypes)
        ->setCustomInfoTypes([$customInfoType])
        ->setIncludeQuote(true)
        ->setRuleSet([$inspectionRuleSet]);

    // Run request.
    $response = $dlp->inspectContent([
        'parent' => $parent,
        'inspectConfig' => $inspectConfig,
        'item' => $item,
    ]);

    // Print the results.
    $findings = $response->getResult()->getFindings();
    if (count($findings) === 0) {
        // Fixed text: no format placeholders, so plain print is enough.
        print('No findings.' . PHP_EOL);
    } else {
        print('Findings:' . PHP_EOL);
        foreach ($findings as $finding) {
            printf(' Quote: %s' . PHP_EOL, $finding->getQuote());
            printf(' Info type: %s' . PHP_EOL, $finding->getInfoType()->getName());
            printf(
                ' Likelihood: %s' . PHP_EOL,
                Likelihood::name($finding->getLikelihood())
            );
        }
    }
}
122+
// [END dlp_inspect_string_without_overlap]
123+
124+
// The following 2 lines are only needed to run the samples: they load the shared
// sample helpers and pass this file, its namespace and the command-line arguments
// ($argv) to execute_sample().
125+
require_once __DIR__ . '/../../testing/sample_helpers.php';
126+
\Google\Cloud\Samples\execute_sample(__FILE__, __NAMESPACE__, $argv);

dlp/test/dlpTest.php

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,17 @@ public function testJobs()
259259
$this->assertStringContainsString('Successfully deleted job ' . $jobId, $output);
260260
}
261261

262+
/**
 * Runs the inspect_string_without_overlap sample on text that contains both a bare
 * domain and an email address, and checks that DOMAIN_NAME is reported while
 * EMAIL_ADDRESS is not.
 */
public function testInspectStringWithoutOverlap()
{
    // The input must contain a real email address; without one the
    // "not EMAIL_ADDRESS" assertion below would pass trivially.
    $output = $this->runFunctionSnippet('inspect_string_without_overlap', [
        self::$projectId,
        'example.com is a domain, test@example.org is an email.'
    ]);

    $this->assertStringContainsString('Info type: DOMAIN_NAME', $output);
    $this->assertStringNotContainsString('Info type: EMAIL_ADDRESS', $output);
}
272+
262273
public function testInspectStringWithExclusionDict()
263274
{
264275
$output = $this->runFunctionSnippet('inspect_string_with_exclusion_dict', [

0 commit comments

Comments
 (0)