Skip to content

Commit 504f21b

Browse files
committed
WIP
1 parent 7201a66 commit 504f21b

File tree

158 files changed

+2466
-2186
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

158 files changed

+2466
-2186
lines changed

bigtable/src/filter_composing_condition.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ function filter_composing_condition(
4747

4848
$filter = Filter::condition(
4949
Filter::chain()
50-
->addFilter(Filter::value()->exactMatch((array) unpack('C*', '1')))
50+
->addFilter(Filter::value()->exactMatch(unpack('C*', 1)))
5151
->addFilter(Filter::qualifier()->exactMatch('data_plan_10gb'))
5252
)
5353
->then(Filter::label('passed-filter'))

bigtable/src/filter_composing_interleave.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ function filter_composing_interleave(
4646
$table = $dataClient->table($instanceId, $tableId);
4747

4848
$filter = Filter::interleave()
49-
->addFilter(Filter::value()->exactMatch((array) unpack('C*', '1')))
49+
->addFilter(Filter::value()->exactMatch(unpack('C*', 1)))
5050
->addFilter(Filter::qualifier()->exactMatch('os_build'));
5151

5252
$rows = $table->readRows([

dlp/src/categorical_stats.php

Lines changed: 127 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -22,18 +22,9 @@
2222
* @see https://github.com/GoogleCloudPlatform/php-docs-samples/tree/master/dlp/README.md
2323
*/
2424

25-
// Include Google Cloud dependendencies using Composer
26-
require_once __DIR__ . '/../vendor/autoload.php';
27-
28-
if (count($argv) != 8) {
29-
return print("Usage: php categorical_stats.php CALLING_PROJECT DATA_PROJECT TOPIC SUBSCRIPTION DATASET TABLE COLUMN\n");
30-
}
31-
list($_, $callingProjectId, $dataProjectId, $topicId, $subscriptionId, $datasetId, $tableId, $columnName) = $argv;
25+
namespace Google\Cloud\Samples\Dlp;
3226

3327
# [START dlp_categorical_stats]
34-
/**
35-
* Computes risk metrics of a column of data in a Google BigQuery table.
36-
*/
3728
use Google\Cloud\Dlp\V2\DlpServiceClient;
3829
use Google\Cloud\Dlp\V2\RiskAnalysisJobConfig;
3930
use Google\Cloud\Dlp\V2\BigQueryTable;
@@ -45,118 +36,134 @@
4536
use Google\Cloud\Dlp\V2\FieldId;
4637
use Google\Cloud\PubSub\PubSubClient;
4738

48-
/** Uncomment and populate these variables in your code */
49-
// $callingProjectId = 'The project ID to run the API call under';
50-
// $dataProjectId = 'The project ID containing the target Datastore';
51-
// $topicId = 'The name of the Pub/Sub topic to notify once the job completes';
52-
// $subscriptionId = 'The name of the Pub/Sub subscription to use when listening for job';
53-
// $datasetId = 'The ID of the dataset to inspect';
54-
// $tableId = 'The ID of the table to inspect';
55-
// $columnName = 'The name of the column to compute risk metrics for, e.g. "age"';
56-
57-
// Instantiate a client.
58-
$dlp = new DlpServiceClient([
59-
'projectId' => $callingProjectId,
60-
]);
61-
$pubsub = new PubSubClient([
62-
'projectId' => $callingProjectId,
63-
]);
64-
$topic = $pubsub->topic($topicId);
65-
66-
// Construct risk analysis config
67-
$columnField = (new FieldId())
68-
->setName($columnName);
69-
70-
$statsConfig = (new CategoricalStatsConfig())
71-
->setField($columnField);
72-
73-
$privacyMetric = (new PrivacyMetric())
74-
->setCategoricalStatsConfig($statsConfig);
75-
76-
// Construct items to be analyzed
77-
$bigqueryTable = (new BigQueryTable())
78-
->setProjectId($dataProjectId)
79-
->setDatasetId($datasetId)
80-
->setTableId($tableId);
81-
82-
// Construct the action to run when job completes
83-
$pubSubAction = (new PublishToPubSub())
84-
->setTopic($topic->name());
85-
86-
$action = (new Action())
87-
->setPubSub($pubSubAction);
88-
89-
// Construct risk analysis job config to run
90-
$riskJob = (new RiskAnalysisJobConfig())
91-
->setPrivacyMetric($privacyMetric)
92-
->setSourceTable($bigqueryTable)
93-
->setActions([$action]);
94-
95-
// Submit request
96-
$parent = "projects/$callingProjectId/locations/global";
97-
$job = $dlp->createDlpJob($parent, [
98-
'riskJob' => $riskJob
99-
]);
100-
101-
// Listen for job notifications via an existing topic/subscription.
102-
$subscription = $topic->subscription($subscriptionId);
103-
104-
// Poll Pub/Sub using exponential backoff until job finishes
105-
// Consider using an asynchronous execution model such as Cloud Functions
106-
$attempt = 1;
107-
$startTime = time();
108-
do {
109-
foreach ($subscription->pull() as $message) {
110-
if (isset($message->attributes()['DlpJobName']) &&
111-
$message->attributes()['DlpJobName'] === $job->getName()) {
112-
$subscription->acknowledge($message);
113-
// Get the updated job. Loop to avoid race condition with DLP API.
114-
do {
115-
$job = $dlp->getDlpJob($job->getName());
116-
} while ($job->getState() == JobState::RUNNING);
117-
break 2; // break from parent do while
118-
}
119-
}
120-
printf('Waiting for job to complete' . PHP_EOL);
121-
// Exponential backoff with max delay of 60 seconds
122-
sleep(min(60, pow(2, ++$attempt)));
123-
} while (time() - $startTime < 600); // 10 minute timeout
124-
125-
// Print finding counts
126-
printf('Job %s status: %s' . PHP_EOL, $job->getName(), JobState::name($job->getState()));
127-
switch ($job->getState()) {
128-
case JobState::DONE:
129-
$histBuckets = $job->getRiskDetails()->getCategoricalStatsResult()->getValueFrequencyHistogramBuckets();
130-
131-
foreach ($histBuckets as $bucketIndex => $histBucket) {
132-
// Print bucket stats
133-
printf('Bucket %s:' . PHP_EOL, $bucketIndex);
134-
printf(' Most common value occurs %s time(s)' . PHP_EOL, $histBucket->getValueFrequencyUpperBound());
135-
printf(' Least common value occurs %s time(s)' . PHP_EOL, $histBucket->getValueFrequencyLowerBound());
136-
printf(' %s unique value(s) total.', $histBucket->getBucketSize());
137-
138-
// Print bucket values
139-
foreach ($histBucket->getBucketValues() as $percent => $quantile) {
140-
printf(
141-
' Value %s occurs %s time(s).' . PHP_EOL,
142-
$quantile->getValue()->serializeToJsonString(),
143-
$quantile->getCount()
144-
);
39+
/**
40+
* Computes risk metrics of a column of data in a Google BigQuery table.
41+
*
42+
* @param string $callingProjectId The project ID to run the API call under
43+
* @param string $dataProjectId The project ID containing the target BigQuery table
44+
* @param string $topicId The name of the Pub/Sub topic to notify once the job completes
45+
* @param string $subscriptionId The name of the Pub/Sub subscription to use when listening for job notifications
46+
* @param string $datasetId The ID of the dataset to inspect
47+
* @param string $tableId The ID of the table to inspect
48+
* @param string $columnName The name of the column to compute risk metrics for, e.g. "age"
49+
*/
50+
function categorical_stats(
51+
string $callingProjectId,
52+
string $dataProjectId,
53+
string $topicId,
54+
string $subscriptionId,
55+
string $datasetId,
56+
string $tableId,
57+
string $columnName
58+
): void {
59+
// Instantiate a client.
60+
$dlp = new DlpServiceClient([
61+
'projectId' => $callingProjectId,
62+
]);
63+
$pubsub = new PubSubClient([
64+
'projectId' => $callingProjectId,
65+
]);
66+
$topic = $pubsub->topic($topicId);
67+
68+
// Construct risk analysis config
69+
$columnField = (new FieldId())
70+
->setName($columnName);
71+
72+
$statsConfig = (new CategoricalStatsConfig())
73+
->setField($columnField);
74+
75+
$privacyMetric = (new PrivacyMetric())
76+
->setCategoricalStatsConfig($statsConfig);
77+
78+
// Construct items to be analyzed
79+
$bigqueryTable = (new BigQueryTable())
80+
->setProjectId($dataProjectId)
81+
->setDatasetId($datasetId)
82+
->setTableId($tableId);
83+
84+
// Construct the action to run when job completes
85+
$pubSubAction = (new PublishToPubSub())
86+
->setTopic($topic->name());
87+
88+
$action = (new Action())
89+
->setPubSub($pubSubAction);
90+
91+
// Construct risk analysis job config to run
92+
$riskJob = (new RiskAnalysisJobConfig())
93+
->setPrivacyMetric($privacyMetric)
94+
->setSourceTable($bigqueryTable)
95+
->setActions([$action]);
96+
97+
// Submit request
98+
$parent = "projects/$callingProjectId/locations/global";
99+
$job = $dlp->createDlpJob($parent, [
100+
'riskJob' => $riskJob
101+
]);
102+
103+
// Listen for job notifications via an existing topic/subscription.
104+
$subscription = $topic->subscription($subscriptionId);
105+
106+
// Poll Pub/Sub using exponential backoff until job finishes
107+
// Consider using an asynchronous execution model such as Cloud Functions
108+
$attempt = 1;
109+
$startTime = time();
110+
do {
111+
foreach ($subscription->pull() as $message) {
112+
if (isset($message->attributes()['DlpJobName']) &&
113+
$message->attributes()['DlpJobName'] === $job->getName()) {
114+
$subscription->acknowledge($message);
115+
// Get the updated job. Loop to avoid race condition with DLP API.
116+
do {
117+
$job = $dlp->getDlpJob($job->getName());
118+
} while ($job->getState() == JobState::RUNNING);
119+
break 2; // break from parent do while
145120
}
146121
}
122+
printf('Waiting for job to complete' . PHP_EOL);
123+
// Exponential backoff with max delay of 60 seconds
124+
sleep(min(60, pow(2, ++$attempt)));
125+
} while (time() - $startTime < 600); // 10 minute timeout
126+
127+
// Print finding counts
128+
printf('Job %s status: %s' . PHP_EOL, $job->getName(), JobState::name($job->getState()));
129+
switch ($job->getState()) {
130+
case JobState::DONE:
131+
$histBuckets = $job->getRiskDetails()->getCategoricalStatsResult()->getValueFrequencyHistogramBuckets();
132+
133+
foreach ($histBuckets as $bucketIndex => $histBucket) {
134+
// Print bucket stats
135+
printf('Bucket %s:' . PHP_EOL, $bucketIndex);
136+
printf(' Most common value occurs %s time(s)' . PHP_EOL, $histBucket->getValueFrequencyUpperBound());
137+
printf(' Least common value occurs %s time(s)' . PHP_EOL, $histBucket->getValueFrequencyLowerBound());
138+
printf(' %s unique value(s) total.', $histBucket->getBucketSize());
139+
140+
// Print bucket values
141+
foreach ($histBucket->getBucketValues() as $percent => $quantile) {
142+
printf(
143+
' Value %s occurs %s time(s).' . PHP_EOL,
144+
$quantile->getValue()->serializeToJsonString(),
145+
$quantile->getCount()
146+
);
147+
}
148+
}
147149

148-
break;
149-
case JobState::FAILED:
150-
$errors = $job->getErrors();
151-
printf('Job %s had errors:' . PHP_EOL, $job->getName());
152-
foreach ($errors as $error) {
153-
var_dump($error->getDetails());
154-
}
155-
break;
156-
case JobState::PENDING:
157-
printf('Job has not completed. Consider a longer timeout or an asynchronous execution model' . PHP_EOL);
158-
break;
159-
default:
160-
printf('Unexpected job state.');
150+
break;
151+
case JobState::FAILED:
152+
$errors = $job->getErrors();
153+
printf('Job %s had errors:' . PHP_EOL, $job->getName());
154+
foreach ($errors as $error) {
155+
var_dump($error->getDetails());
156+
}
157+
break;
158+
case JobState::PENDING:
159+
printf('Job has not completed. Consider a longer timeout or an asynchronous execution model' . PHP_EOL);
160+
break;
161+
default:
162+
printf('Unexpected job state.');
163+
}
161164
}
162165
# [END dlp_categorical_stats]
166+
167+
// The following 2 lines are only needed to run the samples
168+
require_once __DIR__ . '/../../testing/sample_helpers.php';
169+
\Google\Cloud\Samples\execute_sample(__FILE__, __NAMESPACE__, $argv);

0 commit comments

Comments
 (0)