Skip to content

Commit ecdaa93

Browse files
d-googparthea
andauthored
feat(storagetransfer): Add POSIX & Manifest Samples (GoogleCloudPlatform#8091)
* feat: add transfer manifest samples * feat: POSIX samples * fix: `manifest_file` * fix: `transfer_between_posix` call * fix: typo `sink_bucket` -> `destination_bucket` * Update storagetransfer/conftest.py Co-authored-by: Anthonios Partheniou <[email protected]> * doc: fix doc for POSIX to POSIX Co-authored-by: Anthonios Partheniou <[email protected]>
1 parent 13596b8 commit ecdaa93

File tree

9 files changed

+665
-0
lines changed

9 files changed

+665
-0
lines changed

storagetransfer/conftest.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,3 +165,58 @@ def destination_bucket(gcs_bucket: storage.Bucket, sts_service_account: str):
165165
gcs_bucket.set_iam_policy(policy)
166166

167167
yield gcs_bucket
168+
169+
170+
@pytest.fixture(scope='module')
171+
def intermediate_bucket(gcs_bucket: storage.Bucket, sts_service_account: str):
172+
"""
173+
Yields and auto-cleans up a GCS bucket preconfigured with necessary
174+
STS service account write perms
175+
"""
176+
177+
# Setup policy for STS
178+
member: str = f"serviceAccount:{sts_service_account}"
179+
objectViewer = "roles/storage.objectViewer"
180+
bucketReader = "roles/storage.legacyBucketReader"
181+
bucketWriter = "roles/storage.legacyBucketWriter"
182+
183+
# Prepare policy
184+
policy = gcs_bucket.get_iam_policy(requested_policy_version=3)
185+
policy.bindings.append({"role": objectViewer, "members": {member}})
186+
policy.bindings.append({"role": bucketReader, "members": {member}})
187+
policy.bindings.append({"role": bucketWriter, "members": {member}})
188+
189+
# Set policy
190+
gcs_bucket.set_iam_policy(policy)
191+
192+
yield gcs_bucket
193+
194+
195+
@pytest.fixture(scope='module')
196+
def agent_pool_name():
197+
"""
198+
Yields a source agent pool name
199+
"""
200+
201+
# use default agent
202+
yield ''
203+
204+
205+
@pytest.fixture(scope='module')
206+
def posix_root_directory():
207+
"""
208+
Yields a POSIX root directory
209+
"""
210+
211+
# use arbitrary path
212+
yield '/my-posix-root/'
213+
214+
215+
@pytest.fixture(scope='module')
216+
def manifest_file(source_bucket: storage.Bucket):
217+
"""
218+
Yields a transfer manifest file name
219+
"""
220+
221+
# use arbitrary path and name
222+
yield f'gs://{source_bucket.name}/test-manifest.csv'
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2022 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
"""
18+
Command-line sample that creates a transfer from a POSIX file system to a
19+
GCS bucket using a manifest file.
20+
"""
21+
22+
23+
import argparse
24+
25+
# [START storagetransfer_manifest_request]
26+
from google.cloud import storage_transfer
27+
28+
29+
def create_transfer_with_manifest(
30+
project_id: str, description: str, source_agent_pool_name: str,
31+
root_directory: str, sink_bucket: str, manifest_location: str):
32+
"""Create a transfer from a POSIX file system to a GCS bucket using
33+
a manifest file."""
34+
35+
client = storage_transfer.StorageTransferServiceClient()
36+
37+
# The ID of the Google Cloud Platform Project that owns the job
38+
# project_id = 'my-project-id'
39+
40+
# A useful description for your transfer job
41+
# description = 'My transfer job'
42+
43+
# The agent pool associated with the POSIX data source.
44+
# Defaults to 'projects/{project_id}/agentPools/transfer_service_default'
45+
# source_agent_pool_name = 'projects/my-project/agentPools/my-agent'
46+
47+
# The root directory path on the source filesystem
48+
# root_directory = '/directory/to/transfer/source'
49+
50+
# Google Cloud Storage destination bucket name
51+
# sink_bucket = 'my-gcs-destination-bucket'
52+
53+
# Transfer manifest location. Must be a `gs:` URL
54+
# manifest_location = 'gs://my-bucket/sample_manifest.csv'
55+
56+
transfer_job_request = storage_transfer.CreateTransferJobRequest({
57+
'transfer_job': {
58+
'project_id': project_id,
59+
'description': description,
60+
'status': storage_transfer.TransferJob.Status.ENABLED,
61+
'transfer_spec': {
62+
'source_agent_pool_name': source_agent_pool_name,
63+
'posix_data_source': {
64+
'root_directory': root_directory,
65+
},
66+
'gcs_data_sink': {
67+
'bucket_name': sink_bucket,
68+
},
69+
'transfer_manifest': {
70+
'location': manifest_location
71+
}
72+
}
73+
}
74+
})
75+
76+
result = client.create_transfer_job(transfer_job_request)
77+
print(f'Created transferJob: {result.name}')
78+
79+
80+
# [END storagetransfer_manifest_request]
81+
82+
if __name__ == "__main__":
83+
parser = argparse.ArgumentParser(description=__doc__)
84+
parser.add_argument(
85+
'--project-id',
86+
help='The ID of the Google Cloud Platform Project that owns the job',
87+
required=True)
88+
parser.add_argument(
89+
'--description',
90+
help='A useful description for your transfer job',
91+
required=True)
92+
parser.add_argument(
93+
'--source-agent-pool-name',
94+
help='The agent pool associated with the POSIX data source',
95+
required=True)
96+
parser.add_argument(
97+
'--root-directory',
98+
help='The root directory path on the source filesystem',
99+
required=True)
100+
parser.add_argument(
101+
'--sink-bucket',
102+
help='Google Cloud Storage destination bucket name',
103+
required=True)
104+
parser.add_argument(
105+
'--manifest-location',
106+
help='Transfer manifest location. Must be a `gs:` URL',
107+
required=True)
108+
109+
args = parser.parse_args()
110+
111+
create_transfer_with_manifest(**vars(args))
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Copyright 2022 Google LLC
2+
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import backoff
16+
from google.api_core.exceptions import RetryError
17+
from google.cloud.storage import Bucket
18+
19+
import manifest_request
20+
21+
22+
@backoff.on_exception(backoff.expo, (RetryError,), max_time=60)
23+
def test_manifest_request(
24+
capsys, project_id: str, job_description_unique: str,
25+
agent_pool_name: str, posix_root_directory: str,
26+
destination_bucket: Bucket, manifest_file: str):
27+
28+
manifest_request.create_transfer_with_manifest(
29+
project_id=project_id,
30+
description=job_description_unique,
31+
source_agent_pool_name=agent_pool_name,
32+
root_directory=posix_root_directory,
33+
sink_bucket=destination_bucket.name,
34+
manifest_location=manifest_file
35+
)
36+
37+
out, _ = capsys.readouterr()
38+
39+
assert "Created transferJob" in out

storagetransfer/posix_download.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2022 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
"""
18+
Command-line sample that creates a transfer from a GCS bucket to a POSIX file
19+
system.
20+
"""
21+
22+
23+
import argparse
24+
25+
# [START storagetransfer_download_to_posix]
26+
from google.cloud import storage_transfer
27+
28+
29+
def download_from_gcs(
30+
project_id: str, description: str, sink_agent_pool_name: str,
31+
root_directory: str, source_bucket: str, gcs_source_path: str):
32+
"""Create a transfer from a GCS bucket to a POSIX file system."""
33+
34+
client = storage_transfer.StorageTransferServiceClient()
35+
36+
# The ID of the Google Cloud Platform Project that owns the job
37+
# project_id = 'my-project-id'
38+
39+
# A useful description for your transfer job
40+
# description = 'My transfer job'
41+
42+
# The agent pool associated with the POSIX data sink.
43+
# Defaults to 'projects/{project_id}/agentPools/transfer_service_default'
44+
# sink_agent_pool_name = 'projects/my-project/agentPools/my-agent'
45+
46+
# The root directory path on the destination filesystem
47+
# root_directory = '/directory/to/transfer/sink'
48+
49+
# Google Cloud Storage source bucket name
50+
# source_bucket = 'my-gcs-source-bucket'
51+
52+
# An optional path on the Google Cloud Storage bucket to download from
53+
# gcs_source_path = 'foo/bar/'
54+
55+
transfer_job_request = storage_transfer.CreateTransferJobRequest({
56+
'transfer_job': {
57+
'project_id': project_id,
58+
'description': description,
59+
'status': storage_transfer.TransferJob.Status.ENABLED,
60+
'transfer_spec': {
61+
'sink_agent_pool_name': sink_agent_pool_name,
62+
'posix_data_sink': {
63+
'root_directory': root_directory,
64+
},
65+
'gcs_data_source': {
66+
'bucket_name': source_bucket,
67+
'path': gcs_source_path,
68+
},
69+
}
70+
}
71+
})
72+
73+
result = client.create_transfer_job(transfer_job_request)
74+
print(f'Created transferJob: {result.name}')
75+
76+
77+
# [END storagetransfer_download_to_posix]
78+
79+
if __name__ == "__main__":
80+
parser = argparse.ArgumentParser(description=__doc__)
81+
parser.add_argument(
82+
'--project-id',
83+
help='The ID of the Google Cloud Platform Project that owns the job',
84+
required=True)
85+
parser.add_argument(
86+
'--description',
87+
help='A useful description for your transfer job',
88+
required=True)
89+
parser.add_argument(
90+
'--sink-agent-pool-name',
91+
help='The agent pool associated with the POSIX data sink',
92+
required=True)
93+
parser.add_argument(
94+
'--root-directory',
95+
help='The root directory path on the destination filesystem',
96+
required=True)
97+
parser.add_argument(
98+
'--source-bucket',
99+
help='Google Cloud Storage source bucket name',
100+
required=True)
101+
parser.add_argument(
102+
'--gcs-source-path',
103+
help='A path on the Google Cloud Storage bucket to download from',
104+
required=True)
105+
106+
args = parser.parse_args()
107+
108+
download_from_gcs(**vars(args))
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Copyright 2022 Google LLC
2+
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import backoff
16+
from google.api_core.exceptions import RetryError
17+
from google.cloud.storage import Bucket
18+
19+
import posix_download
20+
21+
22+
@backoff.on_exception(backoff.expo, (RetryError,), max_time=60)
23+
def test_posix_download(
24+
capsys, project_id: str, job_description_unique: str,
25+
agent_pool_name: str, posix_root_directory: str,
26+
source_bucket: Bucket):
27+
28+
posix_download.download_from_gcs(
29+
project_id=project_id,
30+
description=job_description_unique,
31+
sink_agent_pool_name=agent_pool_name,
32+
root_directory=posix_root_directory,
33+
source_bucket=source_bucket.name,
34+
gcs_source_path=posix_root_directory
35+
)
36+
37+
out, _ = capsys.readouterr()
38+
39+
assert "Created transferJob" in out

0 commit comments

Comments
 (0)