diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index f8c481965d..14998d13e5 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -16,7 +16,7 @@ /tests/unit/aiplatform/** @googleapis/cloud-aiplatform-model-builder-sdk # The Cloud AI DPE team is the default owner for samples -/samples/**/*.py @googleapis/cdpe-cloudai @googleapis/python-samples-owners +/samples/**/*.py @googleapis/cdpe-cloudai @googleapis/python-samples-reviewers /.sample_configs/** @googleapis/cdpe-cloudai # The enhanced client library tests are owned by Cloud AI DPE diff --git a/CHANGELOG.md b/CHANGELOG.md index 340873320d..f4c8f96d1d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,39 @@ # Changelog -### [1.13.1](https://github.com/googleapis/python-aiplatform/compare/v1.13.0...v1.13.1) (2022-05-26) +## [1.14.0](https://github.com/googleapis/python-aiplatform/compare/v1.13.1...v1.14.0) (2022-06-08) + + +### Features + +* add a way to easily clone a PipelineJob ([#1239](https://github.com/googleapis/python-aiplatform/issues/1239)) ([efaf6ed](https://github.com/googleapis/python-aiplatform/commit/efaf6edc36262b095aa13d0b40348c20e39b3fc6)) +* add display_name and metadata to ModelEvaluation in aiplatform model_evaluation.proto ([b6bf6dc](https://github.com/googleapis/python-aiplatform/commit/b6bf6dc643274220e6eeca6479b5f9df61b11d16)) +* add Examples to Explanation related messages in aiplatform v1beta1 explanation.proto ([b6bf6dc](https://github.com/googleapis/python-aiplatform/commit/b6bf6dc643274220e6eeca6479b5f9df61b11d16)) +* Add hierarchy and window configs to Vertex Forecasting training job ([#1255](https://github.com/googleapis/python-aiplatform/issues/1255)) ([8560fa8](https://github.com/googleapis/python-aiplatform/commit/8560fa88c8e0fe51f2ae56f68be575e85db3696a)) +* add holiday regions for vertex forecasting ([#1253](https://github.com/googleapis/python-aiplatform/issues/1253)) ([0036ab0](https://github.com/googleapis/python-aiplatform/commit/0036ab07004e0c9ae7806c4c2c25f22d5af4a978)) +* add IAM policy to aiplatform_v1beta1.yaml ([b6bf6dc](https://github.com/googleapis/python-aiplatform/commit/b6bf6dc643274220e6eeca6479b5f9df61b11d16)) +* add latent_space_source to ExplanationMetadata in aiplatform v1 explanation_metadata.proto ([b6bf6dc](https://github.com/googleapis/python-aiplatform/commit/b6bf6dc643274220e6eeca6479b5f9df61b11d16)) +* add latent_space_source to ExplanationMetadata in aiplatform v1beta1 explanation_metadata.proto ([b6bf6dc](https://github.com/googleapis/python-aiplatform/commit/b6bf6dc643274220e6eeca6479b5f9df61b11d16)) +* add preset configuration for example-based explanations in aiplatform v1beta1 explanation.proto ([b6bf6dc](https://github.com/googleapis/python-aiplatform/commit/b6bf6dc643274220e6eeca6479b5f9df61b11d16)) +* add scaling to OnlineServingConfig in aiplatform v1 featurestore.proto ([b6bf6dc](https://github.com/googleapis/python-aiplatform/commit/b6bf6dc643274220e6eeca6479b5f9df61b11d16)) +* add seq2seq forecasting training job ([#1196](https://github.com/googleapis/python-aiplatform/issues/1196)) ([643d335](https://github.com/googleapis/python-aiplatform/commit/643d335693ec57848949ee173401867a1188678b)) +* add successful_forecast_point_count to CompletionStats in completion_stats.proto ([b6bf6dc](https://github.com/googleapis/python-aiplatform/commit/b6bf6dc643274220e6eeca6479b5f9df61b11d16)) +* add template_metadata to PipelineJob in aiplatform v1 pipeline_job.proto 
([b6bf6dc](https://github.com/googleapis/python-aiplatform/commit/b6bf6dc643274220e6eeca6479b5f9df61b11d16)) +* Add Vertex Forecasting E2E test. ([#1248](https://github.com/googleapis/python-aiplatform/issues/1248)) ([e82c179](https://github.com/googleapis/python-aiplatform/commit/e82c1792293396045a1032df015a3700fc38609b)) +* Added forecasting snippets and fixed bugs with existing snippets ([#1210](https://github.com/googleapis/python-aiplatform/issues/1210)) ([4e4bff5](https://github.com/googleapis/python-aiplatform/commit/4e4bff5cac3a99e7f55145ab2aee83b20af67060)) + + +### Bug Fixes + +* change endpoint update method to return resource ([#1409](https://github.com/googleapis/python-aiplatform/issues/1409)) ([44e279b](https://github.com/googleapis/python-aiplatform/commit/44e279b15a1b03bf234111333517153ffdbaf696)) +* Changed system test to use list_models() correctly ([#1397](https://github.com/googleapis/python-aiplatform/issues/1397)) ([a3da19a](https://github.com/googleapis/python-aiplatform/commit/a3da19aac6bdd3fa8d218408582205f7241a4b04)) +* Pinned protobuf to prevent issues with pb files. ([#1398](https://github.com/googleapis/python-aiplatform/issues/1398)) ([7a54637](https://github.com/googleapis/python-aiplatform/commit/7a54637d9b0e7a52ec4648505a6902610c4cc5b7)) + + +### Documentation + +* fix changelog header to consistent size ([#1404](https://github.com/googleapis/python-aiplatform/issues/1404)) ([f6a7e6f](https://github.com/googleapis/python-aiplatform/commit/f6a7e6f35188d6032fc8b34a3c205b0632029e02)) + +## [1.13.1](https://github.com/googleapis/python-aiplatform/compare/v1.13.0...v1.13.1) (2022-05-26) ### Features @@ -60,7 +93,7 @@ * fix type in docstring for map fields ([847ad78](https://github.com/googleapis/python-aiplatform/commit/847ad789e09aec14238a7476a3fa88729ce24d6f)) -### [1.12.1](https://github.com/googleapis/python-aiplatform/compare/v1.12.0...v1.12.1) (2022-04-20) +## [1.12.1](https://github.com/googleapis/python-aiplatform/compare/v1.12.0...v1.12.1) (2022-04-20) ### Features diff --git a/docs/definition_v1/types.rst b/docs/definition_v1/types.rst new file mode 100644 index 0000000000..a1df2bce25 --- /dev/null +++ b/docs/definition_v1/types.rst @@ -0,0 +1,7 @@ +Types for Google Cloud Aiplatform V1 Schema Trainingjob Definition v1 API +========================================================================= + +.. automodule:: google.cloud.aiplatform.v1.schema.trainingjob.definition_v1.types + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/definition_v1beta1/types.rst b/docs/definition_v1beta1/types.rst new file mode 100644 index 0000000000..f4fe7a5301 --- /dev/null +++ b/docs/definition_v1beta1/types.rst @@ -0,0 +1,7 @@ +Types for Google Cloud Aiplatform V1beta1 Schema Trainingjob Definition v1beta1 API +=================================================================================== + +.. automodule:: google.cloud.aiplatform.v1beta1.schema.trainingjob.definition_v1beta1.types + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/instance_v1/types.rst b/docs/instance_v1/types.rst new file mode 100644 index 0000000000..564ab013ee --- /dev/null +++ b/docs/instance_v1/types.rst @@ -0,0 +1,7 @@ +Types for Google Cloud Aiplatform V1 Schema Predict Instance v1 API +=================================================================== + +.. 
automodule:: google.cloud.aiplatform.v1.schema.predict.instance_v1.types + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/instance_v1beta1/types.rst b/docs/instance_v1beta1/types.rst new file mode 100644 index 0000000000..7caa088065 --- /dev/null +++ b/docs/instance_v1beta1/types.rst @@ -0,0 +1,7 @@ +Types for Google Cloud Aiplatform V1beta1 Schema Predict Instance v1beta1 API +============================================================================= + +.. automodule:: google.cloud.aiplatform.v1beta1.schema.predict.instance_v1beta1.types + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/params_v1/types.rst b/docs/params_v1/types.rst new file mode 100644 index 0000000000..956ef5224d --- /dev/null +++ b/docs/params_v1/types.rst @@ -0,0 +1,7 @@ +Types for Google Cloud Aiplatform V1 Schema Predict Params v1 API +================================================================= + +.. automodule:: google.cloud.aiplatform.v1.schema.predict.params_v1.types + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/params_v1beta1/types.rst b/docs/params_v1beta1/types.rst new file mode 100644 index 0000000000..722a1d8ba0 --- /dev/null +++ b/docs/params_v1beta1/types.rst @@ -0,0 +1,7 @@ +Types for Google Cloud Aiplatform V1beta1 Schema Predict Params v1beta1 API +=========================================================================== + +.. automodule:: google.cloud.aiplatform.v1beta1.schema.predict.params_v1beta1.types + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/prediction_v1/types.rst b/docs/prediction_v1/types.rst new file mode 100644 index 0000000000..a97faf34de --- /dev/null +++ b/docs/prediction_v1/types.rst @@ -0,0 +1,7 @@ +Types for Google Cloud Aiplatform V1 Schema Predict Prediction v1 API +===================================================================== + +.. automodule:: google.cloud.aiplatform.v1.schema.predict.prediction_v1.types + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/prediction_v1beta1/types.rst b/docs/prediction_v1beta1/types.rst new file mode 100644 index 0000000000..b14182d6d7 --- /dev/null +++ b/docs/prediction_v1beta1/types.rst @@ -0,0 +1,7 @@ +Types for Google Cloud Aiplatform V1beta1 Schema Predict Prediction v1beta1 API +=============================================================================== + +.. 
automodule:: google.cloud.aiplatform.v1beta1.schema.predict.prediction_v1beta1.types + :members: + :undoc-members: + :show-inheritance: diff --git a/google/cloud/aiplatform/__init__.py b/google/cloud/aiplatform/__init__.py index db7d0a7c18..1ad69b2a54 100644 --- a/google/cloud/aiplatform/__init__.py +++ b/google/cloud/aiplatform/__init__.py @@ -63,6 +63,7 @@ CustomPythonPackageTrainingJob, AutoMLTabularTrainingJob, AutoMLForecastingTrainingJob, + SequenceToSequencePlusForecastingTrainingJob, AutoMLImageTrainingJob, AutoMLTextTrainingJob, AutoMLVideoTrainingJob, @@ -116,6 +117,7 @@ "Model", "ModelEvaluation", "PipelineJob", + "SequenceToSequencePlusForecastingTrainingJob", "TabularDataset", "Tensorboard", "TensorboardExperiment", diff --git a/google/cloud/aiplatform/datasets/time_series_dataset.py b/google/cloud/aiplatform/datasets/time_series_dataset.py index ec5546f12a..6bde6be7a5 100644 --- a/google/cloud/aiplatform/datasets/time_series_dataset.py +++ b/google/cloud/aiplatform/datasets/time_series_dataset.py @@ -46,6 +46,7 @@ def create( labels: Optional[Dict[str, str]] = None, encryption_spec_key_name: Optional[str] = None, sync: bool = True, + create_request_timeout: Optional[float] = None, ) -> "TimeSeriesDataset": """Creates a new time series dataset. @@ -102,6 +103,8 @@ def create( Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. + create_request_timeout (float): + Optional. The timeout for the create request in seconds. Returns: time_series_dataset (TimeSeriesDataset): @@ -141,6 +144,7 @@ def create( encryption_spec_key_name=encryption_spec_key_name ), sync=sync, + create_request_timeout=create_request_timeout, ) def import_data(self): diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index 7d104b3112..3b4b03af3e 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -1377,19 +1377,13 @@ def update( self, ) - update_endpoint_lro = self.api_client.update_endpoint( + self._gca_resource = self.api_client.update_endpoint( endpoint=copied_endpoint_proto, update_mask=update_mask, metadata=request_metadata, timeout=update_request_timeout, ) - _LOGGER.log_action_started_against_resource_with_lro( - "Update", "endpoint", self.__class__, update_endpoint_lro - ) - - update_endpoint_lro.result() - _LOGGER.log_action_completed_against_resource("endpoint", "updated", self) return self diff --git a/google/cloud/aiplatform/pipeline_jobs.py b/google/cloud/aiplatform/pipeline_jobs.py index 90d7e0f86d..bc50a47aa2 100644 --- a/google/cloud/aiplatform/pipeline_jobs.py +++ b/google/cloud/aiplatform/pipeline_jobs.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2021 Google LLC +# Copyright 2022 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -144,15 +144,15 @@ def __init__( be encrypted with the provided encryption key. Overrides encryption_spec_key_name set in aiplatform.init. - labels (Dict[str,str]): + labels (Dict[str, str]): Optional. The user defined metadata to organize PipelineJob. credentials (auth_credentials.Credentials): Optional. Custom credentials to use to create this PipelineJob. Overrides credentials set in aiplatform.init. - project (str), + project (str): Optional. The project that you want to run this PipelineJob in. 
If not set, the project set in aiplatform.init will be used. - location (str), + location (str): Optional. Location to create PipelineJob. If not set, location set in aiplatform.init will be used. @@ -215,9 +215,9 @@ def __init__( ) if not _VALID_NAME_PATTERN.match(self.job_id): raise ValueError( - "Generated job ID: {} is illegal as a Vertex pipelines job ID. " + f"Generated job ID: {self.job_id} is illegal as a Vertex pipelines job ID. " "Expecting an ID following the regex pattern " - '"[a-z][-a-z0-9]{{0,127}}"'.format(job_id) + f'"{_VALID_NAME_PATTERN.pattern[1:-1]}"' ) if enable_caching is not None: @@ -471,3 +471,147 @@ def list( def wait_for_resource_creation(self) -> None: """Waits until resource has been created.""" self._wait_for_resource_creation() + + def clone( + self, + display_name: Optional[str] = None, + job_id: Optional[str] = None, + pipeline_root: Optional[str] = None, + parameter_values: Optional[Dict[str, Any]] = None, + enable_caching: Optional[bool] = None, + encryption_spec_key_name: Optional[str] = None, + labels: Optional[Dict[str, str]] = None, + credentials: Optional[auth_credentials.Credentials] = None, + project: Optional[str] = None, + location: Optional[str] = None, + ) -> "PipelineJob": + """Returns a new PipelineJob object with the same settings as the original one. + + Args: + display_name (str): + Optional. The user-defined name of this cloned Pipeline. + If not specified, original pipeline display name will be used. + job_id (str): + Optional. The unique ID of the job run. + If not specified, "cloned" + pipeline name + timestamp will be used. + pipeline_root (str): + Optional. The root of the pipeline outputs. Default to be the same + staging bucket as original pipeline. + parameter_values (Dict[str, Any]): + Optional. The mapping from runtime parameter names to its values that + control the pipeline run. Defaults to be the same values as original + PipelineJob. + enable_caching (bool): + Optional. Whether to turn on caching for the run. + If this is not set, defaults to be the same as original pipeline. + If this is set, the setting applies to all tasks in the pipeline. + encryption_spec_key_name (str): + Optional. The Cloud KMS resource identifier of the customer + managed encryption key used to protect the job. Has the + form: + ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. + The key needs to be in the same region as where the compute resource is created. + If this is set, then all + resources created by the PipelineJob will + be encrypted with the provided encryption key. + If not specified, encryption_spec of original PipelineJob will be used. + labels (Dict[str, str]): + Optional. The user defined metadata to organize PipelineJob. + credentials (auth_credentials.Credentials): + Optional. Custom credentials to use to create this PipelineJob. + Overrides credentials set in aiplatform.init. + project (str): + Optional. The project that you want to run this PipelineJob in. + If not set, the project set in original PipelineJob will be used. + location (str): + Optional. Location to create PipelineJob. + If not set, location set in original PipelineJob will be used. + + Returns: + A Vertex AI PipelineJob. + + Raises: + ValueError: If job_id or labels have incorrect format. 
+ """ + ## Initialize an empty PipelineJob + if not project: + project = self.project + if not location: + location = self.location + if not credentials: + credentials = self.credentials + + cloned = self.__class__._empty_constructor( + project=project, + location=location, + credentials=credentials, + ) + cloned._parent = initializer.global_config.common_location_path( + project=project, location=location + ) + + ## Get gca_resource from original PipelineJob + pipeline_job = json_format.MessageToDict(self._gca_resource._pb) + + ## Set pipeline_spec + pipeline_spec = pipeline_job["pipelineSpec"] + if "deploymentConfig" in pipeline_spec: + del pipeline_spec["deploymentConfig"] + + ## Set caching + if enable_caching is not None: + _set_enable_caching_value(pipeline_spec, enable_caching) + + ## Set job_id + pipeline_name = pipeline_spec["pipelineInfo"]["name"] + cloned.job_id = job_id or "cloned-{pipeline_name}-{timestamp}".format( + pipeline_name=re.sub("[^-0-9a-z]+", "-", pipeline_name.lower()) + .lstrip("-") + .rstrip("-"), + timestamp=_get_current_time().strftime("%Y%m%d%H%M%S"), + ) + if not _VALID_NAME_PATTERN.match(cloned.job_id): + raise ValueError( + f"Generated job ID: {cloned.job_id} is illegal as a Vertex pipelines job ID. " + "Expecting an ID following the regex pattern " + f'"{_VALID_NAME_PATTERN.pattern[1:-1]}"' + ) + + ## Set display_name, labels and encryption_spec + if display_name: + utils.validate_display_name(display_name) + elif not display_name and "displayName" in pipeline_job: + display_name = pipeline_job["displayName"] + + if labels: + utils.validate_labels(labels) + elif not labels and "labels" in pipeline_job: + labels = pipeline_job["labels"] + + if encryption_spec_key_name or "encryptionSpec" not in pipeline_job: + encryption_spec = initializer.global_config.get_encryption_spec( + encryption_spec_key_name=encryption_spec_key_name + ) + else: + encryption_spec = pipeline_job["encryptionSpec"] + + ## Set runtime_config + builder = pipeline_utils.PipelineRuntimeConfigBuilder.from_job_spec_json( + pipeline_job + ) + builder.update_pipeline_root(pipeline_root) + builder.update_runtime_parameters(parameter_values) + runtime_config_dict = builder.build() + runtime_config = gca_pipeline_job.PipelineJob.RuntimeConfig()._pb + json_format.ParseDict(runtime_config_dict, runtime_config) + + ## Create gca_resource for cloned PipelineJob + cloned._gca_resource = gca_pipeline_job.PipelineJob( + display_name=display_name, + pipeline_spec=pipeline_spec, + labels=labels, + runtime_config=runtime_config, + encryption_spec=encryption_spec, + ) + + return cloned diff --git a/google/cloud/aiplatform/schema.py b/google/cloud/aiplatform/schema.py index a1da75d9e6..96a7a50bbd 100644 --- a/google/cloud/aiplatform/schema.py +++ b/google/cloud/aiplatform/schema.py @@ -23,6 +23,7 @@ class definition: custom_task = "gs://google-cloud-aiplatform/schema/trainingjob/definition/custom_task_1.0.0.yaml" automl_tabular = "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_tabular_1.0.0.yaml" automl_forecasting = "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_time_series_forecasting_1.0.0.yaml" + seq2seq_plus_forecasting = "gs://google-cloud-aiplatform/schema/trainingjob/definition/seq2seq_plus_time_series_forecasting_1.0.0.yaml" automl_image_classification = "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_image_classification_1.0.0.yaml" automl_image_object_detection = 
"gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_image_object_detection_1.0.0.yaml" automl_text_classification = "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_text_classification_1.0.0.yaml" diff --git a/google/cloud/aiplatform/training_jobs.py b/google/cloud/aiplatform/training_jobs.py index 2dbd130555..2b246e113a 100644 --- a/google/cloud/aiplatform/training_jobs.py +++ b/google/cloud/aiplatform/training_jobs.py @@ -1561,176 +1561,70 @@ def _model_upload_fail_string(self) -> str: ) -# TODO(b/172368325) add scheduling, custom_job.Scheduling -class CustomTrainingJob(_CustomTrainingJob): - """Class to launch a Custom Training Job in Vertex AI using a script. +class _ForecastingTrainingJob(_TrainingJob): + """ABC for Forecasting Training Pipelines.""" - Takes a training implementation as a python script and executes that - script in Cloud Vertex AI Training. - """ + _supported_training_schemas = tuple() def __init__( self, - # TODO(b/223262536): Make display_name parameter fully optional in next major release - display_name: str, - script_path: str, - container_uri: str, - requirements: Optional[Sequence[str]] = None, - model_serving_container_image_uri: Optional[str] = None, - model_serving_container_predict_route: Optional[str] = None, - model_serving_container_health_route: Optional[str] = None, - model_serving_container_command: Optional[Sequence[str]] = None, - model_serving_container_args: Optional[Sequence[str]] = None, - model_serving_container_environment_variables: Optional[Dict[str, str]] = None, - model_serving_container_ports: Optional[Sequence[int]] = None, - model_description: Optional[str] = None, - model_instance_schema_uri: Optional[str] = None, - model_parameters_schema_uri: Optional[str] = None, - model_prediction_schema_uri: Optional[str] = None, + display_name: Optional[str] = None, + optimization_objective: Optional[str] = None, + column_specs: Optional[Dict[str, str]] = None, + column_transformations: Optional[List[Dict[str, Dict[str, str]]]] = None, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, labels: Optional[Dict[str, str]] = None, training_encryption_spec_key_name: Optional[str] = None, model_encryption_spec_key_name: Optional[str] = None, - staging_bucket: Optional[str] = None, ): - """Constructs a Custom Training Job from a Python script. - - job = aiplatform.CustomTrainingJob( - display_name='test-train', - script_path='test_script.py', - requirements=['pandas', 'numpy'], - container_uri='gcr.io/cloud-aiplatform/training/tf-cpu.2-2:latest', - model_serving_container_image_uri='gcr.io/my-trainer/serving:1', - model_serving_container_predict_route='predict', - model_serving_container_health_route='metadata, - labels={'key': 'value'}, - ) - - Usage with Dataset: - - ds = aiplatform.TabularDataset( - 'projects/my-project/locations/us-central1/datasets/12345') - - job.run( - ds, - replica_count=1, - model_display_name='my-trained-model', - model_labels={'key': 'value'}, - ) - - Usage without Dataset: - - job.run(replica_count=1, model_display_name='my-trained-model) - - - TODO(b/169782082) add documentation about traning utilities - To ensure your model gets saved in Vertex AI, write your saved model to - os.environ["AIP_MODEL_DIR"] in your provided training script. - + """Constructs a Forecasting Training Job. Args: display_name (str): - Required. The user-defined name of this TrainingPipeline. - script_path (str): Required. 
Local path to training script. - container_uri (str): - Required: Uri of the training container image in the GCR. - requirements (Sequence[str]): - List of python packages dependencies of script. - model_serving_container_image_uri (str): - If the training produces a managed Vertex AI Model, the URI of the - Model serving container suitable for serving the model produced by the - training script. - model_serving_container_predict_route (str): - If the training produces a managed Vertex AI Model, An HTTP path to - send prediction requests to the container, and which must be supported - by it. If not specified a default HTTP path will be used by Vertex AI. - model_serving_container_health_route (str): - If the training produces a managed Vertex AI Model, an HTTP path to - send health check requests to the container, and which must be supported - by it. If not specified a standard HTTP path will be used by AI - Platform. - model_serving_container_command (Sequence[str]): - The command with which the container is run. Not executed within a - shell. The Docker image's ENTRYPOINT is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's - environment. If a variable cannot be resolved, the reference in the - input string will be unchanged. The $(VAR_NAME) syntax can be escaped - with a double $$, ie: $$(VAR_NAME). Escaped references will never be - expanded, regardless of whether the variable exists or not. - model_serving_container_args (Sequence[str]): - The arguments to the command. The Docker image's CMD is used if this is - not provided. Variable references $(VAR_NAME) are expanded using the - container's environment. If a variable cannot be resolved, the reference - in the input string will be unchanged. The $(VAR_NAME) syntax can be - escaped with a double $$, ie: $$(VAR_NAME). Escaped references will - never be expanded, regardless of whether the variable exists or not. - model_serving_container_environment_variables (Dict[str, str]): - The environment variables that are to be present in the container. - Should be a dictionary where keys are environment variable names - and values are environment variable values for those names. - model_serving_container_ports (Sequence[int]): - Declaration of ports that are exposed by the container. This field is - primarily informational, it gives Vertex AI information about the - network connections the container uses. Listing or not a port here has - no impact on whether the port is actually exposed, any port listening on - the default "0.0.0.0" address inside a container will be accessible from - the network. - model_description (str): - The description of the Model. - model_instance_schema_uri (str): - Optional. Points to a YAML file stored on Google Cloud - Storage describing the format of a single instance, which - are used in - ``PredictRequest.instances``, - ``ExplainRequest.instances`` - and - ``BatchPredictionJob.input_config``. - The schema is defined as an OpenAPI 3.0.2 `Schema - Object `__. - AutoML Models always have this field populated by AI - Platform. Note: The URI given on output will be immutable - and probably different, including the URI scheme, than the - one given on input. The output URI will point to a location - where the user only has a read access. - model_parameters_schema_uri (str): - Optional. 
Points to a YAML file stored on Google Cloud - Storage describing the parameters of prediction and - explanation via - ``PredictRequest.parameters``, - ``ExplainRequest.parameters`` - and - ``BatchPredictionJob.model_parameters``. - The schema is defined as an OpenAPI 3.0.2 `Schema - Object `__. - AutoML Models always have this field populated by AI - Platform, if no parameters are supported it is set to an - empty string. Note: The URI given on output will be - immutable and probably different, including the URI scheme, - than the one given on input. The output URI will point to a - location where the user only has a read access. - model_prediction_schema_uri (str): - Optional. Points to a YAML file stored on Google Cloud - Storage describing the format of a single prediction - produced by this Model, which are returned via - ``PredictResponse.predictions``, - ``ExplainResponse.explanations``, - and - ``BatchPredictionJob.output_config``. - The schema is defined as an OpenAPI 3.0.2 `Schema - Object `__. - AutoML Models always have this field populated by AI - Platform. Note: The URI given on output will be immutable - and probably different, including the URI scheme, than the - one given on input. The output URI will point to a location - where the user only has a read access. + Optional. The user-defined name of this TrainingPipeline. + optimization_objective (str): + Optional. Objective function the model is to be optimized towards. + The training process creates a Model that optimizes the value of the objective + function over the validation set. The supported optimization objectives: + "minimize-rmse" (default) - Minimize root-mean-squared error (RMSE). + "minimize-mae" - Minimize mean-absolute error (MAE). + "minimize-rmsle" - Minimize root-mean-squared log error (RMSLE). + "minimize-rmspe" - Minimize root-mean-squared percentage error (RMSPE). + "minimize-wape-mae" - Minimize the combination of weighted absolute percentage error (WAPE) + and mean-absolute-error (MAE). + "minimize-quantile-loss" - Minimize the quantile loss at the defined quantiles. + (Set this objective to build quantile forecasts.) + column_specs (Dict[str, str]): + Optional. Alternative to column_transformations where the keys of the dict + are column names and their respective values are one of + AutoMLTabularTrainingJob.column_data_types. + When creating transformation for BigQuery Struct column, the column + should be flattened using "." as the delimiter. Only columns with no child + should have a transformation. + If an input column has no transformations on it, such a column is + ignored by the training, except for the targetColumn, which should have + no transformations defined on. + Only one of column_transformations or column_specs should be passed. + column_transformations (List[Dict[str, Dict[str, str]]]): + Optional. Transformations to apply to the input columns (i.e. columns other + than the targetColumn). Each transformation may produce multiple + result values from the column's value, and all are used for training. + When creating transformation for BigQuery Struct column, the column + should be flattened using "." as the delimiter. Only columns with no child + should have a transformation. + If an input column has no transformations on it, such a column is + ignored by the training, except for the targetColumn, which should have + no transformations defined on. + Only one of column_transformations or column_specs should be passed. 
+ Consider using column_specs as column_transformations will be deprecated eventually. project (str): - Project to run training in. Overrides project set in aiplatform.init. + Optional. Project to run training in. Overrides project set in aiplatform.init. location (str): - Location to run training in. Overrides location set in aiplatform.init. + Optional. Location to run training in. Overrides location set in aiplatform.init. credentials (auth_credentials.Credentials): - Custom credentials to use to run call training service. Overrides + Optional. Custom credentials to use to run call training service. Overrides credentials set in aiplatform.init. labels (Dict[str, str]): Optional. The labels with user-defined metadata to @@ -1749,12 +1643,9 @@ def __init__( ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource is created. - If set, this TrainingPipeline will be secured by this key. - Note: Model trained by this TrainingPipeline is also secured by this key if ``model_to_upload`` is not set separately. - Overrides encryption_spec_key_name set in aiplatform.init. model_encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer @@ -1763,16 +1654,11 @@ def __init__( ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource is created. - If set, the trained Model will be secured by this key. - Overrides encryption_spec_key_name set in aiplatform.init. - staging_bucket (str): - Bucket used to stage source and training artifacts. Overrides - staging_bucket set in aiplatform.init. + Raises: + ValueError: If both column_transformations and column_specs were provided. """ - if not display_name: - display_name = self.__class__._generate_display_name() super().__init__( display_name=display_name, project=project, @@ -1781,74 +1667,76 @@ def __init__( labels=labels, training_encryption_spec_key_name=training_encryption_spec_key_name, model_encryption_spec_key_name=model_encryption_spec_key_name, - container_uri=container_uri, - model_instance_schema_uri=model_instance_schema_uri, - model_parameters_schema_uri=model_parameters_schema_uri, - model_prediction_schema_uri=model_prediction_schema_uri, - model_serving_container_environment_variables=model_serving_container_environment_variables, - model_serving_container_ports=model_serving_container_ports, - model_serving_container_image_uri=model_serving_container_image_uri, - model_serving_container_command=model_serving_container_command, - model_serving_container_args=model_serving_container_args, - model_serving_container_predict_route=model_serving_container_predict_route, - model_serving_container_health_route=model_serving_container_health_route, - model_description=model_description, - staging_bucket=staging_bucket, ) - self._requirements = requirements - self._script_path = script_path + self._column_transformations = ( + column_transformations_utils.validate_and_get_column_transformations( + column_specs, + column_transformations, + ) + ) + + self._optimization_objective = optimization_objective + self._additional_experiments = [] + + @property + @classmethod + @abc.abstractmethod + def _model_type(cls) -> str: + """The type of forecasting model.""" + pass + + @property + @classmethod + @abc.abstractmethod + def _training_task_definition(cls) -> str: + """A GCS path to the YAML file that defines the training task. 
+ + The definition files that can be used here are found in + gs://google-cloud-aiplatform/schema/trainingjob/definition/. + """ + pass def run( self, - dataset: Optional[ - Union[ - datasets.ImageDataset, - datasets.TabularDataset, - datasets.TextDataset, - datasets.VideoDataset, - ] - ] = None, - annotation_schema_uri: Optional[str] = None, - model_display_name: Optional[str] = None, - model_labels: Optional[Dict[str, str]] = None, - base_output_dir: Optional[str] = None, - service_account: Optional[str] = None, - network: Optional[str] = None, - bigquery_destination: Optional[str] = None, - args: Optional[List[Union[str, float, int]]] = None, - environment_variables: Optional[Dict[str, str]] = None, - replica_count: int = 1, - machine_type: str = "n1-standard-4", - accelerator_type: str = "ACCELERATOR_TYPE_UNSPECIFIED", - accelerator_count: int = 0, - boot_disk_type: str = "pd-ssd", - boot_disk_size_gb: int = 100, - reduction_server_replica_count: int = 0, - reduction_server_machine_type: Optional[str] = None, - reduction_server_container_uri: Optional[str] = None, + dataset: datasets.TimeSeriesDataset, + target_column: str, + time_column: str, + time_series_identifier_column: str, + unavailable_at_forecast_columns: List[str], + available_at_forecast_columns: List[str], + forecast_horizon: int, + data_granularity_unit: str, + data_granularity_count: int, training_fraction_split: Optional[float] = None, validation_fraction_split: Optional[float] = None, test_fraction_split: Optional[float] = None, - training_filter_split: Optional[str] = None, - validation_filter_split: Optional[str] = None, - test_filter_split: Optional[str] = None, predefined_split_column_name: Optional[str] = None, timestamp_split_column_name: Optional[str] = None, - timeout: Optional[int] = None, - restart_job_on_worker_restart: bool = False, - enable_web_access: bool = False, - tensorboard: Optional[str] = None, - sync=True, + weight_column: Optional[str] = None, + time_series_attribute_columns: Optional[List[str]] = None, + context_window: Optional[int] = None, + export_evaluated_data_items: bool = False, + export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None, + export_evaluated_data_items_override_destination: bool = False, + quantiles: Optional[List[float]] = None, + validation_options: Optional[str] = None, + budget_milli_node_hours: int = 1000, + model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, + additional_experiments: Optional[List[str]] = None, + hierarchy_group_columns: Optional[List[str]] = None, + hierarchy_group_total_weight: Optional[float] = None, + hierarchy_temporal_total_weight: Optional[float] = None, + hierarchy_group_temporal_total_weight: Optional[float] = None, + window_column: Optional[str] = None, + window_stride_length: Optional[int] = None, + window_max_count: Optional[int] = None, + holiday_regions: Optional[List[str]] = None, + sync: bool = True, create_request_timeout: Optional[float] = None, - ) -> Optional[models.Model]: - """Runs the custom training job. - - Distributed Training Support: - If replica count = 1 then one chief replica will be provisioned. If - replica_count > 1 the remainder will be provisioned as a worker replica pool. - ie: replica_count = 10 will result in 1 chief and 9 workers - All replicas have same machine_type, accelerator_type, and accelerator_count + ) -> models.Model: + """Runs the training job and returns a model. 
If training on a Vertex AI dataset, you can use one of the following split configurations: Data fraction splits: @@ -1858,17 +1746,6 @@ def run( decided by Vertex AI. If none of the fractions are set, by default roughly 80% of data will be used for training, 10% for validation, and 10% for test. - Data filter splits: - Assigns input data to training, validation, and test sets - based on the given filters, data pieces not matched by any - filter are ignored. Currently only supported for Datasets - containing DataItems. - If any of the filters in this message are to match nothing, then - they can be set as '-' (the minus sign). - If using filter splits, all of ``training_filter_split``, ``validation_filter_split`` and - ``test_filter_split`` must be provided. - Supported only for unstructured Datasets. - Predefined splits: Assigns input data to training, validation, and test sets based on the value of a provided key. If using predefined splits, ``predefined_split_column_name`` must be provided. @@ -1882,48 +1759,125 @@ def run( Supported only for tabular Datasets. Args: - dataset ( - Union[ - datasets.ImageDataset, - datasets.TabularDataset, - datasets.TextDataset, - datasets.VideoDataset, - ] - ): - Vertex AI to fit this training against. Custom training script should - retrieve datasets through passed in environment variables uris: + dataset (datasets.TimeSeriesDataset): + Required. The dataset within the same Project from which data will be used to train the Model. The + Dataset must use schema compatible with Model being trained, + and what is compatible should be described in the used + TrainingPipeline's [training_task_definition] + [google.cloud.aiplatform.v1beta1.TrainingPipeline.training_task_definition]. + For time series Datasets, all their data is exported to + training, to pick and choose from. + target_column (str): + Required. Name of the column that the Model is to predict values for. This + column must be unavailable at forecast. + time_column (str): + Required. Name of the column that identifies time order in the time series. + This column must be available at forecast. + time_series_identifier_column (str): + Required. Name of the column that identifies the time series. + unavailable_at_forecast_columns (List[str]): + Required. Column names of columns that are unavailable at forecast. + Each column contains information for the given entity (identified by the + [time_series_identifier_column]) that is unknown before the forecast + (e.g. population of a city in a given year, or weather on a given day). + available_at_forecast_columns (List[str]): + Required. Column names of columns that are available at forecast. + Each column contains information for the given entity (identified by the + [time_series_identifier_column]) that is known at forecast. + forecast_horizon: (int): + Required. The amount of time into the future for which forecasted values for the target are + returned. Expressed in number of units defined by the [data_granularity_unit] and + [data_granularity_count] field. Inclusive. + data_granularity_unit (str): + Required. The data granularity unit. Accepted values are ``minute``, + ``hour``, ``day``, ``week``, ``month``, ``year``. + data_granularity_count (int): + Required. The number of data granularity units between data points in the training + data. If [data_granularity_unit] is `minute`, can be 1, 5, 10, 15, or 30. For all other + values of [data_granularity_unit], must be 1. + predefined_split_column_name (str): + Optional. 
The key is a name of one of the Dataset's data + columns. The value of the key (either the label's value or + value in the column) must be one of {``TRAIN``, + ``VALIDATE``, ``TEST``}, and it defines to which set the + given piece of data is assigned. If for a piece of data the + key is not present or has an invalid value, that piece is + ignored by the pipeline. - os.environ["AIP_TRAINING_DATA_URI"] - os.environ["AIP_VALIDATION_DATA_URI"] - os.environ["AIP_TEST_DATA_URI"] + Supported only for tabular and time series Datasets. + timestamp_split_column_name (str): + Optional. The key is a name of one of the Dataset's data + columns. The value of the key values of the key (the values in + the column) must be in RFC 3339 `date-time` format, where + `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a + piece of data the key is not present or has an invalid value, + that piece is ignored by the pipeline. + Supported only for tabular and time series Datasets. + This parameter must be used with training_fraction_split, + validation_fraction_split, and test_fraction_split. + weight_column (str): + Optional. Name of the column that should be used as the weight column. + Higher values in this column give more importance to the row + during Model training. The column must have numeric values between 0 and + 10000 inclusively, and 0 value means that the row is ignored. + If the weight column field is not set, then all rows are assumed to have + equal weight of 1. This column must be available at forecast. + time_series_attribute_columns (List[str]): + Optional. Column names that should be used as attribute columns. + Each column is constant within a time series. + context_window (int): + Optional. The amount of time into the past training and prediction data is used for + model training and prediction respectively. Expressed in number of units defined by the + [data_granularity_unit] and [data_granularity_count] fields. When not provided uses the + default value of 0 which means the model sets each series context window to be 0 (also + known as "cold start"). Inclusive. + export_evaluated_data_items (bool): + Whether to export the test set predictions to a BigQuery table. + If False, then the export is not performed. + export_evaluated_data_items_bigquery_destination_uri (string): + Optional. URI of desired destination BigQuery table for exported test set predictions. - Additionally the dataset format is passed in as: + Expected format: + ``bq://::`` - os.environ["AIP_DATA_FORMAT"] - annotation_schema_uri (str): - Google Cloud Storage URI points to a YAML file describing - annotation schema. The schema is defined as an OpenAPI 3.0.2 - [Schema Object](https://github.com/OAI/OpenAPI-Specification/blob/main/versions/3.0.2.md#schema-object) The schema files - that can be used here are found in - gs://google-cloud-aiplatform/schema/dataset/annotation/, - note that the chosen schema must be consistent with - ``metadata`` - of the Dataset specified by - ``dataset_id``. + If not specified, then results are exported to the following auto-created BigQuery + table: + ``:export_evaluated_examples__.evaluated_examples`` - Only Annotations that both match this schema and belong to - DataItems not ignored by the split method are used in - respectively training, validation or test role, depending on - the role of the DataItem they are on. + Applies only if [export_evaluated_data_items] is True. 
+ export_evaluated_data_items_override_destination (bool): + Whether to override the contents of [export_evaluated_data_items_bigquery_destination_uri], + if the table exists, for exported test set predictions. If False, and the + table exists, then the training job will fail. - When used in conjunction with - ``annotations_filter``, - the Annotations used for training are filtered by both - ``annotations_filter`` - and - ``annotation_schema_uri``. + Applies only if [export_evaluated_data_items] is True and + [export_evaluated_data_items_bigquery_destination_uri] is specified. + quantiles (List[float]): + Quantiles to use for the `minimize-quantile-loss` + [AutoMLForecastingTrainingJob.optimization_objective]. This argument is required in + this case. + + Accepts up to 5 quantiles in the form of a double from 0 to 1, exclusive. + Each quantile must be unique. + validation_options (str): + Validation options for the data validation component. The available options are: + "fail-pipeline" - (default), will validate against the validation and fail the pipeline + if it fails. + "ignore-validation" - ignore the results of the validation and continue the pipeline + budget_milli_node_hours (int): + Optional. The train budget of creating this Model, expressed in milli node + hours i.e. 1,000 value in this field means 1 node hour. + The training cost of the model will not exceed this budget. The final + cost will be attempted to be close to the budget, though may end up + being (even) noticeably smaller - at the backend's discretion. This + especially may happen when further model training ceases to provide + any improvements. + If the budget is set to a value known to be insufficient to train a + Model for the given training set, the training won't be attempted and + will error. + The minimum value is 1000 and the maximum is 72000. model_display_name (str): - If the script produces a managed Vertex AI Model. The display name of + Optional. If the script produces a managed Vertex AI Model. The display name of the Model. The name can be up to 128 characters long and can be consist of any UTF-8 characters. @@ -1938,345 +1892,229 @@ def run( are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. - base_output_dir (str): - GCS output directory of job. If not provided a - timestamped directory in the staging directory will be used. - - Vertex AI sets the following environment variables when it runs your training code: + additional_experiments (List[str]): + Optional. Additional experiment flags for the time series forcasting training. + create_request_timeout (float): + Optional. The timeout for the create request in seconds. + hierarchy_group_columns (List[str]): + Optional. A list of time series attribute column names that + define the time series hierarchy. Only one level of hierarchy is + supported, ex. ``region`` for a hierarchy of stores or + ``department`` for a hierarchy of products. If multiple columns + are specified, time series will be grouped by their combined + values, ex. (``blue``, ``large``) for ``color`` and ``size``, up + to 5 columns are accepted. If no group columns are specified, + all time series are considered to be part of the same group. + hierarchy_group_total_weight (float): + Optional. The weight of the loss for predictions aggregated over + time series in the same hierarchy group. + hierarchy_temporal_total_weight (float): + Optional. The weight of the loss for predictions aggregated over + the horizon for a single time series. 
+ hierarchy_group_temporal_total_weight (float): + Optional. The weight of the loss for predictions aggregated over + both the horizon and time series in the same hierarchy group. + window_column (str): + Optional. Name of the column that should be used to filter input + rows. The column should contain either booleans or string + booleans; if the value of the row is True, generate a sliding + window from that row. + window_stride_length (int): + Optional. Step length used to generate input examples. Every + ``window_stride_length`` rows will be used to generate a sliding + window. + window_max_count (int): + Optional. Number of rows that should be used to generate input + examples. If the total row count is larger than this number, the + input data will be randomly sampled to hit the count. + holiday_regions (List[str]): + Optional. The geographical regions to use when creating holiday + features. This option is only allowed when data_granularity_unit + is ``day``. Acceptable values can come from any of the following + levels: + Top level: GLOBAL + Second level: continental regions + NA: North America + JAPAC: Japan and Asia Pacific + EMEA: Europe, the Middle East and Africa + LAC: Latin America and the Caribbean + Third level: countries from ISO 3166-1 Country codes. + sync (bool): + Whether to execute this method synchronously. If False, this method + will be executed in concurrent Future and any downstream object will + be immediately returned and synced when the Future has completed. + Returns: + model: The trained Vertex AI Model resource or None if training did not + produce a Vertex AI Model. - - AIP_MODEL_DIR: a Cloud Storage URI of a directory intended for saving model artifacts, i.e. /model/ - - AIP_CHECKPOINT_DIR: a Cloud Storage URI of a directory intended for saving checkpoints, i.e. /checkpoints/ - - AIP_TENSORBOARD_LOG_DIR: a Cloud Storage URI of a directory intended for saving TensorBoard logs, i.e. /logs/ + Raises: + RuntimeError: If Training job has already been run or is waiting to run. + """ - service_account (str): - Specifies the service account for workload run-as account. - Users submitting jobs must have act-as permission on this run-as account. - network (str): - The full name of the Compute Engine network to which the job - should be peered. For example, projects/12345/global/networks/myVPC. - Private services access must already be configured for the network. - If left unspecified, the job is not peered with any network. - bigquery_destination (str): - Provide this field if `dataset` is a BiqQuery dataset. - The BigQuery project location where the training data is to - be written to. In the given project a new dataset is created - with name - ``dataset___`` - where timestamp is in YYYY_MM_DDThh_mm_ss_sssZ format. All - training input data will be written into that dataset. In - the dataset three tables will be created, ``training``, - ``validation`` and ``test``. + if model_display_name: + utils.validate_display_name(model_display_name) + if model_labels: + utils.validate_labels(model_labels) - - AIP_DATA_FORMAT = "bigquery". - - AIP_TRAINING_DATA_URI ="bigquery_destination.dataset_*.training" - - AIP_VALIDATION_DATA_URI = "bigquery_destination.dataset_*.validation" - - AIP_TEST_DATA_URI = "bigquery_destination.dataset_*.test" - args (List[Unions[str, int, float]]): - Command line arguments to be passed to the Python script. - environment_variables (Dict[str, str]): - Environment variables to be passed to the container. 
- Should be a dictionary where keys are environment variable names - and values are environment variable values for those names. - At most 10 environment variables can be specified. - The Name of the environment variable must be unique. - - environment_variables = { - 'MY_KEY': 'MY_VALUE' - } - replica_count (int): - The number of worker replicas. If replica count = 1 then one chief - replica will be provisioned. If replica_count > 1 the remainder will be - provisioned as a worker replica pool. - machine_type (str): - The type of machine to use for training. - accelerator_type (str): - Hardware accelerator type. One of ACCELERATOR_TYPE_UNSPECIFIED, - NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, - NVIDIA_TESLA_T4 - accelerator_count (int): - The number of accelerators to attach to a worker replica. - boot_disk_type (str): - Type of the boot disk, default is `pd-ssd`. - Valid values: `pd-ssd` (Persistent Disk Solid State Drive) or - `pd-standard` (Persistent Disk Hard Disk Drive). - boot_disk_size_gb (int): - Size in GB of the boot disk, default is 100GB. - boot disk size must be within the range of [100, 64000]. - reduction_server_replica_count (int): - The number of reduction server replicas, default is 0. - reduction_server_machine_type (str): - Optional. The type of machine to use for reduction server. - reduction_server_container_uri (str): - Optional. The Uri of the reduction server container image. - See details: https://cloud.google.com/vertex-ai/docs/training/distributed-training#reduce_training_time_with_reduction_server - training_fraction_split (float): - Optional. The fraction of the input data that is to be used to train - the Model. This is ignored if Dataset is not provided. - validation_fraction_split (float): - Optional. The fraction of the input data that is to be used to validate - the Model. This is ignored if Dataset is not provided. - test_fraction_split (float): - Optional. The fraction of the input data that is to be used to evaluate - the Model. This is ignored if Dataset is not provided. - training_filter_split (str): - Optional. A filter on DataItems of the Dataset. DataItems that match - this filter are used to train the Model. A filter with same syntax - as the one used in DatasetService.ListDataItems may be used. If a - single DataItem is matched by more than one of the FilterSplit filters, - then it is assigned to the first set that applies to it in the training, - validation, test order. This is ignored if Dataset is not provided. - validation_filter_split (str): - Optional. A filter on DataItems of the Dataset. DataItems that match - this filter are used to validate the Model. A filter with same syntax - as the one used in DatasetService.ListDataItems may be used. If a - single DataItem is matched by more than one of the FilterSplit filters, - then it is assigned to the first set that applies to it in the training, - validation, test order. This is ignored if Dataset is not provided. - test_filter_split (str): - Optional. A filter on DataItems of the Dataset. DataItems that match - this filter are used to test the Model. A filter with same syntax - as the one used in DatasetService.ListDataItems may be used. If a - single DataItem is matched by more than one of the FilterSplit filters, - then it is assigned to the first set that applies to it in the training, - validation, test order. This is ignored if Dataset is not provided. - predefined_split_column_name (str): - Optional. 
The key is a name of one of the Dataset's data - columns. The value of the key (either the label's value or - value in the column) must be one of {``training``, - ``validation``, ``test``}, and it defines to which set the - given piece of data is assigned. If for a piece of data the - key is not present or has an invalid value, that piece is - ignored by the pipeline. - - Supported only for tabular and time series Datasets. - timestamp_split_column_name (str): - Optional. The key is a name of one of the Dataset's data - columns. The value of the key values of the key (the values in - the column) must be in RFC 3339 `date-time` format, where - `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a - piece of data the key is not present or has an invalid value, - that piece is ignored by the pipeline. - - Supported only for tabular and time series Datasets. - timeout (int): - The maximum job running time in seconds. The default is 7 days. - restart_job_on_worker_restart (bool): - Restarts the entire CustomJob if a worker - gets restarted. This feature can be used by - distributed training jobs that are not resilient - to workers leaving and joining a job. - enable_web_access (bool): - Whether you want Vertex AI to enable interactive shell access - to training containers. - https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell - tensorboard (str): - Optional. The name of a Vertex AI - [Tensorboard][google.cloud.aiplatform.v1beta1.Tensorboard] - resource to which this CustomJob will upload Tensorboard - logs. Format: - ``projects/{project}/locations/{location}/tensorboards/{tensorboard}`` - - The training script should write Tensorboard to following Vertex AI environment - variable: - - AIP_TENSORBOARD_LOG_DIR - - `service_account` is required with provided `tensorboard`. - For more information on configuring your service account please visit: - https://cloud.google.com/vertex-ai/docs/experiments/tensorboard-training - create_request_timeout (float): - Optional. The timeout for the create request in seconds. - sync (bool): - Whether to execute this method synchronously. If False, this method - will be executed in concurrent Future and any downstream object will - be immediately returned and synced when the Future has completed. + if self._is_waiting_to_run(): + raise RuntimeError( + f"{self._model_type} Forecasting Training is already scheduled " + "to run." + ) - Returns: - model: The trained Vertex AI Model resource or None if training did not - produce a Vertex AI Model. - """ - worker_pool_specs, managed_model = self._prepare_and_validate_run( - model_display_name=model_display_name, - model_labels=model_labels, - replica_count=replica_count, - machine_type=machine_type, - accelerator_count=accelerator_count, - accelerator_type=accelerator_type, - boot_disk_type=boot_disk_type, - boot_disk_size_gb=boot_disk_size_gb, - reduction_server_replica_count=reduction_server_replica_count, - reduction_server_machine_type=reduction_server_machine_type, - ) + if self._has_run: + raise RuntimeError( + f"{self._model_type} Forecasting Training has already run." 
+ ) - # make and copy package - python_packager = source_utils._TrainingScriptPythonPackager( - script_path=self._script_path, requirements=self._requirements - ) + if additional_experiments: + self._add_additional_experiments(additional_experiments) return self._run( - python_packager=python_packager, dataset=dataset, - annotation_schema_uri=annotation_schema_uri, - worker_pool_specs=worker_pool_specs, - managed_model=managed_model, - args=args, - environment_variables=environment_variables, - base_output_dir=base_output_dir, - service_account=service_account, - network=network, - bigquery_destination=bigquery_destination, + target_column=target_column, + time_column=time_column, + time_series_identifier_column=time_series_identifier_column, + unavailable_at_forecast_columns=unavailable_at_forecast_columns, + available_at_forecast_columns=available_at_forecast_columns, + forecast_horizon=forecast_horizon, + data_granularity_unit=data_granularity_unit, + data_granularity_count=data_granularity_count, training_fraction_split=training_fraction_split, validation_fraction_split=validation_fraction_split, test_fraction_split=test_fraction_split, - training_filter_split=training_filter_split, - validation_filter_split=validation_filter_split, - test_filter_split=test_filter_split, predefined_split_column_name=predefined_split_column_name, timestamp_split_column_name=timestamp_split_column_name, - timeout=timeout, - restart_job_on_worker_restart=restart_job_on_worker_restart, - enable_web_access=enable_web_access, - tensorboard=tensorboard, - reduction_server_container_uri=reduction_server_container_uri - if reduction_server_replica_count > 0 - else None, + weight_column=weight_column, + time_series_attribute_columns=time_series_attribute_columns, + context_window=context_window, + budget_milli_node_hours=budget_milli_node_hours, + export_evaluated_data_items=export_evaluated_data_items, + export_evaluated_data_items_bigquery_destination_uri=export_evaluated_data_items_bigquery_destination_uri, + export_evaluated_data_items_override_destination=export_evaluated_data_items_override_destination, + quantiles=quantiles, + validation_options=validation_options, + model_display_name=model_display_name, + model_labels=model_labels, + hierarchy_group_columns=hierarchy_group_columns, + hierarchy_group_total_weight=hierarchy_group_total_weight, + hierarchy_temporal_total_weight=hierarchy_temporal_total_weight, + hierarchy_group_temporal_total_weight=hierarchy_group_temporal_total_weight, + window_column=window_column, + window_stride_length=window_stride_length, + window_max_count=window_max_count, + holiday_regions=holiday_regions, sync=sync, create_request_timeout=create_request_timeout, ) - @base.optional_sync(construct_object_on_arg="managed_model") + @base.optional_sync() def _run( self, - python_packager: source_utils._TrainingScriptPythonPackager, - dataset: Optional[ - Union[ - datasets.ImageDataset, - datasets.TabularDataset, - datasets.TextDataset, - datasets.VideoDataset, - ] - ], - annotation_schema_uri: Optional[str], - worker_pool_specs: worker_spec_utils._DistributedTrainingSpec, - managed_model: Optional[gca_model.Model] = None, - args: Optional[List[Union[str, float, int]]] = None, - environment_variables: Optional[Dict[str, str]] = None, - base_output_dir: Optional[str] = None, - service_account: Optional[str] = None, - network: Optional[str] = None, - bigquery_destination: Optional[str] = None, + dataset: datasets.TimeSeriesDataset, + target_column: str, + time_column: str, + 
time_series_identifier_column: str, + unavailable_at_forecast_columns: List[str], + available_at_forecast_columns: List[str], + forecast_horizon: int, + data_granularity_unit: str, + data_granularity_count: int, training_fraction_split: Optional[float] = None, validation_fraction_split: Optional[float] = None, test_fraction_split: Optional[float] = None, - training_filter_split: Optional[str] = None, - validation_filter_split: Optional[str] = None, - test_filter_split: Optional[str] = None, predefined_split_column_name: Optional[str] = None, timestamp_split_column_name: Optional[str] = None, - timeout: Optional[int] = None, - restart_job_on_worker_restart: bool = False, - enable_web_access: bool = False, - tensorboard: Optional[str] = None, - reduction_server_container_uri: Optional[str] = None, - sync=True, + weight_column: Optional[str] = None, + time_series_attribute_columns: Optional[List[str]] = None, + context_window: Optional[int] = None, + export_evaluated_data_items: bool = False, + export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None, + export_evaluated_data_items_override_destination: bool = False, + quantiles: Optional[List[float]] = None, + validation_options: Optional[str] = None, + budget_milli_node_hours: int = 1000, + model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, + hierarchy_group_columns: Optional[List[str]] = None, + hierarchy_group_total_weight: Optional[float] = None, + hierarchy_temporal_total_weight: Optional[float] = None, + hierarchy_group_temporal_total_weight: Optional[float] = None, + window_column: Optional[str] = None, + window_stride_length: Optional[int] = None, + window_max_count: Optional[int] = None, + holiday_regions: Optional[List[str]] = None, + sync: bool = True, create_request_timeout: Optional[float] = None, - ) -> Optional[models.Model]: - """Packages local script and launches training_job. - - Args: - python_packager (source_utils._TrainingScriptPythonPackager): - Required. Python Packager pointing to training script locally. - dataset ( - Union[ - datasets.ImageDataset, - datasets.TabularDataset, - datasets.TextDataset, - datasets.VideoDataset, - ] - ): - Vertex AI to fit this training against. - annotation_schema_uri (str): - Google Cloud Storage URI points to a YAML file describing - annotation schema. - worker_pools_spec (worker_spec_utils._DistributedTrainingSpec): - Worker pools pecs required to run job. - managed_model (gca_model.Model): - Model proto if this script produces a Managed Model. - args (List[Unions[str, int, float]]): - Command line arguments to be passed to the Python script. - environment_variables (Dict[str, str]): - Environment variables to be passed to the container. - Should be a dictionary where keys are environment variable names - and values are environment variable values for those names. - At most 10 environment variables can be specified. - The Name of the environment variable must be unique. - - environment_variables = { - 'MY_KEY': 'MY_VALUE' - } - base_output_dir (str): - GCS output directory of job. If not provided a - timestamped directory in the staging directory will be used. + ) -> models.Model: + """Runs the training job and returns a model. 
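Editor's note: for orientation, here is a hedged sketch of how a caller typically supplies the forecasting arguments threaded through this new signature. The job class, dataset resource name, and column names are illustrative assumptions; only the parameter names come from this diff.

    from google.cloud import aiplatform

    # Placeholder dataset resource name; substitute a real TimeSeriesDataset.
    dataset = aiplatform.TimeSeriesDataset(
        "projects/my-project/locations/us-central1/datasets/1234567890"
    )

    # Assumed forecasting job class; this hunk only shows the shared _run() plumbing.
    job = aiplatform.AutoMLForecastingTrainingJob(
        display_name="sales-forecast",
        optimization_objective="minimize-rmse",
    )

    model = job.run(
        dataset=dataset,
        target_column="sales",                        # must be unavailable at forecast
        time_column="date",                           # must be available at forecast
        time_series_identifier_column="store_id",
        unavailable_at_forecast_columns=["sales"],
        available_at_forecast_columns=["date", "promo"],
        forecast_horizon=30,                          # 30 units of data_granularity_unit
        data_granularity_unit="day",
        data_granularity_count=1,
        budget_milli_node_hours=1000,
        sync=True,
    )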
- Vertex AI sets the following environment variables when it runs your training code: + If training on a Vertex AI dataset, you can use one of the following split configurations: + Data fraction splits: + Any of ``training_fraction_split``, ``validation_fraction_split`` and + ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If + the provided ones sum to less than 1, the remainder is assigned to sets as + decided by Vertex AI. If none of the fractions are set, by default roughly 80% + of data will be used for training, 10% for validation, and 10% for test. - - AIP_MODEL_DIR: a Cloud Storage URI of a directory intended for saving model artifacts, i.e. /model/ - - AIP_CHECKPOINT_DIR: a Cloud Storage URI of a directory intended for saving checkpoints, i.e. /checkpoints/ - - AIP_TENSORBOARD_LOG_DIR: a Cloud Storage URI of a directory intended for saving TensorBoard logs, i.e. /logs/ + Predefined splits: + Assigns input data to training, validation, and test sets based on the value of a provided key. + If using predefined splits, ``predefined_split_column_name`` must be provided. + Supported only for tabular Datasets. - service_account (str): - Specifies the service account for workload run-as account. - Users submitting jobs must have act-as permission on this run-as account. - network (str): - The full name of the Compute Engine network to which the job - should be peered. For example, projects/12345/global/networks/myVPC. - Private services access must already be configured for the network. - If left unspecified, the job is not peered with any network. - bigquery_destination (str): - Provide this field if `dataset` is a BiqQuery dataset. - The BigQuery project location where the training data is to - be written to. In the given project a new dataset is created - with name - ``dataset___`` - where timestamp is in YYYY_MM_DDThh_mm_ss_sssZ format. All - training input data will be written into that dataset. In - the dataset three tables will be created, ``training``, - ``validation`` and ``test``. + Timestamp splits: + Assigns input data to training, validation, and test sets + based on a provided timestamps. The youngest data pieces are + assigned to training set, next to validation set, and the oldest + to the test set. + Supported only for tabular Datasets. - - AIP_DATA_FORMAT = "bigquery". - - AIP_TRAINING_DATA_URI ="bigquery_destination.dataset_*.training" - - AIP_VALIDATION_DATA_URI = "bigquery_destination.dataset_*.validation" - - AIP_TEST_DATA_URI = "bigquery_destination.dataset_*.test" + Args: + dataset (datasets.TimeSeriesDataset): + Required. The dataset within the same Project from which data will be used to train the Model. The + Dataset must use schema compatible with Model being trained, + and what is compatible should be described in the used + TrainingPipeline's [training_task_definition] + [google.cloud.aiplatform.v1beta1.TrainingPipeline.training_task_definition]. + For time series Datasets, all their data is exported to + training, to pick and choose from. + target_column (str): + Required. Name of the column that the Model is to predict values for. This + column must be unavailable at forecast. + time_column (str): + Required. Name of the column that identifies time order in the time series. + This column must be available at forecast. + time_series_identifier_column (str): + Required. Name of the column that identifies the time series. + unavailable_at_forecast_columns (List[str]): + Required. 
Column names of columns that are unavailable at forecast. + Each column contains information for the given entity (identified by the + [time_series_identifier_column]) that is unknown before the forecast + (e.g. population of a city in a given year, or weather on a given day). + available_at_forecast_columns (List[str]): + Required. Column names of columns that are available at forecast. + Each column contains information for the given entity (identified by the + [time_series_identifier_column]) that is known at forecast. + forecast_horizon: (int): + Required. The amount of time into the future for which forecasted values for the target are + returned. Expressed in number of units defined by the [data_granularity_unit] and + [data_granularity_count] field. Inclusive. + data_granularity_unit (str): + Required. The data granularity unit. Accepted values are ``minute``, + ``hour``, ``day``, ``week``, ``month``, ``year``. + data_granularity_count (int): + Required. The number of data granularity units between data points in the training + data. If [data_granularity_unit] is `minute`, can be 1, 5, 10, 15, or 30. For all other + values of [data_granularity_unit], must be 1. training_fraction_split (float): - Optional. The fraction of the input data that is to be used to train - the Model. This is ignored if Dataset is not provided. - validation_fraction_split (float): - Optional. The fraction of the input data that is to be used to validate - the Model. This is ignored if Dataset is not provided. - test_fraction_split (float): - Optional. The fraction of the input data that is to be used to evaluate - the Model. This is ignored if Dataset is not provided. - training_filter_split (str): - Optional. A filter on DataItems of the Dataset. DataItems that match - this filter are used to train the Model. A filter with same syntax - as the one used in DatasetService.ListDataItems may be used. If a - single DataItem is matched by more than one of the FilterSplit filters, - then it is assigned to the first set that applies to it in the training, - validation, test order. This is ignored if Dataset is not provided. - validation_filter_split (str): - Optional. A filter on DataItems of the Dataset. DataItems that match - this filter are used to validate the Model. A filter with same syntax - as the one used in DatasetService.ListDataItems may be used. If a - single DataItem is matched by more than one of the FilterSplit filters, - then it is assigned to the first set that applies to it in the training, - validation, test order. This is ignored if Dataset is not provided. - test_filter_split (str): - Optional. A filter on DataItems of the Dataset. DataItems that match - this filter are used to test the Model. A filter with same syntax - as the one used in DatasetService.ListDataItems may be used. If a - single DataItem is matched by more than one of the FilterSplit filters, - then it is assigned to the first set that applies to it in the training, - validation, test order. This is ignored if Dataset is not provided. + Optional. The fraction of the input data that is to be used to train + the Model. This is ignored if Dataset is not provided. + validation_fraction_split (float): + Optional. The fraction of the input data that is to be used to validate + the Model. This is ignored if Dataset is not provided. + test_fraction_split (float): + Optional. The fraction of the input data that is to be used to evaluate + the Model. This is ignored if Dataset is not provided. 
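Editor's note: the three fraction arguments above follow the rule stated earlier in this docstring, namely that any subset may be provided but together they must not exceed 1 (unset fractions default to roughly 80/10/10). A tiny standalone restatement of that constraint, not SDK code:

    def check_fraction_splits(training=None, validation=None, test=None):
        """Illustrative only: mirrors the documented constraint on fraction splits."""
        provided = [f for f in (training, validation, test) if f is not None]
        if any(not 0 <= f <= 1 for f in provided):
            raise ValueError("Each fraction must be between 0 and 1.")
        if sum(provided) > 1:
            raise ValueError("training/validation/test fractions may sum to at most 1.")

    check_fraction_splits(training=0.8, validation=0.1, test=0.1)    # fine
    # check_fraction_splits(training=0.9, validation=0.2)            # would raise ValueError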
predefined_split_column_name (str): Optional. The key is a name of one of the Dataset's data columns. The value of the key (either the label's value or @@ -2294,127 +2132,319 @@ def _run( `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a piece of data the key is not present or has an invalid value, that piece is ignored by the pipeline. - Supported only for tabular and time series Datasets. - timeout (int): - The maximum job running time in seconds. The default is 7 days. - restart_job_on_worker_restart (bool): - Restarts the entire CustomJob if a worker - gets restarted. This feature can be used by - distributed training jobs that are not resilient - to workers leaving and joining a job. - enable_web_access (bool): - Whether you want Vertex AI to enable interactive shell access - to training containers. - https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell - tensorboard (str): - Optional. The name of a Vertex AI - [Tensorboard][google.cloud.aiplatform.v1beta1.Tensorboard] - resource to which this CustomJob will upload Tensorboard - logs. Format: - ``projects/{project}/locations/{location}/tensorboards/{tensorboard}`` + This parameter must be used with training_fraction_split, + validation_fraction_split, and test_fraction_split. + weight_column (str): + Optional. Name of the column that should be used as the weight column. + Higher values in this column give more importance to the row + during Model training. The column must have numeric values between 0 and + 10000 inclusively, and 0 value means that the row is ignored. + If the weight column field is not set, then all rows are assumed to have + equal weight of 1. This column must be available at forecast. + time_series_attribute_columns (List[str]): + Optional. Column names that should be used as attribute columns. + Each column is constant within a time series. + context_window (int): + Optional. The amount of time into the past training and prediction data is used for + model training and prediction respectively. Expressed in number of units defined by the + [data_granularity_unit] and [data_granularity_count] fields. When not provided uses the + default value of 0 which means the model sets each series context window to be 0 (also + known as "cold start"). Inclusive. + export_evaluated_data_items (bool): + Whether to export the test set predictions to a BigQuery table. + If False, then the export is not performed. + export_evaluated_data_items_bigquery_destination_uri (string): + Optional. URI of desired destination BigQuery table for exported test set predictions. - The training script should write Tensorboard to following Vertex AI environment - variable: + Expected format: + ``bq://::
`` - AIP_TENSORBOARD_LOG_DIR + If not specified, then results are exported to the following auto-created BigQuery + table: + ``:export_evaluated_examples__.evaluated_examples`` - `service_account` is required with provided `tensorboard`. - For more information on configuring your service account please visit: - https://cloud.google.com/vertex-ai/docs/experiments/tensorboard-training - reduction_server_container_uri (str): - Optional. The Uri of the reduction server container image. + Applies only if [export_evaluated_data_items] is True. + export_evaluated_data_items_override_destination (bool): + Whether to override the contents of [export_evaluated_data_items_bigquery_destination_uri], + if the table exists, for exported test set predictions. If False, and the + table exists, then the training job will fail. + + Applies only if [export_evaluated_data_items] is True and + [export_evaluated_data_items_bigquery_destination_uri] is specified. + quantiles (List[float]): + Quantiles to use for the `minimize-quantile-loss` + [AutoMLForecastingTrainingJob.optimization_objective]. This argument is required in + this case. + + Accepts up to 5 quantiles in the form of a double from 0 to 1, exclusive. + Each quantile must be unique. + validation_options (str): + Validation options for the data validation component. The available options are: + "fail-pipeline" - (default), will validate against the validation and fail the pipeline + if it fails. + "ignore-validation" - ignore the results of the validation and continue the pipeline + budget_milli_node_hours (int): + Optional. The train budget of creating this Model, expressed in milli node + hours i.e. 1,000 value in this field means 1 node hour. + The training cost of the model will not exceed this budget. The final + cost will be attempted to be close to the budget, though may end up + being (even) noticeably smaller - at the backend's discretion. This + especially may happen when further model training ceases to provide + any improvements. + If the budget is set to a value known to be insufficient to train a + Model for the given training set, the training won't be attempted and + will error. + The minimum value is 1000 and the maximum is 72000. + model_display_name (str): + Optional. If the script produces a managed Vertex AI Model. The display name of + the Model. The name can be up to 128 characters long and can be consist + of any UTF-8 characters. + + If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. + hierarchy_group_columns (List[str]): + Optional. A list of time series attribute column names that + define the time series hierarchy. Only one level of hierarchy is + supported, ex. ``region`` for a hierarchy of stores or + ``department`` for a hierarchy of products. If multiple columns + are specified, time series will be grouped by their combined + values, ex. (``blue``, ``large``) for ``color`` and ``size``, up + to 5 columns are accepted. If no group columns are specified, + all time series are considered to be part of the same group. + hierarchy_group_total_weight (float): + Optional. 
The weight of the loss for predictions aggregated over + time series in the same hierarchy group. + hierarchy_temporal_total_weight (float): + Optional. The weight of the loss for predictions aggregated over + the horizon for a single time series. + hierarchy_group_temporal_total_weight (float): + Optional. The weight of the loss for predictions aggregated over + both the horizon and time series in the same hierarchy group. + window_column (str): + Optional. Name of the column that should be used to filter input + rows. The column should contain either booleans or string + booleans; if the value of the row is True, generate a sliding + window from that row. + window_stride_length (int): + Optional. Step length used to generate input examples. Every + ``window_stride_length`` rows will be used to generate a sliding + window. + window_max_count (int): + Optional. Number of rows that should be used to generate input + examples. If the total row count is larger than this number, the + input data will be randomly sampled to hit the count. + holiday_regions (List[str]): + Optional. The geographical regions to use when creating holiday + features. This option is only allowed when data_granularity_unit + is ``day``. Acceptable values can come from any of the following + levels: + Top level: GLOBAL + Second level: continental regions + NA: North America + JAPAC: Japan and Asia Pacific + EMEA: Europe, the Middle East and Africa + LAC: Latin America and the Caribbean + Third level: countries from ISO 3166-1 Country codes. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. - create_request_timeout (float) - Optional. The timeout for the create request in seconds - + create_request_timeout (float): + Optional. The timeout for the create request in seconds. Returns: model: The trained Vertex AI Model resource or None if training did not produce a Vertex AI Model. """ - package_gcs_uri = python_packager.package_and_copy_to_gcs( - gcs_staging_dir=self._staging_bucket, - project=self.project, - credentials=self.credentials, + # auto-populate transformations + if self._column_transformations is None: + _LOGGER.info( + "No column transformations provided, so now retrieving columns from dataset in order to set default column transformations." + ) + + ( + self._column_transformations, + column_names, + ) = dataset._get_default_column_transformations(target_column) + + _LOGGER.info( + "The column transformation of type 'auto' was set for the following columns: %s." 
+ % column_names + ) + + window_config = self._create_window_config( + column=window_column, + stride_length=window_stride_length, + max_count=window_max_count, ) - for spec_order, spec in enumerate(worker_pool_specs): + training_task_inputs_dict = { + # required inputs + "targetColumn": target_column, + "timeColumn": time_column, + "timeSeriesIdentifierColumn": time_series_identifier_column, + "timeSeriesAttributeColumns": time_series_attribute_columns, + "unavailableAtForecastColumns": unavailable_at_forecast_columns, + "availableAtForecastColumns": available_at_forecast_columns, + "forecastHorizon": forecast_horizon, + "dataGranularity": { + "unit": data_granularity_unit, + "quantity": data_granularity_count, + }, + "transformations": self._column_transformations, + "trainBudgetMilliNodeHours": budget_milli_node_hours, + # optional inputs + "weightColumn": weight_column, + "contextWindow": context_window, + "quantiles": quantiles, + "validationOptions": validation_options, + "optimizationObjective": self._optimization_objective, + "holidayRegions": holiday_regions, + } - if not spec: - continue + # TODO(TheMichaelHu): Remove the ifs once the API supports these inputs. + if any( + [ + hierarchy_group_columns, + hierarchy_group_total_weight, + hierarchy_temporal_total_weight, + hierarchy_group_temporal_total_weight, + ] + ): + training_task_inputs_dict["hierarchyConfig"] = { + "groupColumns": hierarchy_group_columns, + "groupTotalWeight": hierarchy_group_total_weight, + "temporalTotalWeight": hierarchy_temporal_total_weight, + "groupTemporalTotalWeight": hierarchy_group_temporal_total_weight, + } + if window_config: + training_task_inputs_dict["windowConfig"] = window_config - if ( - spec_order == worker_spec_utils._SPEC_ORDERS["server_spec"] - and reduction_server_container_uri - ): - spec["container_spec"] = { - "image_uri": reduction_server_container_uri, - } - else: - spec["python_package_spec"] = { - "executor_image_uri": self._container_uri, - "python_module": python_packager.module_name, - "package_uris": [package_gcs_uri], - } + final_export_eval_bq_uri = export_evaluated_data_items_bigquery_destination_uri + if final_export_eval_bq_uri and not final_export_eval_bq_uri.startswith( + "bq://" + ): + final_export_eval_bq_uri = f"bq://{final_export_eval_bq_uri}" - if args: - spec["python_package_spec"]["args"] = args + if export_evaluated_data_items: + training_task_inputs_dict["exportEvaluatedDataItemsConfig"] = { + "destinationBigqueryUri": final_export_eval_bq_uri, + "overrideExistingTable": export_evaluated_data_items_override_destination, + } - if environment_variables: - spec["python_package_spec"]["env"] = [ - {"name": key, "value": value} - for key, value in environment_variables.items() - ] + if self._additional_experiments: + training_task_inputs_dict[ + "additionalExperiments" + ] = self._additional_experiments - ( - training_task_inputs, - base_output_dir, - ) = self._prepare_training_task_inputs_and_output_dir( - worker_pool_specs=worker_pool_specs, - base_output_dir=base_output_dir, - service_account=service_account, - network=network, - timeout=timeout, - restart_job_on_worker_restart=restart_job_on_worker_restart, - enable_web_access=enable_web_access, - tensorboard=tensorboard, + model = gca_model.Model( + display_name=model_display_name or self._display_name, + labels=model_labels or self._labels, + encryption_spec=self._model_encryption_spec, ) - model = self._run_job( - training_task_definition=schema.training_job.definition.custom_task, - 
training_task_inputs=training_task_inputs, + new_model = self._run_job( + training_task_definition=self._training_task_definition, + training_task_inputs=training_task_inputs_dict, dataset=dataset, - annotation_schema_uri=annotation_schema_uri, training_fraction_split=training_fraction_split, validation_fraction_split=validation_fraction_split, test_fraction_split=test_fraction_split, - training_filter_split=training_filter_split, - validation_filter_split=validation_filter_split, - test_filter_split=test_filter_split, predefined_split_column_name=predefined_split_column_name, timestamp_split_column_name=timestamp_split_column_name, - model=managed_model, - gcs_destination_uri_prefix=base_output_dir, - bigquery_destination=bigquery_destination, + model=model, create_request_timeout=create_request_timeout, ) - return model + if export_evaluated_data_items: + _LOGGER.info( + "Exported examples available at:\n%s" + % self.evaluated_data_items_bigquery_uri + ) + + return new_model + + @property + def _model_upload_fail_string(self) -> str: + """Helper property for model upload failure.""" + return ( + f"Training Pipeline {self.resource_name} is not configured to upload a " + "Model." + ) + + @property + def evaluated_data_items_bigquery_uri(self) -> Optional[str]: + """BigQuery location of exported evaluated examples from the Training Job + Returns: + str: BigQuery uri for the exported evaluated examples if the export + feature is enabled for training. + None: If the export feature was not enabled for training. + """ + + self._assert_gca_resource_is_available() + + metadata = self._gca_resource.training_task_metadata + if metadata and "evaluatedDataItemsBigqueryUri" in metadata: + return metadata["evaluatedDataItemsBigqueryUri"] + return None -class CustomContainerTrainingJob(_CustomTrainingJob): - """Class to launch a Custom Training Job in Vertex AI using a - Container.""" + def _add_additional_experiments(self, additional_experiments: List[str]): + """Add experiment flags to the training job. + Args: + additional_experiments (List[str]): + Experiment flags that can enable some experimental training features. + """ + self._additional_experiments.extend(additional_experiments) + + @staticmethod + def _create_window_config( + column: Optional[str] = None, + stride_length: Optional[int] = None, + max_count: Optional[int] = None, + ) -> Optional[Dict[str, Union[int, str]]]: + """Creates a window config from training job arguments.""" + configs = { + "column": column, + "strideLength": stride_length, + "maxCount": max_count, + } + present_configs = {k: v for k, v in configs.items() if v is not None} + if not present_configs: + return None + if len(present_configs) > 1: + raise ValueError( + "More than one windowing strategy provided. Make sure only one " + "of window_column, window_stride_length, or window_max_count " + "is specified." + ) + return present_configs + + +# TODO(b/172368325) add scheduling, custom_job.Scheduling +class CustomTrainingJob(_CustomTrainingJob): + """Class to launch a Custom Training Job in Vertex AI using a script. + + Takes a training implementation as a python script and executes that + script in Cloud Vertex AI Training. 
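Editor's note: before moving on to the CustomTrainingJob changes, a standalone restatement of the windowing rule enforced by the _create_window_config helper added above: at most one of the three window arguments may be set, and the chosen one maps to a single-key config dict.

    from typing import Dict, Optional, Union

    def window_config_sketch(
        column: Optional[str] = None,
        stride_length: Optional[int] = None,
        max_count: Optional[int] = None,
    ) -> Optional[Dict[str, Union[int, str]]]:
        # Same rule as _create_window_config: zero strategies -> None,
        # exactly one strategy -> single-key dict, more than one -> error.
        configs = {"column": column, "strideLength": stride_length, "maxCount": max_count}
        present = {k: v for k, v in configs.items() if v is not None}
        if not present:
            return None
        if len(present) > 1:
            raise ValueError(
                "Only one of window_column, window_stride_length, or "
                "window_max_count may be specified."
            )
        return present

    assert window_config_sketch() is None
    assert window_config_sketch(max_count=1000) == {"maxCount": 1000}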
+ """ def __init__( self, # TODO(b/223262536): Make display_name parameter fully optional in next major release display_name: str, + script_path: str, container_uri: str, - command: Sequence[str] = None, + requirements: Optional[Sequence[str]] = None, model_serving_container_image_uri: Optional[str] = None, model_serving_container_predict_route: Optional[str] = None, model_serving_container_health_route: Optional[str] = None, @@ -2434,12 +2464,13 @@ def __init__( model_encryption_spec_key_name: Optional[str] = None, staging_bucket: Optional[str] = None, ): - """Constructs a Custom Container Training Job. + """Constructs a Custom Training Job from a Python script. - job = aiplatform.CustomContainerTrainingJob( + job = aiplatform.CustomTrainingJob( display_name='test-train', - container_uri='gcr.io/my_project_id/my_image_name:tag', - command=['python3', 'run_script.py'] + script_path='test_script.py', + requirements=['pandas', 'numpy'], + container_uri='gcr.io/cloud-aiplatform/training/tf-cpu.2-2:latest', model_serving_container_image_uri='gcr.io/my-trainer/serving:1', model_serving_container_predict_route='predict', model_serving_container_health_route='metadata, @@ -2471,11 +2502,11 @@ def __init__( Args: display_name (str): Required. The user-defined name of this TrainingPipeline. + script_path (str): Required. Local path to training script. container_uri (str): Required: Uri of the training container image in the GCR. - command (Sequence[str]): - The command to be invoked when the container is started. - It overrides the entrypoint instruction in Dockerfile when provided + requirements (Sequence[str]): + List of python packages dependencies of script. model_serving_container_image_uri (str): If the training produces a managed Vertex AI Model, the URI of the Model serving container suitable for serving the model produced by the @@ -2634,7 +2665,8 @@ def __init__( staging_bucket=staging_bucket, ) - self._command = command + self._requirements = requirements + self._script_path = script_path def run( self, @@ -2719,7 +2751,14 @@ def run( Supported only for tabular Datasets. Args: - dataset (Union[datasets.ImageDataset,datasets.TabularDataset,datasets.TextDataset,datasets.VideoDataset]): + dataset ( + Union[ + datasets.ImageDataset, + datasets.TabularDataset, + datasets.TextDataset, + datasets.VideoDataset, + ] + ): Vertex AI to fit this training against. Custom training script should retrieve datasets through passed in environment variables uris: @@ -2914,21 +2953,16 @@ def run( `service_account` is required with provided `tensorboard`. For more information on configuring your service account please visit: https://cloud.google.com/vertex-ai/docs/experiments/tensorboard-training + create_request_timeout (float): + Optional. The timeout for the create request in seconds. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. - create_request_timeout (float): - Optional. The timeout for the create request in seconds. Returns: model: The trained Vertex AI Model resource or None if training did not produce a Vertex AI Model. - - Raises: - RuntimeError: If Training job has already been run, staging_bucket has not - been set, or model_display_name was provided but required arguments - were not provided in constructor. 
""" worker_pool_specs, managed_model = self._prepare_and_validate_run( model_display_name=model_display_name, @@ -2943,7 +2977,13 @@ def run( reduction_server_machine_type=reduction_server_machine_type, ) + # make and copy package + python_packager = source_utils._TrainingScriptPythonPackager( + script_path=self._script_path, requirements=self._requirements + ) + return self._run( + python_packager=python_packager, dataset=dataset, annotation_schema_uri=annotation_schema_uri, worker_pool_specs=worker_pool_specs, @@ -2976,6 +3016,7 @@ def run( @base.optional_sync(construct_object_on_arg="managed_model") def _run( self, + python_packager: source_utils._TrainingScriptPythonPackager, dataset: Optional[ Union[ datasets.ImageDataset, @@ -3010,7 +3051,10 @@ def _run( create_request_timeout: Optional[float] = None, ) -> Optional[models.Model]: """Packages local script and launches training_job. + Args: + python_packager (source_utils._TrainingScriptPythonPackager): + Required. Python Packager pointing to training script locally. dataset ( Union[ datasets.ImageDataset, @@ -3057,14 +3101,8 @@ def _run( should be peered. For example, projects/12345/global/networks/myVPC. Private services access must already be configured for the network. If left unspecified, the job is not peered with any network. - timeout (int): - The maximum job running time in seconds. The default is 7 days. - restart_job_on_worker_restart (bool): - Restarts the entire CustomJob if a worker - gets restarted. This feature can be used by - distributed training jobs that are not resilient - to workers leaving and joining a job. bigquery_destination (str): + Provide this field if `dataset` is a BiqQuery dataset. The BigQuery project location where the training data is to be written to. In the given project a new dataset is created with name @@ -3127,6 +3165,13 @@ def _run( that piece is ignored by the pipeline. Supported only for tabular and time series Datasets. + timeout (int): + The maximum job running time in seconds. The default is 7 days. + restart_job_on_worker_restart (bool): + Restarts the entire CustomJob if a worker + gets restarted. This feature can be used by + distributed training jobs that are not resilient + to workers leaving and joining a job. enable_web_access (bool): Whether you want Vertex AI to enable interactive shell access to training containers. @@ -3152,13 +3197,18 @@ def _run( Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. - create_request_timeout (float): - Optional. The timeout for the create request in seconds. + create_request_timeout (float) + Optional. The timeout for the create request in seconds Returns: model: The trained Vertex AI Model resource or None if training did not produce a Vertex AI Model. 
""" + package_gcs_uri = python_packager.package_and_copy_to_gcs( + gcs_staging_dir=self._staging_bucket, + project=self.project, + credentials=self.credentials, + ) for spec_order, spec in enumerate(worker_pool_specs): @@ -3173,16 +3223,17 @@ def _run( "image_uri": reduction_server_container_uri, } else: - spec["containerSpec"] = {"imageUri": self._container_uri} - - if self._command: - spec["containerSpec"]["command"] = self._command + spec["python_package_spec"] = { + "executor_image_uri": self._container_uri, + "python_module": python_packager.module_name, + "package_uris": [package_gcs_uri], + } if args: - spec["containerSpec"]["args"] = args + spec["python_package_spec"]["args"] = args if environment_variables: - spec["containerSpec"]["env"] = [ + spec["python_package_spec"]["env"] = [ {"name": key, "value": value} for key, value in environment_variables.items() ] @@ -3223,125 +3274,170 @@ def _run( return model -class AutoMLTabularTrainingJob(_TrainingJob): - _supported_training_schemas = (schema.training_job.definition.automl_tabular,) +class CustomContainerTrainingJob(_CustomTrainingJob): + """Class to launch a Custom Training Job in Vertex AI using a + Container.""" def __init__( self, # TODO(b/223262536): Make display_name parameter fully optional in next major release display_name: str, - optimization_prediction_type: str, - optimization_objective: Optional[str] = None, - column_specs: Optional[Dict[str, str]] = None, - column_transformations: Optional[List[Dict[str, Dict[str, str]]]] = None, - optimization_objective_recall_value: Optional[float] = None, - optimization_objective_precision_value: Optional[float] = None, + container_uri: str, + command: Sequence[str] = None, + model_serving_container_image_uri: Optional[str] = None, + model_serving_container_predict_route: Optional[str] = None, + model_serving_container_health_route: Optional[str] = None, + model_serving_container_command: Optional[Sequence[str]] = None, + model_serving_container_args: Optional[Sequence[str]] = None, + model_serving_container_environment_variables: Optional[Dict[str, str]] = None, + model_serving_container_ports: Optional[Sequence[int]] = None, + model_description: Optional[str] = None, + model_instance_schema_uri: Optional[str] = None, + model_parameters_schema_uri: Optional[str] = None, + model_prediction_schema_uri: Optional[str] = None, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, labels: Optional[Dict[str, str]] = None, training_encryption_spec_key_name: Optional[str] = None, model_encryption_spec_key_name: Optional[str] = None, + staging_bucket: Optional[str] = None, ): - """Constructs a AutoML Tabular Training Job. - - Example usage: + """Constructs a Custom Container Training Job. - job = training_jobs.AutoMLTabularTrainingJob( - display_name="my_display_name", - optimization_prediction_type="classification", - optimization_objective="minimize-log-loss", - column_specs={"column_1": "auto", "column_2": "numeric"}, + job = aiplatform.CustomContainerTrainingJob( + display_name='test-train', + container_uri='gcr.io/my_project_id/my_image_name:tag', + command=['python3', 'run_script.py'] + model_serving_container_image_uri='gcr.io/my-trainer/serving:1', + model_serving_container_predict_route='predict', + model_serving_container_health_route='metadata, labels={'key': 'value'}, ) - Args: - display_name (str): - Required. The user-defined name of this TrainingPipeline. 
- optimization_prediction_type (str): - The type of prediction the Model is to produce. - "classification" - Predict one out of multiple target values is - picked for each row. - "regression" - Predict a value based on its relation to other values. - This type is available only to columns that contain - semantically numeric values, i.e. integers or floating - point number, even if stored as e.g. strings. + Usage with Dataset: - optimization_objective (str): - Optional. Objective function the Model is to be optimized towards. The training - task creates a Model that maximizes/minimizes the value of the objective - function over the validation set. + ds = aiplatform.TabularDataset( + 'projects/my-project/locations/us-central1/datasets/12345') - The supported optimization objectives depend on the prediction type, and - in the case of classification also the number of distinct values in the - target column (two distint values -> binary, 3 or more distinct values - -> multi class). - If the field is not set, the default objective function is used. + job.run( + ds, + replica_count=1, + model_display_name='my-trained-model', + model_labels={'key': 'value'}, + ) - Classification (binary): - "maximize-au-roc" (default) - Maximize the area under the receiver - operating characteristic (ROC) curve. - "minimize-log-loss" - Minimize log loss. - "maximize-au-prc" - Maximize the area under the precision-recall curve. - "maximize-precision-at-recall" - Maximize precision for a specified - recall value. - "maximize-recall-at-precision" - Maximize recall for a specified - precision value. + Usage without Dataset: - Classification (multi class): - "minimize-log-loss" (default) - Minimize log loss. + job.run(replica_count=1, model_display_name='my-trained-model') - Regression: - "minimize-rmse" (default) - Minimize root-mean-squared error (RMSE). - "minimize-mae" - Minimize mean-absolute error (MAE). - "minimize-rmsle" - Minimize root-mean-squared log error (RMSLE). - column_specs (Dict[str, str]): - Optional. Alternative to column_transformations where the keys of the dict - are column names and their respective values are one of - AutoMLTabularTrainingJob.column_data_types. - When creating transformation for BigQuery Struct column, the column - should be flattened using "." as the delimiter. Only columns with no child - should have a transformation. - If an input column has no transformations on it, such a column is - ignored by the training, except for the targetColumn, which should have - no transformations defined on. - Only one of column_transformations or column_specs should be passed. If none - of column_transformations or column_specs is passed, the local credentials - being used will try setting column_specs to "auto". To do this, the local - credentials require read access to the GCS or BigQuery training data source. - column_transformations (List[Dict[str, Dict[str, str]]]): - Optional. Transformations to apply to the input columns (i.e. columns other - than the targetColumn). Each transformation may produce multiple - result values from the column's value, and all are used for training. - When creating transformation for BigQuery Struct column, the column - should be flattened using "." as the delimiter. Only columns with no child - should have a transformation. - If an input column has no transformations on it, such a column is - ignored by the training, except for the targetColumn, which should have - no transformations defined on.
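Editor's note: collecting the container-based usage snippets from this constructor docstring into one sketch. The image URIs and the dataset resource name are placeholders.

    from google.cloud import aiplatform

    job = aiplatform.CustomContainerTrainingJob(
        display_name="test-train",
        container_uri="gcr.io/my_project_id/my_image_name:tag",    # placeholder image
        command=["python3", "run_script.py"],
        model_serving_container_image_uri="gcr.io/my-trainer/serving:1",
        model_serving_container_predict_route="predict",
        model_serving_container_health_route="metadata",
        labels={"key": "value"},
    )

    # With a managed dataset: the data URIs are exposed to the container via
    # AIP_TRAINING_DATA_URI / AIP_VALIDATION_DATA_URI / AIP_TEST_DATA_URI.
    ds = aiplatform.TabularDataset(
        "projects/my-project/locations/us-central1/datasets/12345"  # placeholder
    )
    model = job.run(
        ds,
        replica_count=1,
        model_display_name="my-trained-model",
        model_labels={"key": "value"},
    )

    # Without a dataset:
    model = job.run(replica_count=1, model_display_name="my-trained-model")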
- Only one of column_transformations or column_specs should be passed. - Consider using column_specs as column_transformations will be deprecated - eventually. If none of column_transformations or column_specs is passed, - the local credentials being used will try setting column_transformations to - "auto". To do this, the local credentials require read access to the GCS or - BigQuery training data source. - optimization_objective_recall_value (float): - Optional. Required when maximize-precision-at-recall optimizationObjective was - picked, represents the recall value at which the optimization is done. - The minimum value is 0 and the maximum is 1.0. - optimization_objective_precision_value (float): - Optional. Required when maximize-recall-at-precision optimizationObjective was - picked, represents the precision value at which the optimization is - done. + TODO(b/169782082) add documentation about traning utilities + To ensure your model gets saved in Vertex AI, write your saved model to + os.environ["AIP_MODEL_DIR"] in your provided training script. - The minimum value is 0 and the maximum is 1.0. + + Args: + display_name (str): + Required. The user-defined name of this TrainingPipeline. + container_uri (str): + Required: Uri of the training container image in the GCR. + command (Sequence[str]): + The command to be invoked when the container is started. + It overrides the entrypoint instruction in Dockerfile when provided + model_serving_container_image_uri (str): + If the training produces a managed Vertex AI Model, the URI of the + Model serving container suitable for serving the model produced by the + training script. + model_serving_container_predict_route (str): + If the training produces a managed Vertex AI Model, An HTTP path to + send prediction requests to the container, and which must be supported + by it. If not specified a default HTTP path will be used by Vertex AI. + model_serving_container_health_route (str): + If the training produces a managed Vertex AI Model, an HTTP path to + send health check requests to the container, and which must be supported + by it. If not specified a standard HTTP path will be used by AI + Platform. + model_serving_container_command (Sequence[str]): + The command with which the container is run. Not executed within a + shell. The Docker image's ENTRYPOINT is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container's + environment. If a variable cannot be resolved, the reference in the + input string will be unchanged. The $(VAR_NAME) syntax can be escaped + with a double $$, ie: $$(VAR_NAME). Escaped references will never be + expanded, regardless of whether the variable exists or not. + model_serving_container_args (Sequence[str]): + The arguments to the command. The Docker image's CMD is used if this is + not provided. Variable references $(VAR_NAME) are expanded using the + container's environment. If a variable cannot be resolved, the reference + in the input string will be unchanged. The $(VAR_NAME) syntax can be + escaped with a double $$, ie: $$(VAR_NAME). Escaped references will + never be expanded, regardless of whether the variable exists or not. + model_serving_container_environment_variables (Dict[str, str]): + The environment variables that are to be present in the container. + Should be a dictionary where keys are environment variable names + and values are environment variable values for those names. 
+ model_serving_container_ports (Sequence[int]): + Declaration of ports that are exposed by the container. This field is + primarily informational, it gives Vertex AI information about the + network connections the container uses. Listing or not a port here has + no impact on whether the port is actually exposed, any port listening on + the default "0.0.0.0" address inside a container will be accessible from + the network. + model_description (str): + The description of the Model. + model_instance_schema_uri (str): + Optional. Points to a YAML file stored on Google Cloud + Storage describing the format of a single instance, which + are used in + ``PredictRequest.instances``, + ``ExplainRequest.instances`` + and + ``BatchPredictionJob.input_config``. + The schema is defined as an OpenAPI 3.0.2 `Schema + Object `__. + AutoML Models always have this field populated by AI + Platform. Note: The URI given on output will be immutable + and probably different, including the URI scheme, than the + one given on input. The output URI will point to a location + where the user only has a read access. + model_parameters_schema_uri (str): + Optional. Points to a YAML file stored on Google Cloud + Storage describing the parameters of prediction and + explanation via + ``PredictRequest.parameters``, + ``ExplainRequest.parameters`` + and + ``BatchPredictionJob.model_parameters``. + The schema is defined as an OpenAPI 3.0.2 `Schema + Object `__. + AutoML Models always have this field populated by AI + Platform, if no parameters are supported it is set to an + empty string. Note: The URI given on output will be + immutable and probably different, including the URI scheme, + than the one given on input. The output URI will point to a + location where the user only has a read access. + model_prediction_schema_uri (str): + Optional. Points to a YAML file stored on Google Cloud + Storage describing the format of a single prediction + produced by this Model, which are returned via + ``PredictResponse.predictions``, + ``ExplainResponse.explanations``, + and + ``BatchPredictionJob.output_config``. + The schema is defined as an OpenAPI 3.0.2 `Schema + Object `__. + AutoML Models always have this field populated by AI + Platform. Note: The URI given on output will be immutable + and probably different, including the URI scheme, than the + one given on input. The output URI will point to a location + where the user only has a read access. project (str): - Optional. Project to run training in. Overrides project set in aiplatform.init. + Project to run training in. Overrides project set in aiplatform.init. location (str): - Optional. Location to run training in. Overrides location set in aiplatform.init. + Location to run training in. Overrides location set in aiplatform.init. credentials (auth_credentials.Credentials): - Optional. Custom credentials to use to run call training service. Overrides + Custom credentials to use to run call training service. Overrides credentials set in aiplatform.init. labels (Dict[str, str]): Optional. The labels with user-defined metadata to @@ -3378,9 +3474,9 @@ def __init__( If set, the trained Model will be secured by this key. Overrides encryption_spec_key_name set in aiplatform.init. - - Raises: - ValueError: If both column_transformations and column_specs were provided. + staging_bucket (str): + Bucket used to stage source and training artifacts. Overrides + staging_bucket set in aiplatform.init. 
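Editor's note: since project, location, credentials, and staging_bucket all fall back to aiplatform.init, a minimal setup sketch (all values are placeholders):

    from google.cloud import aiplatform

    # Values configured here are inherited by training job constructors that omit them.
    aiplatform.init(
        project="my-project",
        location="us-central1",
        staging_bucket="gs://my-staging-bucket",
    )

    # No project/location/staging_bucket arguments needed; they come from init().
    job = aiplatform.CustomContainerTrainingJob(
        display_name="test-train",
        container_uri="gcr.io/my_project_id/my_image_name:tag",
    )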
""" if not display_name: display_name = self.__class__._generate_display_name() @@ -3392,77 +3488,226 @@ def __init__( labels=labels, training_encryption_spec_key_name=training_encryption_spec_key_name, model_encryption_spec_key_name=model_encryption_spec_key_name, + container_uri=container_uri, + model_instance_schema_uri=model_instance_schema_uri, + model_parameters_schema_uri=model_parameters_schema_uri, + model_prediction_schema_uri=model_prediction_schema_uri, + model_serving_container_environment_variables=model_serving_container_environment_variables, + model_serving_container_ports=model_serving_container_ports, + model_serving_container_image_uri=model_serving_container_image_uri, + model_serving_container_command=model_serving_container_command, + model_serving_container_args=model_serving_container_args, + model_serving_container_predict_route=model_serving_container_predict_route, + model_serving_container_health_route=model_serving_container_health_route, + model_description=model_description, + staging_bucket=staging_bucket, ) - self._column_transformations = ( - column_transformations_utils.validate_and_get_column_transformations( - column_specs, column_transformations - ) - ) + self._command = command - self._optimization_objective = optimization_objective - self._optimization_prediction_type = optimization_prediction_type - self._optimization_objective_recall_value = optimization_objective_recall_value - self._optimization_objective_precision_value = ( - optimization_objective_precision_value - ) + def run( + self, + dataset: Optional[ + Union[ + datasets.ImageDataset, + datasets.TabularDataset, + datasets.TextDataset, + datasets.VideoDataset, + ] + ] = None, + annotation_schema_uri: Optional[str] = None, + model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, + base_output_dir: Optional[str] = None, + service_account: Optional[str] = None, + network: Optional[str] = None, + bigquery_destination: Optional[str] = None, + args: Optional[List[Union[str, float, int]]] = None, + environment_variables: Optional[Dict[str, str]] = None, + replica_count: int = 1, + machine_type: str = "n1-standard-4", + accelerator_type: str = "ACCELERATOR_TYPE_UNSPECIFIED", + accelerator_count: int = 0, + boot_disk_type: str = "pd-ssd", + boot_disk_size_gb: int = 100, + reduction_server_replica_count: int = 0, + reduction_server_machine_type: Optional[str] = None, + reduction_server_container_uri: Optional[str] = None, + training_fraction_split: Optional[float] = None, + validation_fraction_split: Optional[float] = None, + test_fraction_split: Optional[float] = None, + training_filter_split: Optional[str] = None, + validation_filter_split: Optional[str] = None, + test_filter_split: Optional[str] = None, + predefined_split_column_name: Optional[str] = None, + timestamp_split_column_name: Optional[str] = None, + timeout: Optional[int] = None, + restart_job_on_worker_restart: bool = False, + enable_web_access: bool = False, + tensorboard: Optional[str] = None, + sync=True, + create_request_timeout: Optional[float] = None, + ) -> Optional[models.Model]: + """Runs the custom training job. - self._additional_experiments = [] + Distributed Training Support: + If replica count = 1 then one chief replica will be provisioned. If + replica_count > 1 the remainder will be provisioned as a worker replica pool. 
+ ie: replica_count = 10 will result in 1 chief and 9 workers + All replicas have same machine_type, accelerator_type, and accelerator_count + + If training on a Vertex AI dataset, you can use one of the following split configurations: + Data fraction splits: + Any of ``training_fraction_split``, ``validation_fraction_split`` and + ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If + the provided ones sum to less than 1, the remainder is assigned to sets as + decided by Vertex AI. If none of the fractions are set, by default roughly 80% + of data will be used for training, 10% for validation, and 10% for test. + + Data filter splits: + Assigns input data to training, validation, and test sets + based on the given filters, data pieces not matched by any + filter are ignored. Currently only supported for Datasets + containing DataItems. + If any of the filters in this message are to match nothing, then + they can be set as '-' (the minus sign). + If using filter splits, all of ``training_filter_split``, ``validation_filter_split`` and + ``test_filter_split`` must be provided. + Supported only for unstructured Datasets. + + Predefined splits: + Assigns input data to training, validation, and test sets based on the value of a provided key. + If using predefined splits, ``predefined_split_column_name`` must be provided. + Supported only for tabular Datasets. + + Timestamp splits: + Assigns input data to training, validation, and test sets + based on a provided timestamps. The youngest data pieces are + assigned to training set, next to validation set, and the oldest + to the test set. + Supported only for tabular Datasets. + + Args: + dataset (Union[datasets.ImageDataset,datasets.TabularDataset,datasets.TextDataset,datasets.VideoDataset]): + Vertex AI to fit this training against. Custom training script should + retrieve datasets through passed in environment variables uris: + + os.environ["AIP_TRAINING_DATA_URI"] + os.environ["AIP_VALIDATION_DATA_URI"] + os.environ["AIP_TEST_DATA_URI"] + + Additionally the dataset format is passed in as: + + os.environ["AIP_DATA_FORMAT"] + annotation_schema_uri (str): + Google Cloud Storage URI points to a YAML file describing + annotation schema. The schema is defined as an OpenAPI 3.0.2 + [Schema Object](https://github.com/OAI/OpenAPI-Specification/blob/main/versions/3.0.2.md#schema-object) The schema files + that can be used here are found in + gs://google-cloud-aiplatform/schema/dataset/annotation/, + note that the chosen schema must be consistent with + ``metadata`` + of the Dataset specified by + ``dataset_id``. + + Only Annotations that both match this schema and belong to + DataItems not ignored by the split method are used in + respectively training, validation or test role, depending on + the role of the DataItem they are on. + + When used in conjunction with + ``annotations_filter``, + the Annotations used for training are filtered by both + ``annotations_filter`` + and + ``annotation_schema_uri``. + model_display_name (str): + If the script produces a managed Vertex AI Model. The display name of + the Model. The name can be up to 128 characters long and can be consist + of any UTF-8 characters. + + If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. 
+ Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. + base_output_dir (str): + GCS output directory of job. If not provided a + timestamped directory in the staging directory will be used. - def run( - self, - dataset: datasets.TabularDataset, - target_column: str, - training_fraction_split: Optional[float] = None, - validation_fraction_split: Optional[float] = None, - test_fraction_split: Optional[float] = None, - predefined_split_column_name: Optional[str] = None, - timestamp_split_column_name: Optional[str] = None, - weight_column: Optional[str] = None, - budget_milli_node_hours: int = 1000, - model_display_name: Optional[str] = None, - model_labels: Optional[Dict[str, str]] = None, - disable_early_stopping: bool = False, - export_evaluated_data_items: bool = False, - export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None, - export_evaluated_data_items_override_destination: bool = False, - additional_experiments: Optional[List[str]] = None, - sync: bool = True, - create_request_timeout: Optional[float] = None, - ) -> models.Model: - """Runs the training job and returns a model. + Vertex AI sets the following environment variables when it runs your training code: - If training on a Vertex AI dataset, you can use one of the following split configurations: - Data fraction splits: - Any of ``training_fraction_split``, ``validation_fraction_split`` and - ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If - the provided ones sum to less than 1, the remainder is assigned to sets as - decided by Vertex AI. If none of the fractions are set, by default roughly 80% - of data will be used for training, 10% for validation, and 10% for test. + - AIP_MODEL_DIR: a Cloud Storage URI of a directory intended for saving model artifacts, i.e. /model/ + - AIP_CHECKPOINT_DIR: a Cloud Storage URI of a directory intended for saving checkpoints, i.e. /checkpoints/ + - AIP_TENSORBOARD_LOG_DIR: a Cloud Storage URI of a directory intended for saving TensorBoard logs, i.e. /logs/ - Predefined splits: - Assigns input data to training, validation, and test sets based on the value of a provided key. - If using predefined splits, ``predefined_split_column_name`` must be provided. - Supported only for tabular Datasets. + service_account (str): + Specifies the service account for workload run-as account. + Users submitting jobs must have act-as permission on this run-as account. + network (str): + The full name of the Compute Engine network to which the job + should be peered. For example, projects/12345/global/networks/myVPC. + Private services access must already be configured for the network. + If left unspecified, the job is not peered with any network. + bigquery_destination (str): + Provide this field if `dataset` is a BiqQuery dataset. + The BigQuery project location where the training data is to + be written to. In the given project a new dataset is created + with name + ``dataset___`` + where timestamp is in YYYY_MM_DDThh_mm_ss_sssZ format. All + training input data will be written into that dataset. In + the dataset three tables will be created, ``training``, + ``validation`` and ``test``. - Timestamp splits: - Assigns input data to training, validation, and test sets - based on a provided timestamps. 
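Editor's note: a minimal training-script sketch showing how the environment variables documented above are typically consumed inside the container. Only the variable names come from this docstring; the rest is illustrative.

    # train_script.py -- illustrative only.
    import os

    # Set by Vertex AI when a managed dataset is attached to the job.
    data_format = os.environ.get("AIP_DATA_FORMAT")             # e.g. "bigquery"
    training_data_uri = os.environ.get("AIP_TRAINING_DATA_URI")
    validation_data_uri = os.environ.get("AIP_VALIDATION_DATA_URI")
    test_data_uri = os.environ.get("AIP_TEST_DATA_URI")

    # User-supplied environment_variables arrive as ordinary process env vars.
    my_value = os.environ.get("MY_KEY")

    # Write artifacts where Vertex AI expects them so a Model can be uploaded.
    model_dir = os.environ.get("AIP_MODEL_DIR", "/tmp/model")
    print(f"Saving model to {model_dir} (format={data_format}, MY_KEY={my_value})")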
The youngest data pieces are - assigned to training set, next to validation set, and the oldest - to the test set. - Supported only for tabular Datasets. + - AIP_DATA_FORMAT = "bigquery". + - AIP_TRAINING_DATA_URI ="bigquery_destination.dataset_*.training" + - AIP_VALIDATION_DATA_URI = "bigquery_destination.dataset_*.validation" + - AIP_TEST_DATA_URI = "bigquery_destination.dataset_*.test" + args (List[Unions[str, int, float]]): + Command line arguments to be passed to the Python script. + environment_variables (Dict[str, str]): + Environment variables to be passed to the container. + Should be a dictionary where keys are environment variable names + and values are environment variable values for those names. + At most 10 environment variables can be specified. + The Name of the environment variable must be unique. - Args: - dataset (datasets.TabularDataset): - Required. The dataset within the same Project from which data will be used to train the Model. The - Dataset must use schema compatible with Model being trained, - and what is compatible should be described in the used - TrainingPipeline's [training_task_definition] - [google.cloud.aiplatform.v1beta1.TrainingPipeline.training_task_definition]. - For tabular Datasets, all their data is exported to - training, to pick and choose from. - target_column (str): - Required. The name of the column values of which the Model is to predict. + environment_variables = { + 'MY_KEY': 'MY_VALUE' + } + replica_count (int): + The number of worker replicas. If replica count = 1 then one chief + replica will be provisioned. If replica_count > 1 the remainder will be + provisioned as a worker replica pool. + machine_type (str): + The type of machine to use for training. + accelerator_type (str): + Hardware accelerator type. One of ACCELERATOR_TYPE_UNSPECIFIED, + NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, + NVIDIA_TESLA_T4 + accelerator_count (int): + The number of accelerators to attach to a worker replica. + boot_disk_type (str): + Type of the boot disk, default is `pd-ssd`. + Valid values: `pd-ssd` (Persistent Disk Solid State Drive) or + `pd-standard` (Persistent Disk Hard Disk Drive). + boot_disk_size_gb (int): + Size in GB of the boot disk, default is 100GB. + boot disk size must be within the range of [100, 64000]. + reduction_server_replica_count (int): + The number of reduction server replicas, default is 0. + reduction_server_machine_type (str): + Optional. The type of machine to use for reduction server. + reduction_server_container_uri (str): + Optional. The Uri of the reduction server container image. + See details: https://cloud.google.com/vertex-ai/docs/training/distributed-training#reduce_training_time_with_reduction_server training_fraction_split (float): Optional. The fraction of the input data that is to be used to train the Model. This is ignored if Dataset is not provided. @@ -3472,6 +3717,27 @@ def run( test_fraction_split (float): Optional. The fraction of the input data that is to be used to evaluate the Model. This is ignored if Dataset is not provided. + training_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to train the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. 
This is ignored if Dataset is not provided. + validation_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to validate the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + test_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to test the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. predefined_split_column_name (str): Optional. The key is a name of one of the Dataset's data columns. The value of the key (either the label's value or @@ -3489,174 +3755,198 @@ def run( `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a piece of data the key is not present or has an invalid value, that piece is ignored by the pipeline. - Supported only for tabular and time series Datasets. - This parameter must be used with training_fraction_split, - validation_fraction_split, and test_fraction_split. - weight_column (str): - Optional. Name of the column that should be used as the weight column. - Higher values in this column give more importance to the row - during Model training. The column must have numeric values between 0 and - 10000 inclusively, and 0 value means that the row is ignored. - If the weight column field is not set, then all rows are assumed to have - equal weight of 1. - budget_milli_node_hours (int): - Optional. The train budget of creating this Model, expressed in milli node - hours i.e. 1,000 value in this field means 1 node hour. - The training cost of the model will not exceed this budget. The final - cost will be attempted to be close to the budget, though may end up - being (even) noticeably smaller - at the backend's discretion. This - especially may happen when further model training ceases to provide - any improvements. - If the budget is set to a value known to be insufficient to train a - Model for the given training set, the training won't be attempted and - will error. - The minimum value is 1000 and the maximum is 72000. - model_display_name (str): - Optional. If the script produces a managed Vertex AI Model. The display name of - the Model. The name can be up to 128 characters long and can be consist - of any UTF-8 characters. - - If not provided upon creation, the job's display_name is used. - model_labels (Dict[str, str]): - Optional. The labels with user-defined metadata to - organize your Models. - Label keys and values can be no longer than 64 - characters (Unicode codepoints), can only - contain lowercase letters, numeric characters, - underscores and dashes. International characters - are allowed. - See https://goo.gl/xmQnxf for more information - and examples of labels. - disable_early_stopping (bool): - Required. If true, the entire budget is used. This disables the early stopping - feature. 
By default, the early stopping feature is enabled, which means - that training might stop before the entire training budget has been - used, if further training does no longer brings significant improvement - to the model. - export_evaluated_data_items (bool): - Whether to export the test set predictions to a BigQuery table. - If False, then the export is not performed. - export_evaluated_data_items_bigquery_destination_uri (string): - Optional. URI of desired destination BigQuery table for exported test set predictions. - Expected format: - ``bq://::
`` + Supported only for tabular and time series Datasets. + timeout (int): + The maximum job running time in seconds. The default is 7 days. + restart_job_on_worker_restart (bool): + Restarts the entire CustomJob if a worker + gets restarted. This feature can be used by + distributed training jobs that are not resilient + to workers leaving and joining a job. + enable_web_access (bool): + Whether you want Vertex AI to enable interactive shell access + to training containers. + https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell + tensorboard (str): + Optional. The name of a Vertex AI + [Tensorboard][google.cloud.aiplatform.v1beta1.Tensorboard] + resource to which this CustomJob will upload Tensorboard + logs. Format: + ``projects/{project}/locations/{location}/tensorboards/{tensorboard}`` - If not specified, then results are exported to the following auto-created BigQuery - table: - ``:export_evaluated_examples__.evaluated_examples`` + The training script should write Tensorboard to following Vertex AI environment + variable: - Applies only if [export_evaluated_data_items] is True. - export_evaluated_data_items_override_destination (bool): - Whether to override the contents of [export_evaluated_data_items_bigquery_destination_uri], - if the table exists, for exported test set predictions. If False, and the - table exists, then the training job will fail. + AIP_TENSORBOARD_LOG_DIR - Applies only if [export_evaluated_data_items] is True and - [export_evaluated_data_items_bigquery_destination_uri] is specified. - additional_experiments (List[str]): - Optional. Additional experiment flags for the automl tables training. + `service_account` is required with provided `tensorboard`. + For more information on configuring your service account please visit: + https://cloud.google.com/vertex-ai/docs/experiments/tensorboard-training sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. create_request_timeout (float): Optional. The timeout for the create request in seconds. + Returns: model: The trained Vertex AI Model resource or None if training did not produce a Vertex AI Model. Raises: - RuntimeError: If Training job has already been run or is waiting to run. + RuntimeError: If Training job has already been run, staging_bucket has not + been set, or model_display_name was provided but required arguments + were not provided in constructor. 
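
    Taken together, the parameters documented above are normally supplied in a single run() call. The following is a minimal, illustrative sketch only, not code from this change: it assumes a script-based custom training job (for example, aiplatform.CustomTrainingJob) and a managed dataset created beforehand, and every project ID, bucket, image URI, dataset resource name, service account, and Tensorboard resource name below is a placeholder.

        from google.cloud import aiplatform

        aiplatform.init(
            project="my-project",
            location="us-central1",
            staging_bucket="gs://my-staging-bucket",
        )

        # Script-based custom training job; image URIs are illustrative placeholders.
        job = aiplatform.CustomTrainingJob(
            display_name="my-custom-training-job",
            script_path="task.py",
            container_uri="us-docker.pkg.dev/vertex-ai/training/tf-cpu.2-8:latest",
            model_serving_container_image_uri="us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-8:latest",
        )

        # Managed dataset created elsewhere; the resource name is a placeholder.
        my_dataset = aiplatform.TabularDataset(
            "projects/my-project/locations/us-central1/datasets/1234567890"
        )

        model = job.run(
            dataset=my_dataset,
            replica_count=4,                    # 1 chief + 3 workers
            machine_type="n1-standard-8",
            accelerator_type="NVIDIA_TESLA_T4",
            accelerator_count=1,
            environment_variables={"MY_KEY": "MY_VALUE"},
            training_fraction_split=0.8,
            validation_fraction_split=0.1,
            test_fraction_split=0.1,
            base_output_dir="gs://my-staging-bucket/custom-job-output",
            service_account="trainer@my-project.iam.gserviceaccount.com",
            tensorboard="projects/my-project/locations/us-central1/tensorboards/1234567890",
            sync=True,
        )

    As documented above, `tensorboard` is only accepted together with `service_account`, and the fraction splits may be omitted entirely, in which case Vertex AI falls back to the default roughly 80/10/10 split.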
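
    On the training-script side, the data splits, data format, and output locations described above arrive as environment variables that Vertex AI sets before the script starts. A minimal sketch of reading them follows; only variables named in the docstring above are used, and the training loop itself is elided.

        # task.py -- entry point executed on each replica
        import os

        data_format = os.environ["AIP_DATA_FORMAT"]              # "bigquery" when a bigquery_destination is used
        training_data_uri = os.environ["AIP_TRAINING_DATA_URI"]
        validation_data_uri = os.environ["AIP_VALIDATION_DATA_URI"]
        test_data_uri = os.environ["AIP_TEST_DATA_URI"]

        model_dir = os.environ["AIP_MODEL_DIR"]                   # write final model artifacts here
        checkpoint_dir = os.environ.get("AIP_CHECKPOINT_DIR")     # write checkpoints here
        tensorboard_log_dir = os.environ.get("AIP_TENSORBOARD_LOG_DIR")  # write TensorBoard logs here

        # ... load the three splits, train, checkpoint to checkpoint_dir,
        # log to tensorboard_log_dir, and export the trained model to model_dir ...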
""" - if model_display_name: - utils.validate_display_name(model_display_name) - if model_labels: - utils.validate_labels(model_labels) - - if self._is_waiting_to_run(): - raise RuntimeError("AutoML Tabular Training is already scheduled to run.") - - if self._has_run: - raise RuntimeError("AutoML Tabular Training has already run.") - - if additional_experiments: - self._add_additional_experiments(additional_experiments) + worker_pool_specs, managed_model = self._prepare_and_validate_run( + model_display_name=model_display_name, + model_labels=model_labels, + replica_count=replica_count, + machine_type=machine_type, + accelerator_count=accelerator_count, + accelerator_type=accelerator_type, + boot_disk_type=boot_disk_type, + boot_disk_size_gb=boot_disk_size_gb, + reduction_server_replica_count=reduction_server_replica_count, + reduction_server_machine_type=reduction_server_machine_type, + ) return self._run( dataset=dataset, - target_column=target_column, + annotation_schema_uri=annotation_schema_uri, + worker_pool_specs=worker_pool_specs, + managed_model=managed_model, + args=args, + environment_variables=environment_variables, + base_output_dir=base_output_dir, + service_account=service_account, + network=network, + bigquery_destination=bigquery_destination, training_fraction_split=training_fraction_split, validation_fraction_split=validation_fraction_split, test_fraction_split=test_fraction_split, + training_filter_split=training_filter_split, + validation_filter_split=validation_filter_split, + test_filter_split=test_filter_split, predefined_split_column_name=predefined_split_column_name, timestamp_split_column_name=timestamp_split_column_name, - weight_column=weight_column, - budget_milli_node_hours=budget_milli_node_hours, - model_display_name=model_display_name, - model_labels=model_labels, - disable_early_stopping=disable_early_stopping, - export_evaluated_data_items=export_evaluated_data_items, - export_evaluated_data_items_bigquery_destination_uri=export_evaluated_data_items_bigquery_destination_uri, - export_evaluated_data_items_override_destination=export_evaluated_data_items_override_destination, + timeout=timeout, + restart_job_on_worker_restart=restart_job_on_worker_restart, + enable_web_access=enable_web_access, + tensorboard=tensorboard, + reduction_server_container_uri=reduction_server_container_uri + if reduction_server_replica_count > 0 + else None, sync=sync, create_request_timeout=create_request_timeout, ) - @base.optional_sync() + @base.optional_sync(construct_object_on_arg="managed_model") def _run( self, - dataset: datasets.TabularDataset, - target_column: str, + dataset: Optional[ + Union[ + datasets.ImageDataset, + datasets.TabularDataset, + datasets.TextDataset, + datasets.VideoDataset, + ] + ], + annotation_schema_uri: Optional[str], + worker_pool_specs: worker_spec_utils._DistributedTrainingSpec, + managed_model: Optional[gca_model.Model] = None, + args: Optional[List[Union[str, float, int]]] = None, + environment_variables: Optional[Dict[str, str]] = None, + base_output_dir: Optional[str] = None, + service_account: Optional[str] = None, + network: Optional[str] = None, + bigquery_destination: Optional[str] = None, training_fraction_split: Optional[float] = None, validation_fraction_split: Optional[float] = None, test_fraction_split: Optional[float] = None, + training_filter_split: Optional[str] = None, + validation_filter_split: Optional[str] = None, + test_filter_split: Optional[str] = None, predefined_split_column_name: Optional[str] = None, 
timestamp_split_column_name: Optional[str] = None, - weight_column: Optional[str] = None, - budget_milli_node_hours: int = 1000, - model_display_name: Optional[str] = None, - model_labels: Optional[Dict[str, str]] = None, - disable_early_stopping: bool = False, - export_evaluated_data_items: bool = False, - export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None, - export_evaluated_data_items_override_destination: bool = False, - sync: bool = True, + timeout: Optional[int] = None, + restart_job_on_worker_restart: bool = False, + enable_web_access: bool = False, + tensorboard: Optional[str] = None, + reduction_server_container_uri: Optional[str] = None, + sync=True, create_request_timeout: Optional[float] = None, - ) -> models.Model: - """Runs the training job and returns a model. + ) -> Optional[models.Model]: + """Packages local script and launches training_job. + Args: + dataset ( + Union[ + datasets.ImageDataset, + datasets.TabularDataset, + datasets.TextDataset, + datasets.VideoDataset, + ] + ): + Vertex AI to fit this training against. + annotation_schema_uri (str): + Google Cloud Storage URI points to a YAML file describing + annotation schema. + worker_pools_spec (worker_spec_utils._DistributedTrainingSpec): + Worker pools pecs required to run job. + managed_model (gca_model.Model): + Model proto if this script produces a Managed Model. + args (List[Unions[str, int, float]]): + Command line arguments to be passed to the Python script. + environment_variables (Dict[str, str]): + Environment variables to be passed to the container. + Should be a dictionary where keys are environment variable names + and values are environment variable values for those names. + At most 10 environment variables can be specified. + The Name of the environment variable must be unique. - If training on a Vertex AI dataset, you can use one of the following split configurations: - Data fraction splits: - Any of ``training_fraction_split``, ``validation_fraction_split`` and - ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If - the provided ones sum to less than 1, the remainder is assigned to sets as - decided by Vertex AI. If none of the fractions are set, by default roughly 80% - of data will be used for training, 10% for validation, and 10% for test. + environment_variables = { + 'MY_KEY': 'MY_VALUE' + } + base_output_dir (str): + GCS output directory of job. If not provided a + timestamped directory in the staging directory will be used. - Predefined splits: - Assigns input data to training, validation, and test sets based on the value of a provided key. - If using predefined splits, ``predefined_split_column_name`` must be provided. - Supported only for tabular Datasets. + Vertex AI sets the following environment variables when it runs your training code: - Timestamp splits: - Assigns input data to training, validation, and test sets - based on a provided timestamps. The youngest data pieces are - assigned to training set, next to validation set, and the oldest - to the test set. - Supported only for tabular Datasets. + - AIP_MODEL_DIR: a Cloud Storage URI of a directory intended for saving model artifacts, i.e. /model/ + - AIP_CHECKPOINT_DIR: a Cloud Storage URI of a directory intended for saving checkpoints, i.e. /checkpoints/ + - AIP_TENSORBOARD_LOG_DIR: a Cloud Storage URI of a directory intended for saving TensorBoard logs, i.e. /logs/ - Args: - dataset (datasets.TabularDataset): - Required. 
The dataset within the same Project from which data will be used to train the Model. The - Dataset must use schema compatible with Model being trained, - and what is compatible should be described in the used - TrainingPipeline's [training_task_definition] - [google.cloud.aiplatform.v1beta1.TrainingPipeline.training_task_definition]. - For tabular Datasets, all their data is exported to - training, to pick and choose from. - target_column (str): - Required. The name of the column values of which the Model is to predict. + service_account (str): + Specifies the service account for workload run-as account. + Users submitting jobs must have act-as permission on this run-as account. + network (str): + The full name of the Compute Engine network to which the job + should be peered. For example, projects/12345/global/networks/myVPC. + Private services access must already be configured for the network. + If left unspecified, the job is not peered with any network. + timeout (int): + The maximum job running time in seconds. The default is 7 days. + restart_job_on_worker_restart (bool): + Restarts the entire CustomJob if a worker + gets restarted. This feature can be used by + distributed training jobs that are not resilient + to workers leaving and joining a job. + bigquery_destination (str): + The BigQuery project location where the training data is to + be written to. In the given project a new dataset is created + with name + ``dataset___`` + where timestamp is in YYYY_MM_DDThh_mm_ss_sssZ format. All + training input data will be written into that dataset. In + the dataset three tables will be created, ``training``, + ``validation`` and ``test``. + + - AIP_DATA_FORMAT = "bigquery". + - AIP_TRAINING_DATA_URI ="bigquery_destination.dataset_*.training" + - AIP_VALIDATION_DATA_URI = "bigquery_destination.dataset_*.validation" + - AIP_TEST_DATA_URI = "bigquery_destination.dataset_*.test" training_fraction_split (float): Optional. The fraction of the input data that is to be used to train the Model. This is ignored if Dataset is not provided. @@ -3666,6 +3956,27 @@ def _run( test_fraction_split (float): Optional. The fraction of the input data that is to be used to evaluate the Model. This is ignored if Dataset is not provided. + training_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to train the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + validation_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to validate the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + test_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to test the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. 
If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. predefined_split_column_name (str): Optional. The key is a name of one of the Dataset's data columns. The value of the key (either the label's value or @@ -3683,71 +3994,29 @@ def _run( `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a piece of data the key is not present or has an invalid value, that piece is ignored by the pipeline. - Supported only for tabular and time series Datasets. - This parameter must be used with training_fraction_split, - validation_fraction_split, and test_fraction_split. - weight_column (str): - Optional. Name of the column that should be used as the weight column. - Higher values in this column give more importance to the row - during Model training. The column must have numeric values between 0 and - 10000 inclusively, and 0 value means that the row is ignored. - If the weight column field is not set, then all rows are assumed to have - equal weight of 1. - budget_milli_node_hours (int): - Optional. The train budget of creating this Model, expressed in milli node - hours i.e. 1,000 value in this field means 1 node hour. - The training cost of the model will not exceed this budget. The final - cost will be attempted to be close to the budget, though may end up - being (even) noticeably smaller - at the backend's discretion. This - especially may happen when further model training ceases to provide - any improvements. - If the budget is set to a value known to be insufficient to train a - Model for the given training set, the training won't be attempted and - will error. - The minimum value is 1000 and the maximum is 72000. - model_display_name (str): - Optional. If the script produces a managed Vertex AI Model. The display name of - the Model. The name can be up to 128 characters long and can be consist - of any UTF-8 characters. - - If not provided upon creation, the job's display_name is used. - model_labels (Dict[str, str]): - Optional. The labels with user-defined metadata to - organize your Models. - Label keys and values can be no longer than 64 - characters (Unicode codepoints), can only - contain lowercase letters, numeric characters, - underscores and dashes. International characters - are allowed. - See https://goo.gl/xmQnxf for more information - and examples of labels. - disable_early_stopping (bool): - Required. If true, the entire budget is used. This disables the early stopping - feature. By default, the early stopping feature is enabled, which means - that training might stop before the entire training budget has been - used, if further training does no longer brings significant improvement - to the model. - export_evaluated_data_items (bool): - Whether to export the test set predictions to a BigQuery table. - If False, then the export is not performed. - export_evaluated_data_items_bigquery_destination_uri (string): - Optional. URI of desired destination BigQuery table for exported test set predictions. - Expected format: - ``bq://::
`` + Supported only for tabular and time series Datasets. + enable_web_access (bool): + Whether you want Vertex AI to enable interactive shell access + to training containers. + https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell + tensorboard (str): + Optional. The name of a Vertex AI + [Tensorboard][google.cloud.aiplatform.v1beta1.Tensorboard] + resource to which this CustomJob will upload Tensorboard + logs. Format: + ``projects/{project}/locations/{location}/tensorboards/{tensorboard}`` - If not specified, then results are exported to the following auto-created BigQuery - table: - ``:export_evaluated_examples__.evaluated_examples`` + The training script should write Tensorboard to following Vertex AI environment + variable: - Applies only if [export_evaluated_data_items] is True. - export_evaluated_data_items_override_destination (bool): - Whether to override the contents of [export_evaluated_data_items_bigquery_destination_uri], - if the table exists, for exported test set predictions. If False, and the - table exists, then the training job will fail. + AIP_TENSORBOARD_LOG_DIR - Applies only if [export_evaluated_data_items] is True and - [export_evaluated_data_items_bigquery_destination_uri] is specified. + `service_account` is required with provided `tensorboard`. + For more information on configuring your service account please visit: + https://cloud.google.com/vertex-ai/docs/experiments/tensorboard-training + reduction_server_container_uri (str): + Optional. The Uri of the reduction server container image. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will @@ -3760,141 +4029,82 @@ def _run( produce a Vertex AI Model. """ - training_task_definition = schema.training_job.definition.automl_tabular - - # auto-populate transformations - if self._column_transformations is None: - _LOGGER.info( - "No column transformations provided, so now retrieving columns from dataset in order to set default column transformations." - ) - - ( - self._column_transformations, - column_names, - ) = column_transformations_utils.get_default_column_transformations( - dataset=dataset, target_column=target_column - ) + for spec_order, spec in enumerate(worker_pool_specs): - _LOGGER.info( - "The column transformation of type 'auto' was set for the following columns: %s." 
- % column_names - ) + if not spec: + continue - training_task_inputs_dict = { - # required inputs - "targetColumn": target_column, - "transformations": self._column_transformations, - "trainBudgetMilliNodeHours": budget_milli_node_hours, - # optional inputs - "weightColumnName": weight_column, - "disableEarlyStopping": disable_early_stopping, - "optimizationObjective": self._optimization_objective, - "predictionType": self._optimization_prediction_type, - "optimizationObjectiveRecallValue": self._optimization_objective_recall_value, - "optimizationObjectivePrecisionValue": self._optimization_objective_precision_value, - } + if ( + spec_order == worker_spec_utils._SPEC_ORDERS["server_spec"] + and reduction_server_container_uri + ): + spec["container_spec"] = { + "image_uri": reduction_server_container_uri, + } + else: + spec["containerSpec"] = {"imageUri": self._container_uri} - final_export_eval_bq_uri = export_evaluated_data_items_bigquery_destination_uri - if final_export_eval_bq_uri and not final_export_eval_bq_uri.startswith( - "bq://" - ): - final_export_eval_bq_uri = f"bq://{final_export_eval_bq_uri}" + if self._command: + spec["containerSpec"]["command"] = self._command - if export_evaluated_data_items: - training_task_inputs_dict["exportEvaluatedDataItemsConfig"] = { - "destinationBigqueryUri": final_export_eval_bq_uri, - "overrideExistingTable": export_evaluated_data_items_override_destination, - } + if args: + spec["containerSpec"]["args"] = args - if self._additional_experiments: - training_task_inputs_dict[ - "additionalExperiments" - ] = self._additional_experiments + if environment_variables: + spec["containerSpec"]["env"] = [ + {"name": key, "value": value} + for key, value in environment_variables.items() + ] - model = gca_model.Model( - display_name=model_display_name or self._display_name, - labels=model_labels or self._labels, - encryption_spec=self._model_encryption_spec, + ( + training_task_inputs, + base_output_dir, + ) = self._prepare_training_task_inputs_and_output_dir( + worker_pool_specs=worker_pool_specs, + base_output_dir=base_output_dir, + service_account=service_account, + network=network, + timeout=timeout, + restart_job_on_worker_restart=restart_job_on_worker_restart, + enable_web_access=enable_web_access, + tensorboard=tensorboard, ) - return self._run_job( - training_task_definition=training_task_definition, - training_task_inputs=training_task_inputs_dict, + model = self._run_job( + training_task_definition=schema.training_job.definition.custom_task, + training_task_inputs=training_task_inputs, dataset=dataset, + annotation_schema_uri=annotation_schema_uri, training_fraction_split=training_fraction_split, validation_fraction_split=validation_fraction_split, test_fraction_split=test_fraction_split, + training_filter_split=training_filter_split, + validation_filter_split=validation_filter_split, + test_filter_split=test_filter_split, predefined_split_column_name=predefined_split_column_name, timestamp_split_column_name=timestamp_split_column_name, - model=model, + model=managed_model, + gcs_destination_uri_prefix=base_output_dir, + bigquery_destination=bigquery_destination, create_request_timeout=create_request_timeout, ) - @property - def _model_upload_fail_string(self) -> str: - """Helper property for model upload failure.""" - return ( - f"Training Pipeline {self.resource_name} is not configured to upload a " - "Model." - ) - - def _add_additional_experiments(self, additional_experiments: List[str]): - """Add experiment flags to the training job. 
- Args: - additional_experiments (List[str]): - Experiment flags that can enable some experimental training features. - """ - self._additional_experiments.extend(additional_experiments) - - @staticmethod - def get_auto_column_specs( - dataset: datasets.TabularDataset, - target_column: str, - ) -> Dict[str, str]: - """Returns a dict with all non-target columns as keys and 'auto' as values. - - Example usage: - - column_specs = training_jobs.AutoMLTabularTrainingJob.get_auto_column_specs( - dataset=my_dataset, - target_column="my_target_column", - ) - - Args: - dataset (datasets.TabularDataset): - Required. Intended dataset. - target_column(str): - Required. Intended target column. - Returns: - Dict[str, str] - Column names as keys and 'auto' as values - """ - column_names = [ - column for column in dataset.column_names if column != target_column - ] - column_specs = {column: "auto" for column in column_names} - return column_specs - - class column_data_types: - AUTO = "auto" - NUMERIC = "numeric" - CATEGORICAL = "categorical" - TIMESTAMP = "timestamp" - TEXT = "text" - REPEATED_NUMERIC = "repeated_numeric" - REPEATED_CATEGORICAL = "repeated_categorical" - REPEATED_TEXT = "repeated_text" + return model -class AutoMLForecastingTrainingJob(_TrainingJob): - _supported_training_schemas = (schema.training_job.definition.automl_forecasting,) +class AutoMLTabularTrainingJob(_TrainingJob): + _supported_training_schemas = (schema.training_job.definition.automl_tabular,) def __init__( self, - display_name: Optional[str] = None, + # TODO(b/223262536): Make display_name parameter fully optional in next major release + display_name: str, + optimization_prediction_type: str, optimization_objective: Optional[str] = None, column_specs: Optional[Dict[str, str]] = None, column_transformations: Optional[List[Dict[str, Dict[str, str]]]] = None, + optimization_objective_recall_value: Optional[float] = None, + optimization_objective_precision_value: Optional[float] = None, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, @@ -3902,23 +4112,58 @@ def __init__( training_encryption_spec_key_name: Optional[str] = None, model_encryption_spec_key_name: Optional[str] = None, ): - """Constructs a AutoML Forecasting Training Job. + """Constructs a AutoML Tabular Training Job. + + Example usage: + + job = training_jobs.AutoMLTabularTrainingJob( + display_name="my_display_name", + optimization_prediction_type="classification", + optimization_objective="minimize-log-loss", + column_specs={"column_1": "auto", "column_2": "numeric"}, + labels={'key': 'value'}, + ) Args: display_name (str): - Optional. The user-defined name of this TrainingPipeline. + Required. The user-defined name of this TrainingPipeline. + optimization_prediction_type (str): + The type of prediction the Model is to produce. + "classification" - Predict one out of multiple target values is + picked for each row. + "regression" - Predict a value based on its relation to other values. + This type is available only to columns that contain + semantically numeric values, i.e. integers or floating + point number, even if stored as e.g. strings. + optimization_objective (str): - Optional. Objective function the model is to be optimized towards. - The training process creates a Model that optimizes the value of the objective - function over the validation set. The supported optimization objectives: + Optional. Objective function the Model is to be optimized towards. 
The training + task creates a Model that maximizes/minimizes the value of the objective + function over the validation set. + + The supported optimization objectives depend on the prediction type, and + in the case of classification also the number of distinct values in the + target column (two distint values -> binary, 3 or more distinct values + -> multi class). + If the field is not set, the default objective function is used. + + Classification (binary): + "maximize-au-roc" (default) - Maximize the area under the receiver + operating characteristic (ROC) curve. + "minimize-log-loss" - Minimize log loss. + "maximize-au-prc" - Maximize the area under the precision-recall curve. + "maximize-precision-at-recall" - Maximize precision for a specified + recall value. + "maximize-recall-at-precision" - Maximize recall for a specified + precision value. + + Classification (multi class): + "minimize-log-loss" (default) - Minimize log loss. + + Regression: "minimize-rmse" (default) - Minimize root-mean-squared error (RMSE). "minimize-mae" - Minimize mean-absolute error (MAE). "minimize-rmsle" - Minimize root-mean-squared log error (RMSLE). - "minimize-rmspe" - Minimize root-mean-squared percentage error (RMSPE). - "minimize-wape-mae" - Minimize the combination of weighted absolute percentage error (WAPE) - and mean-absolute-error (MAE). - "minimize-quantile-loss" - Minimize the quantile loss at the defined quantiles. - (Set this objective to build quantile forecasts.) column_specs (Dict[str, str]): Optional. Alternative to column_transformations where the keys of the dict are column names and their respective values are one of @@ -3929,7 +4174,10 @@ def __init__( If an input column has no transformations on it, such a column is ignored by the training, except for the targetColumn, which should have no transformations defined on. - Only one of column_transformations or column_specs should be passed. + Only one of column_transformations or column_specs should be passed. If none + of column_transformations or column_specs is passed, the local credentials + being used will try setting column_specs to "auto". To do this, the local + credentials require read access to the GCS or BigQuery training data source. column_transformations (List[Dict[str, Dict[str, str]]]): Optional. Transformations to apply to the input columns (i.e. columns other than the targetColumn). Each transformation may produce multiple @@ -3941,7 +4189,22 @@ def __init__( ignored by the training, except for the targetColumn, which should have no transformations defined on. Only one of column_transformations or column_specs should be passed. - Consider using column_specs as column_transformations will be deprecated eventually. + Consider using column_specs as column_transformations will be deprecated + eventually. If none of column_transformations or column_specs is passed, + the local credentials being used will try setting column_transformations to + "auto". To do this, the local credentials require read access to the GCS or + BigQuery training data source. + optimization_objective_recall_value (float): + Optional. Required when maximize-precision-at-recall optimizationObjective was + picked, represents the recall value at which the optimization is done. + + The minimum value is 0 and the maximum is 1.0. + optimization_objective_precision_value (float): + Optional. Required when maximize-recall-at-precision optimizationObjective was + picked, represents the precision value at which the optimization is + done. 
+ + The minimum value is 0 and the maximum is 1.0. project (str): Optional. Project to run training in. Overrides project set in aiplatform.init. location (str): @@ -4007,35 +4270,31 @@ def __init__( ) self._optimization_objective = optimization_objective + self._optimization_prediction_type = optimization_prediction_type + self._optimization_objective_recall_value = optimization_objective_recall_value + self._optimization_objective_precision_value = ( + optimization_objective_precision_value + ) + self._additional_experiments = [] def run( self, - dataset: datasets.TimeSeriesDataset, - target_column: str, - time_column: str, - time_series_identifier_column: str, - unavailable_at_forecast_columns: List[str], - available_at_forecast_columns: List[str], - forecast_horizon: int, - data_granularity_unit: str, - data_granularity_count: int, + dataset: datasets.TabularDataset, + target_column: str, training_fraction_split: Optional[float] = None, validation_fraction_split: Optional[float] = None, test_fraction_split: Optional[float] = None, predefined_split_column_name: Optional[str] = None, timestamp_split_column_name: Optional[str] = None, weight_column: Optional[str] = None, - time_series_attribute_columns: Optional[List[str]] = None, - context_window: Optional[int] = None, - export_evaluated_data_items: bool = False, - export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None, - export_evaluated_data_items_override_destination: bool = False, - quantiles: Optional[List[float]] = None, - validation_options: Optional[str] = None, budget_milli_node_hours: int = 1000, model_display_name: Optional[str] = None, model_labels: Optional[Dict[str, str]] = None, + disable_early_stopping: bool = False, + export_evaluated_data_items: bool = False, + export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None, + export_evaluated_data_items_override_destination: bool = False, additional_experiments: Optional[List[str]] = None, sync: bool = True, create_request_timeout: Optional[float] = None, @@ -4063,47 +4322,30 @@ def run( Supported only for tabular Datasets. Args: - dataset (datasets.TimeSeriesDataset): + dataset (datasets.TabularDataset): Required. The dataset within the same Project from which data will be used to train the Model. The Dataset must use schema compatible with Model being trained, and what is compatible should be described in the used TrainingPipeline's [training_task_definition] [google.cloud.aiplatform.v1beta1.TrainingPipeline.training_task_definition]. - For time series Datasets, all their data is exported to + For tabular Datasets, all their data is exported to training, to pick and choose from. target_column (str): - Required. Name of the column that the Model is to predict values for. This - column must be unavailable at forecast. - time_column (str): - Required. Name of the column that identifies time order in the time series. - This column must be available at forecast. - time_series_identifier_column (str): - Required. Name of the column that identifies the time series. - unavailable_at_forecast_columns (List[str]): - Required. Column names of columns that are unavailable at forecast. - Each column contains information for the given entity (identified by the - [time_series_identifier_column]) that is unknown before the forecast - (e.g. population of a city in a given year, or weather on a given day). - available_at_forecast_columns (List[str]): - Required. Column names of columns that are available at forecast. 
- Each column contains information for the given entity (identified by the - [time_series_identifier_column]) that is known at forecast. - forecast_horizon: (int): - Required. The amount of time into the future for which forecasted values for the target are - returned. Expressed in number of units defined by the [data_granularity_unit] and - [data_granularity_count] field. Inclusive. - data_granularity_unit (str): - Required. The data granularity unit. Accepted values are ``minute``, - ``hour``, ``day``, ``week``, ``month``, ``year``. - data_granularity_count (int): - Required. The number of data granularity units between data points in the training - data. If [data_granularity_unit] is `minute`, can be 1, 5, 10, 15, or 30. For all other - values of [data_granularity_unit], must be 1. + Required. The name of the column values of which the Model is to predict. + training_fraction_split (float): + Optional. The fraction of the input data that is to be used to train + the Model. This is ignored if Dataset is not provided. + validation_fraction_split (float): + Optional. The fraction of the input data that is to be used to validate + the Model. This is ignored if Dataset is not provided. + test_fraction_split (float): + Optional. The fraction of the input data that is to be used to evaluate + the Model. This is ignored if Dataset is not provided. predefined_split_column_name (str): Optional. The key is a name of one of the Dataset's data columns. The value of the key (either the label's value or - value in the column) must be one of {``TRAIN``, - ``VALIDATE``, ``TEST``}, and it defines to which set the + value in the column) must be one of {``training``, + ``validation``, ``test``}, and it defines to which set the given piece of data is assigned. If for a piece of data the key is not present or has an invalid value, that piece is ignored by the pipeline. @@ -4125,49 +4367,7 @@ def run( during Model training. The column must have numeric values between 0 and 10000 inclusively, and 0 value means that the row is ignored. If the weight column field is not set, then all rows are assumed to have - equal weight of 1. This column must be available at forecast. - time_series_attribute_columns (List[str]): - Optional. Column names that should be used as attribute columns. - Each column is constant within a time series. - context_window (int): - Optional. The amount of time into the past training and prediction data is used for - model training and prediction respectively. Expressed in number of units defined by the - [data_granularity_unit] and [data_granularity_count] fields. When not provided uses the - default value of 0 which means the model sets each series context window to be 0 (also - known as "cold start"). Inclusive. - export_evaluated_data_items (bool): - Whether to export the test set predictions to a BigQuery table. - If False, then the export is not performed. - export_evaluated_data_items_bigquery_destination_uri (string): - Optional. URI of desired destination BigQuery table for exported test set predictions. - - Expected format: - ``bq://::
`` - - If not specified, then results are exported to the following auto-created BigQuery - table: - ``:export_evaluated_examples__.evaluated_examples`` - - Applies only if [export_evaluated_data_items] is True. - export_evaluated_data_items_override_destination (bool): - Whether to override the contents of [export_evaluated_data_items_bigquery_destination_uri], - if the table exists, for exported test set predictions. If False, and the - table exists, then the training job will fail. - - Applies only if [export_evaluated_data_items] is True and - [export_evaluated_data_items_bigquery_destination_uri] is specified. - quantiles (List[float]): - Quantiles to use for the `minimize-quantile-loss` - [AutoMLForecastingTrainingJob.optimization_objective]. This argument is required in - this case. - - Accepts up to 5 quantiles in the form of a double from 0 to 1, exclusive. - Each quantile must be unique. - validation_options (str): - Validation options for the data validation component. The available options are: - "fail-pipeline" - (default), will validate against the validation and fail the pipeline - if it fails. - "ignore-validation" - ignore the results of the validation and continue the pipeline + equal weight of 1. budget_milli_node_hours (int): Optional. The train budget of creating this Model, expressed in milli node hours i.e. 1,000 value in this field means 1 node hour. @@ -4196,14 +4396,41 @@ def run( are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. + disable_early_stopping (bool): + Required. If true, the entire budget is used. This disables the early stopping + feature. By default, the early stopping feature is enabled, which means + that training might stop before the entire training budget has been + used, if further training does no longer brings significant improvement + to the model. + export_evaluated_data_items (bool): + Whether to export the test set predictions to a BigQuery table. + If False, then the export is not performed. + export_evaluated_data_items_bigquery_destination_uri (string): + Optional. URI of desired destination BigQuery table for exported test set predictions. + + Expected format: + ``bq://::
`` + + If not specified, then results are exported to the following auto-created BigQuery + table: + ``:export_evaluated_examples__.evaluated_examples`` + + Applies only if [export_evaluated_data_items] is True. + export_evaluated_data_items_override_destination (bool): + Whether to override the contents of [export_evaluated_data_items_bigquery_destination_uri], + if the table exists, for exported test set predictions. If False, and the + table exists, then the training job will fail. + + Applies only if [export_evaluated_data_items] is True and + [export_evaluated_data_items_bigquery_destination_uri] is specified. additional_experiments (List[str]): - Optional. Additional experiment flags for the time series forcasting training. - create_request_timeout (float): - Optional. The timeout for the create request in seconds. + Optional. Additional experiment flags for the automl tables training. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. + create_request_timeout (float): + Optional. The timeout for the create request in seconds. Returns: model: The trained Vertex AI Model resource or None if training did not produce a Vertex AI Model. @@ -4211,19 +4438,16 @@ def run( Raises: RuntimeError: If Training job has already been run or is waiting to run. """ - if model_display_name: utils.validate_display_name(model_display_name) if model_labels: utils.validate_labels(model_labels) if self._is_waiting_to_run(): - raise RuntimeError( - "AutoML Forecasting Training is already scheduled to run." - ) + raise RuntimeError("AutoML Tabular Training is already scheduled to run.") if self._has_run: - raise RuntimeError("AutoML Forecasting Training has already run.") + raise RuntimeError("AutoML Tabular Training has already run.") if additional_experiments: self._add_additional_experiments(additional_experiments) @@ -4231,29 +4455,19 @@ def run( return self._run( dataset=dataset, target_column=target_column, - time_column=time_column, - time_series_identifier_column=time_series_identifier_column, - unavailable_at_forecast_columns=unavailable_at_forecast_columns, - available_at_forecast_columns=available_at_forecast_columns, - forecast_horizon=forecast_horizon, - data_granularity_unit=data_granularity_unit, - data_granularity_count=data_granularity_count, training_fraction_split=training_fraction_split, validation_fraction_split=validation_fraction_split, test_fraction_split=test_fraction_split, predefined_split_column_name=predefined_split_column_name, timestamp_split_column_name=timestamp_split_column_name, weight_column=weight_column, - time_series_attribute_columns=time_series_attribute_columns, - context_window=context_window, budget_milli_node_hours=budget_milli_node_hours, + model_display_name=model_display_name, + model_labels=model_labels, + disable_early_stopping=disable_early_stopping, export_evaluated_data_items=export_evaluated_data_items, export_evaluated_data_items_bigquery_destination_uri=export_evaluated_data_items_bigquery_destination_uri, export_evaluated_data_items_override_destination=export_evaluated_data_items_override_destination, - quantiles=quantiles, - validation_options=validation_options, - model_display_name=model_display_name, - model_labels=model_labels, sync=sync, create_request_timeout=create_request_timeout, ) @@ -4261,31 +4475,21 @@ def run( @base.optional_sync() def _run( self, - dataset: 
datasets.TimeSeriesDataset, + dataset: datasets.TabularDataset, target_column: str, - time_column: str, - time_series_identifier_column: str, - unavailable_at_forecast_columns: List[str], - available_at_forecast_columns: List[str], - forecast_horizon: int, - data_granularity_unit: str, - data_granularity_count: int, training_fraction_split: Optional[float] = None, validation_fraction_split: Optional[float] = None, test_fraction_split: Optional[float] = None, predefined_split_column_name: Optional[str] = None, timestamp_split_column_name: Optional[str] = None, weight_column: Optional[str] = None, - time_series_attribute_columns: Optional[List[str]] = None, - context_window: Optional[int] = None, - export_evaluated_data_items: bool = False, - export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None, - export_evaluated_data_items_override_destination: bool = False, - quantiles: Optional[List[float]] = None, - validation_options: Optional[str] = None, budget_milli_node_hours: int = 1000, model_display_name: Optional[str] = None, model_labels: Optional[Dict[str, str]] = None, + disable_early_stopping: bool = False, + export_evaluated_data_items: bool = False, + export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None, + export_evaluated_data_items_override_destination: bool = False, sync: bool = True, create_request_timeout: Optional[float] = None, ) -> models.Model: @@ -4312,42 +4516,16 @@ def _run( Supported only for tabular Datasets. Args: - dataset (datasets.TimeSeriesDataset): + dataset (datasets.TabularDataset): Required. The dataset within the same Project from which data will be used to train the Model. The Dataset must use schema compatible with Model being trained, and what is compatible should be described in the used TrainingPipeline's [training_task_definition] [google.cloud.aiplatform.v1beta1.TrainingPipeline.training_task_definition]. - For time series Datasets, all their data is exported to + For tabular Datasets, all their data is exported to training, to pick and choose from. target_column (str): - Required. Name of the column that the Model is to predict values for. This - column must be unavailable at forecast. - time_column (str): - Required. Name of the column that identifies time order in the time series. - This column must be available at forecast. - time_series_identifier_column (str): - Required. Name of the column that identifies the time series. - unavailable_at_forecast_columns (List[str]): - Required. Column names of columns that are unavailable at forecast. - Each column contains information for the given entity (identified by the - [time_series_identifier_column]) that is unknown before the forecast - (e.g. population of a city in a given year, or weather on a given day). - available_at_forecast_columns (List[str]): - Required. Column names of columns that are available at forecast. - Each column contains information for the given entity (identified by the - [time_series_identifier_column]) that is known at forecast. - forecast_horizon: (int): - Required. The amount of time into the future for which forecasted values for the target are - returned. Expressed in number of units defined by the [data_granularity_unit] and - [data_granularity_count] field. Inclusive. - data_granularity_unit (str): - Required. The data granularity unit. Accepted values are ``minute``, - ``hour``, ``day``, ``week``, ``month``, ``year``. - data_granularity_count (int): - Required. 
The number of data granularity units between data points in the training - data. If [data_granularity_unit] is `minute`, can be 1, 5, 10, 15, or 30. For all other - values of [data_granularity_unit], must be 1. + Required. The name of the column values of which the Model is to predict. training_fraction_split (float): Optional. The fraction of the input data that is to be used to train the Model. This is ignored if Dataset is not provided. @@ -4373,58 +4551,17 @@ def _run( the column) must be in RFC 3339 `date-time` format, where `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a piece of data the key is not present or has an invalid value, - that piece is ignored by the pipeline. - Supported only for tabular and time series Datasets. - This parameter must be used with training_fraction_split, - validation_fraction_split, and test_fraction_split. - weight_column (str): - Optional. Name of the column that should be used as the weight column. - Higher values in this column give more importance to the row - during Model training. The column must have numeric values between 0 and - 10000 inclusively, and 0 value means that the row is ignored. - If the weight column field is not set, then all rows are assumed to have - equal weight of 1. This column must be available at forecast. - time_series_attribute_columns (List[str]): - Optional. Column names that should be used as attribute columns. - Each column is constant within a time series. - context_window (int): - Optional. The number of periods offset into the past to restrict past sequence, where each - period is one unit of granularity as defined by [period]. When not provided uses the - default value of 0 which means the model sets each series historical window to be 0 (also - known as "cold start"). Inclusive. - export_evaluated_data_items (bool): - Whether to export the test set predictions to a BigQuery table. - If False, then the export is not performed. - export_evaluated_data_items_bigquery_destination_uri (string): - Optional. URI of desired destination BigQuery table for exported test set predictions. - - Expected format: - ``bq://::
`` - - If not specified, then results are exported to the following auto-created BigQuery - table: - ``:export_evaluated_examples__.evaluated_examples`` - - Applies only if [export_evaluated_data_items] is True. - export_evaluated_data_items_override_destination (bool): - Whether to override the contents of [export_evaluated_data_items_bigquery_destination_uri], - if the table exists, for exported test set predictions. If False, and the - table exists, then the training job will fail. - - Applies only if [export_evaluated_data_items] is True and - [export_evaluated_data_items_bigquery_destination_uri] is specified. - quantiles (List[float]): - Quantiles to use for the `minimize-quantile-loss` - [AutoMLForecastingTrainingJob.optimization_objective]. This argument is required in - this case. - - Accepts up to 5 quantiles in the form of a double from 0 to 1, exclusive. - Each quantile must be unique. - validation_options (str): - Validation options for the data validation component. The available options are: - "fail-pipeline" - (default), will validate against the validation and fail the pipeline - if it fails. - "ignore-validation" - ignore the results of the validation and continue the pipeline + that piece is ignored by the pipeline. + Supported only for tabular and time series Datasets. + This parameter must be used with training_fraction_split, + validation_fraction_split, and test_fraction_split. + weight_column (str): + Optional. Name of the column that should be used as the weight column. + Higher values in this column give more importance to the row + during Model training. The column must have numeric values between 0 and + 10000 inclusively, and 0 value means that the row is ignored. + If the weight column field is not set, then all rows are assumed to have + equal weight of 1. budget_milli_node_hours (int): Optional. The train budget of creating this Model, expressed in milli node hours i.e. 1,000 value in this field means 1 node hour. @@ -4453,18 +4590,46 @@ def _run( are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. + disable_early_stopping (bool): + Required. If true, the entire budget is used. This disables the early stopping + feature. By default, the early stopping feature is enabled, which means + that training might stop before the entire training budget has been + used, if further training does no longer brings significant improvement + to the model. + export_evaluated_data_items (bool): + Whether to export the test set predictions to a BigQuery table. + If False, then the export is not performed. + export_evaluated_data_items_bigquery_destination_uri (string): + Optional. URI of desired destination BigQuery table for exported test set predictions. + + Expected format: + ``bq://::
`` + + If not specified, then results are exported to the following auto-created BigQuery + table: + ``:export_evaluated_examples__.evaluated_examples`` + + Applies only if [export_evaluated_data_items] is True. + export_evaluated_data_items_override_destination (bool): + Whether to override the contents of [export_evaluated_data_items_bigquery_destination_uri], + if the table exists, for exported test set predictions. If False, and the + table exists, then the training job will fail. + + Applies only if [export_evaluated_data_items] is True and + [export_evaluated_data_items_bigquery_destination_uri] is specified. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. create_request_timeout (float): Optional. The timeout for the create request in seconds. + Returns: model: The trained Vertex AI Model resource or None if training did not produce a Vertex AI Model. """ - training_task_definition = schema.training_job.definition.automl_forecasting + training_task_definition = schema.training_job.definition.automl_tabular # auto-populate transformations if self._column_transformations is None: @@ -4475,7 +4640,9 @@ def _run( ( self._column_transformations, column_names, - ) = dataset._get_default_column_transformations(target_column) + ) = column_transformations_utils.get_default_column_transformations( + dataset=dataset, target_column=target_column + ) _LOGGER.info( "The column transformation of type 'auto' was set for the following columns: %s." @@ -4485,24 +4652,15 @@ def _run( training_task_inputs_dict = { # required inputs "targetColumn": target_column, - "timeColumn": time_column, - "timeSeriesIdentifierColumn": time_series_identifier_column, - "timeSeriesAttributeColumns": time_series_attribute_columns, - "unavailableAtForecastColumns": unavailable_at_forecast_columns, - "availableAtForecastColumns": available_at_forecast_columns, - "forecastHorizon": forecast_horizon, - "dataGranularity": { - "unit": data_granularity_unit, - "quantity": data_granularity_count, - }, "transformations": self._column_transformations, "trainBudgetMilliNodeHours": budget_milli_node_hours, # optional inputs - "weightColumn": weight_column, - "contextWindow": context_window, - "quantiles": quantiles, - "validationOptions": validation_options, + "weightColumnName": weight_column, + "disableEarlyStopping": disable_early_stopping, "optimizationObjective": self._optimization_objective, + "predictionType": self._optimization_prediction_type, + "optimizationObjectiveRecallValue": self._optimization_objective_recall_value, + "optimizationObjectivePrecisionValue": self._optimization_objective_precision_value, } final_export_eval_bq_uri = export_evaluated_data_items_bigquery_destination_uri @@ -4528,7 +4686,7 @@ def _run( encryption_spec=self._model_encryption_spec, ) - new_model = self._run_job( + return self._run_job( training_task_definition=training_task_definition, training_task_inputs=training_task_inputs_dict, dataset=dataset, @@ -4541,14 +4699,6 @@ def _run( create_request_timeout=create_request_timeout, ) - if export_evaluated_data_items: - _LOGGER.info( - "Exported examples available at:\n%s" - % self.evaluated_data_items_bigquery_uri - ) - - return new_model - @property def _model_upload_fail_string(self) -> str: """Helper property for model upload failure.""" @@ -4557,23 +4707,6 @@ def _model_upload_fail_string(self) -> str: "Model." 
) - @property - def evaluated_data_items_bigquery_uri(self) -> Optional[str]: - """BigQuery location of exported evaluated examples from the Training Job - Returns: - str: BigQuery uri for the exported evaluated examples if the export - feature is enabled for training. - None: If the export feature was not enabled for training. - """ - - self._assert_gca_resource_is_available() - - metadata = self._gca_resource.training_task_metadata - if metadata and "evaluatedDataItemsBigqueryUri" in metadata: - return metadata["evaluatedDataItemsBigqueryUri"] - - return None - def _add_additional_experiments(self, additional_experiments: List[str]): """Add experiment flags to the training job. Args: @@ -4582,6 +4715,215 @@ def _add_additional_experiments(self, additional_experiments: List[str]): """ self._additional_experiments.extend(additional_experiments) + @staticmethod + def get_auto_column_specs( + dataset: datasets.TabularDataset, + target_column: str, + ) -> Dict[str, str]: + """Returns a dict with all non-target columns as keys and 'auto' as values. + + Example usage: + + column_specs = training_jobs.AutoMLTabularTrainingJob.get_auto_column_specs( + dataset=my_dataset, + target_column="my_target_column", + ) + + Args: + dataset (datasets.TabularDataset): + Required. Intended dataset. + target_column(str): + Required. Intended target column. + Returns: + Dict[str, str] + Column names as keys and 'auto' as values + """ + column_names = [ + column for column in dataset.column_names if column != target_column + ] + column_specs = {column: "auto" for column in column_names} + return column_specs + + class column_data_types: + AUTO = "auto" + NUMERIC = "numeric" + CATEGORICAL = "categorical" + TIMESTAMP = "timestamp" + TEXT = "text" + REPEATED_NUMERIC = "repeated_numeric" + REPEATED_CATEGORICAL = "repeated_categorical" + REPEATED_TEXT = "repeated_text" + + +class AutoMLForecastingTrainingJob(_ForecastingTrainingJob): + _model_type = "AutoML" + _training_task_definition = schema.training_job.definition.automl_forecasting + _supported_training_schemas = (schema.training_job.definition.automl_forecasting,) + + def run( + self, + dataset: datasets.TimeSeriesDataset, + target_column: str, + time_column: str, + time_series_identifier_column: str, + unavailable_at_forecast_columns: List[str], + available_at_forecast_columns: List[str], + forecast_horizon: int, + data_granularity_unit: str, + data_granularity_count: int, + training_fraction_split: Optional[float] = None, + validation_fraction_split: Optional[float] = None, + test_fraction_split: Optional[float] = None, + predefined_split_column_name: Optional[str] = None, + timestamp_split_column_name: Optional[str] = None, + weight_column: Optional[str] = None, + time_series_attribute_columns: Optional[List[str]] = None, + context_window: Optional[int] = None, + export_evaluated_data_items: bool = False, + export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None, + export_evaluated_data_items_override_destination: bool = False, + quantiles: Optional[List[float]] = None, + validation_options: Optional[str] = None, + budget_milli_node_hours: int = 1000, + model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, + additional_experiments: Optional[List[str]] = None, + hierarchy_group_columns: Optional[List[str]] = None, + hierarchy_group_total_weight: Optional[float] = None, + hierarchy_temporal_total_weight: Optional[float] = None, + hierarchy_group_temporal_total_weight: Optional[float] = None, + 
window_column: Optional[str] = None, + window_stride_length: Optional[int] = None, + window_max_count: Optional[int] = None, + holiday_regions: Optional[List[str]] = None, + sync: bool = True, + create_request_timeout: Optional[float] = None, + ) -> models.Model: + return super().run( + dataset=dataset, + target_column=target_column, + time_column=time_column, + time_series_identifier_column=time_series_identifier_column, + unavailable_at_forecast_columns=unavailable_at_forecast_columns, + available_at_forecast_columns=available_at_forecast_columns, + forecast_horizon=forecast_horizon, + data_granularity_unit=data_granularity_unit, + data_granularity_count=data_granularity_count, + training_fraction_split=training_fraction_split, + validation_fraction_split=validation_fraction_split, + test_fraction_split=test_fraction_split, + predefined_split_column_name=predefined_split_column_name, + timestamp_split_column_name=timestamp_split_column_name, + weight_column=weight_column, + time_series_attribute_columns=time_series_attribute_columns, + context_window=context_window, + budget_milli_node_hours=budget_milli_node_hours, + export_evaluated_data_items=export_evaluated_data_items, + export_evaluated_data_items_bigquery_destination_uri=export_evaluated_data_items_bigquery_destination_uri, + export_evaluated_data_items_override_destination=export_evaluated_data_items_override_destination, + quantiles=quantiles, + validation_options=validation_options, + model_display_name=model_display_name, + model_labels=model_labels, + additional_experiments=additional_experiments, + hierarchy_group_columns=hierarchy_group_columns, + hierarchy_group_total_weight=hierarchy_group_total_weight, + hierarchy_temporal_total_weight=hierarchy_temporal_total_weight, + hierarchy_group_temporal_total_weight=hierarchy_group_temporal_total_weight, + window_column=window_column, + window_stride_length=window_stride_length, + window_max_count=window_max_count, + holiday_regions=holiday_regions, + sync=sync, + create_request_timeout=create_request_timeout, + ) + + +class SequenceToSequencePlusForecastingTrainingJob(_ForecastingTrainingJob): + _model_type = "Seq2Seq" + _training_task_definition = schema.training_job.definition.seq2seq_plus_forecasting + _supported_training_schemas = ( + schema.training_job.definition.seq2seq_plus_forecasting, + ) + + def run( + self, + dataset: datasets.TimeSeriesDataset, + target_column: str, + time_column: str, + time_series_identifier_column: str, + unavailable_at_forecast_columns: List[str], + available_at_forecast_columns: List[str], + forecast_horizon: int, + data_granularity_unit: str, + data_granularity_count: int, + training_fraction_split: Optional[float] = None, + validation_fraction_split: Optional[float] = None, + test_fraction_split: Optional[float] = None, + predefined_split_column_name: Optional[str] = None, + timestamp_split_column_name: Optional[str] = None, + weight_column: Optional[str] = None, + time_series_attribute_columns: Optional[List[str]] = None, + context_window: Optional[int] = None, + export_evaluated_data_items: bool = False, + export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None, + export_evaluated_data_items_override_destination: bool = False, + quantiles: Optional[List[float]] = None, + validation_options: Optional[str] = None, + budget_milli_node_hours: int = 1000, + model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, + additional_experiments: Optional[List[str]] = None, + hierarchy_group_columns: 
Optional[List[str]] = None, + hierarchy_group_total_weight: Optional[float] = None, + hierarchy_temporal_total_weight: Optional[float] = None, + hierarchy_group_temporal_total_weight: Optional[float] = None, + window_column: Optional[str] = None, + window_stride_length: Optional[int] = None, + window_max_count: Optional[int] = None, + holiday_regions: Optional[List[str]] = None, + sync: bool = True, + create_request_timeout: Optional[float] = None, + ) -> models.Model: + return super().run( + dataset=dataset, + target_column=target_column, + time_column=time_column, + time_series_identifier_column=time_series_identifier_column, + unavailable_at_forecast_columns=unavailable_at_forecast_columns, + available_at_forecast_columns=available_at_forecast_columns, + forecast_horizon=forecast_horizon, + data_granularity_unit=data_granularity_unit, + data_granularity_count=data_granularity_count, + training_fraction_split=training_fraction_split, + validation_fraction_split=validation_fraction_split, + test_fraction_split=test_fraction_split, + predefined_split_column_name=predefined_split_column_name, + timestamp_split_column_name=timestamp_split_column_name, + weight_column=weight_column, + time_series_attribute_columns=time_series_attribute_columns, + context_window=context_window, + budget_milli_node_hours=budget_milli_node_hours, + export_evaluated_data_items=export_evaluated_data_items, + export_evaluated_data_items_bigquery_destination_uri=export_evaluated_data_items_bigquery_destination_uri, + export_evaluated_data_items_override_destination=export_evaluated_data_items_override_destination, + quantiles=quantiles, + validation_options=validation_options, + model_display_name=model_display_name, + model_labels=model_labels, + additional_experiments=additional_experiments, + hierarchy_group_columns=hierarchy_group_columns, + hierarchy_group_total_weight=hierarchy_group_total_weight, + hierarchy_temporal_total_weight=hierarchy_temporal_total_weight, + hierarchy_group_temporal_total_weight=hierarchy_group_temporal_total_weight, + window_column=window_column, + window_stride_length=window_stride_length, + window_max_count=window_max_count, + holiday_regions=holiday_regions, + sync=sync, + create_request_timeout=create_request_timeout, + ) + class AutoMLImageTrainingJob(_TrainingJob): _supported_training_schemas = ( diff --git a/google/cloud/aiplatform/version.py b/google/cloud/aiplatform/version.py index ec04f5d40b..21995202bd 100644 --- a/google/cloud/aiplatform/version.py +++ b/google/cloud/aiplatform/version.py @@ -15,4 +15,4 @@ # limitations under the License. 
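The two forecasting job classes introduced above are thin wrappers that forward every argument to the shared ``_ForecastingTrainingJob.run()``; only ``_model_type`` and ``_training_task_definition`` differ between them. A minimal usage sketch, assuming a pre-created ``TimeSeriesDataset``; the project, dataset resource, and column names are placeholders, not values from this change:

.. code-block:: python

    from google.cloud import aiplatform

    aiplatform.init(project="my-project", location="us-central1")  # placeholder project/region

    # Placeholder dataset resource; any TimeSeriesDataset containing the referenced columns works.
    dataset = aiplatform.TimeSeriesDataset(
        "projects/my-project/locations/us-central1/datasets/1234567890"
    )

    job = aiplatform.AutoMLForecastingTrainingJob(
        display_name="sales-forecast",
        optimization_objective="minimize-rmse",
    )

    model = job.run(
        dataset=dataset,
        target_column="sales",
        time_column="date",
        time_series_identifier_column="store_id",
        unavailable_at_forecast_columns=["sales"],
        available_at_forecast_columns=["date", "promo"],
        forecast_horizon=30,
        data_granularity_unit="day",
        data_granularity_count=1,
        budget_milli_node_hours=1000,
    )

``SequenceToSequencePlusForecastingTrainingJob`` exposes the same ``run()`` signature, so swapping the class name is the only change needed to train the Seq2Seq+ variant.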
# -__version__ = "1.13.1" +__version__ = "1.14.0" diff --git a/google/cloud/aiplatform_v1/__init__.py b/google/cloud/aiplatform_v1/__init__.py index 055ceb5de2..f17d902b53 100644 --- a/google/cloud/aiplatform_v1/__init__.py +++ b/google/cloud/aiplatform_v1/__init__.py @@ -362,10 +362,12 @@ from .types.model_service import UploadModelResponse from .types.operation import DeleteOperationMetadata from .types.operation import GenericOperationMetadata +from .types.pipeline_failure_policy import PipelineFailurePolicy from .types.pipeline_job import PipelineJob from .types.pipeline_job import PipelineJobDetail from .types.pipeline_job import PipelineTaskDetail from .types.pipeline_job import PipelineTaskExecutorDetail +from .types.pipeline_job import PipelineTemplateMetadata from .types.pipeline_service import CancelPipelineJobRequest from .types.pipeline_service import CancelTrainingPipelineRequest from .types.pipeline_service import CreatePipelineJobRequest @@ -829,12 +831,14 @@ "NearestNeighborSearchOperationMetadata", "NfsMount", "PauseModelDeploymentMonitoringJobRequest", + "PipelineFailurePolicy", "PipelineJob", "PipelineJobDetail", "PipelineServiceClient", "PipelineState", "PipelineTaskDetail", "PipelineTaskExecutorDetail", + "PipelineTemplateMetadata", "Port", "PredefinedSplit", "PredictRequest", diff --git a/google/cloud/aiplatform_v1/services/migration_service/client.py b/google/cloud/aiplatform_v1/services/migration_service/client.py index 99736fc796..269c1529b4 100644 --- a/google/cloud/aiplatform_v1/services/migration_service/client.py +++ b/google/cloud/aiplatform_v1/services/migration_service/client.py @@ -192,40 +192,40 @@ def parse_annotated_dataset_path(path: str) -> Dict[str, str]: @staticmethod def dataset_path( project: str, + location: str, dataset: str, ) -> str: """Returns a fully-qualified dataset string.""" - return "projects/{project}/datasets/{dataset}".format( + return "projects/{project}/locations/{location}/datasets/{dataset}".format( project=project, + location=location, dataset=dataset, ) @staticmethod def parse_dataset_path(path: str) -> Dict[str, str]: """Parses a dataset path into its component segments.""" - m = re.match(r"^projects/(?P.+?)/datasets/(?P.+?)$", path) + m = re.match( + r"^projects/(?P.+?)/locations/(?P.+?)/datasets/(?P.+?)$", + path, + ) return m.groupdict() if m else {} @staticmethod def dataset_path( project: str, - location: str, dataset: str, ) -> str: """Returns a fully-qualified dataset string.""" - return "projects/{project}/locations/{location}/datasets/{dataset}".format( + return "projects/{project}/datasets/{dataset}".format( project=project, - location=location, dataset=dataset, ) @staticmethod def parse_dataset_path(path: str) -> Dict[str, str]: """Parses a dataset path into its component segments.""" - m = re.match( - r"^projects/(?P.+?)/locations/(?P.+?)/datasets/(?P.+?)$", - path, - ) + m = re.match(r"^projects/(?P.+?)/datasets/(?P.+?)$", path) return m.groupdict() if m else {} @staticmethod diff --git a/google/cloud/aiplatform_v1/types/__init__.py b/google/cloud/aiplatform_v1/types/__init__.py index 5bd19073ed..773a75b3cf 100644 --- a/google/cloud/aiplatform_v1/types/__init__.py +++ b/google/cloud/aiplatform_v1/types/__init__.py @@ -423,6 +423,7 @@ PipelineJobDetail, PipelineTaskDetail, PipelineTaskExecutorDetail, + PipelineTemplateMetadata, ) from .pipeline_service import ( CancelPipelineJobRequest, @@ -883,10 +884,12 @@ "UploadModelResponse", "DeleteOperationMetadata", "GenericOperationMetadata", + "PipelineFailurePolicy", 
"PipelineJob", "PipelineJobDetail", "PipelineTaskDetail", "PipelineTaskExecutorDetail", + "PipelineTemplateMetadata", "CancelPipelineJobRequest", "CancelTrainingPipelineRequest", "CreatePipelineJobRequest", diff --git a/google/cloud/aiplatform_v1/types/endpoint.py b/google/cloud/aiplatform_v1/types/endpoint.py index 1c21e85e73..22c1d6804c 100644 --- a/google/cloud/aiplatform_v1/types/endpoint.py +++ b/google/cloud/aiplatform_v1/types/endpoint.py @@ -221,10 +221,10 @@ class DeployedModel(proto.Message): This value should be 1-10 characters, and valid characters are /[0-9]/. model (str): - Required. The name of the Model that this is - the deployment of. Note that the Model may be in - a different location than the DeployedModel's - Endpoint. + Required. The resource name of the Model that + this is the deployment of. Note that the Model + may be in a different location than the + DeployedModel's Endpoint. display_name (str): The display name of the DeployedModel. If not provided upon creation, the Model's display_name is used. diff --git a/google/cloud/aiplatform_v1/types/explanation_metadata.py b/google/cloud/aiplatform_v1/types/explanation_metadata.py index c528cf94f4..5d9dcca392 100644 --- a/google/cloud/aiplatform_v1/types/explanation_metadata.py +++ b/google/cloud/aiplatform_v1/types/explanation_metadata.py @@ -69,6 +69,9 @@ class ExplanationMetadata(proto.Message): including the URI scheme, than the one given on input. The output URI will point to a location where the user only has a read access. + latent_space_source (str): + Name of the source to generate embeddings for + example based explanations. """ class InputMetadata(proto.Message): @@ -457,6 +460,10 @@ class OutputMetadata(proto.Message): proto.STRING, number=3, ) + latent_space_source = proto.Field( + proto.STRING, + number=5, + ) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/aiplatform_v1/types/featurestore.py b/google/cloud/aiplatform_v1/types/featurestore.py index c6cb0022a0..e88b5c17bc 100644 --- a/google/cloud/aiplatform_v1/types/featurestore.py +++ b/google/cloud/aiplatform_v1/types/featurestore.py @@ -92,12 +92,46 @@ class OnlineServingConfig(proto.Message): set to 0, the featurestore will not have an online store and cannot be used for online serving. + scaling (google.cloud.aiplatform_v1.types.Featurestore.OnlineServingConfig.Scaling): + Online serving scaling configuration. Only one of + ``fixed_node_count`` and ``scaling`` can be set. Setting one + will reset the other. """ + class Scaling(proto.Message): + r"""Online serving scaling configuration. If min_node_count and + max_node_count are set to the same value, the cluster will be + configured with the fixed number of node (no auto-scaling). + + Attributes: + min_node_count (int): + Required. The minimum number of nodes to + scale down to. Must be greater than or equal to + 1. + max_node_count (int): + The maximum number of nodes to scale up to. Must be greater + than min_node_count, and less than or equal to 10 times of + 'min_node_count'. 
+ """ + + min_node_count = proto.Field( + proto.INT32, + number=1, + ) + max_node_count = proto.Field( + proto.INT32, + number=2, + ) + fixed_node_count = proto.Field( proto.INT32, number=2, ) + scaling = proto.Field( + proto.MESSAGE, + number=4, + message="Featurestore.OnlineServingConfig.Scaling", + ) name = proto.Field( proto.STRING, diff --git a/google/cloud/aiplatform_v1/types/featurestore_online_service.py b/google/cloud/aiplatform_v1/types/featurestore_online_service.py index 7a337359f8..0d0ab77ed3 100644 --- a/google/cloud/aiplatform_v1/types/featurestore_online_service.py +++ b/google/cloud/aiplatform_v1/types/featurestore_online_service.py @@ -236,7 +236,7 @@ class StreamingReadFeatureValuesRequest(proto.Message): class FeatureValue(proto.Message): r"""Value for a feature. - NEXT ID: 15 + (-- NEXT ID: 15 --) This message has `oneof`_ fields (mutually exclusive fields). For each oneof, at most one member field can be set at the same time. diff --git a/google/cloud/aiplatform_v1/types/manual_batch_tuning_parameters.py b/google/cloud/aiplatform_v1/types/manual_batch_tuning_parameters.py index df542f78a7..9d1d396059 100644 --- a/google/cloud/aiplatform_v1/types/manual_batch_tuning_parameters.py +++ b/google/cloud/aiplatform_v1/types/manual_batch_tuning_parameters.py @@ -38,7 +38,7 @@ class ManualBatchTuningParameters(proto.Message): value will result in a whole batch not fitting in a machine's memory, and the whole operation will fail. - The default value is 4. + The default value is 64. """ batch_size = proto.Field( diff --git a/google/cloud/aiplatform_v1/types/model_evaluation.py b/google/cloud/aiplatform_v1/types/model_evaluation.py index 51b7f5c158..026d719282 100644 --- a/google/cloud/aiplatform_v1/types/model_evaluation.py +++ b/google/cloud/aiplatform_v1/types/model_evaluation.py @@ -37,6 +37,8 @@ class ModelEvaluation(proto.Message): name (str): Output only. The resource name of the ModelEvaluation. + display_name (str): + The display name of the ModelEvaluation. metrics_schema_uri (str): Points to a YAML file stored on Google Cloud Storage describing the @@ -92,6 +94,11 @@ class ModelEvaluation(proto.Message): [ExplanationSpec][google.cloud.aiplatform.v1.ExplanationSpec] that are used for explaining the predicted values on the evaluated data. + metadata (google.protobuf.struct_pb2.Value): + The metadata of the ModelEvaluation. For the ModelEvaluation + uploaded from Managed Pipeline, metadata contains a + structured value with keys of "pipeline_job_id", + "evaluation_dataset_type", "evaluation_dataset_path". """ class ModelEvaluationExplanationSpec(proto.Message): @@ -123,6 +130,10 @@ class ModelEvaluationExplanationSpec(proto.Message): proto.STRING, number=1, ) + display_name = proto.Field( + proto.STRING, + number=10, + ) metrics_schema_uri = proto.Field( proto.STRING, number=2, @@ -159,6 +170,11 @@ class ModelEvaluationExplanationSpec(proto.Message): number=9, message=ModelEvaluationExplanationSpec, ) + metadata = proto.Field( + proto.MESSAGE, + number=11, + message=struct_pb2.Value, + ) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/aiplatform_v1/types/model_monitoring.py b/google/cloud/aiplatform_v1/types/model_monitoring.py index 4987479e5b..0d0ab49d15 100644 --- a/google/cloud/aiplatform_v1/types/model_monitoring.py +++ b/google/cloud/aiplatform_v1/types/model_monitoring.py @@ -84,6 +84,8 @@ class TrainingDataset(proto.Message): "csv" The source file is a CSV file. + "jsonl" + The source file is a JSONL file. 
target_field (str): The target field name the model is to predict. This field will be excluded when doing diff --git a/google/cloud/aiplatform_v1/types/pipeline_failure_policy.py b/google/cloud/aiplatform_v1/types/pipeline_failure_policy.py new file mode 100644 index 0000000000..4400d72fc5 --- /dev/null +++ b/google/cloud/aiplatform_v1/types/pipeline_failure_policy.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import proto # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.aiplatform.v1", + manifest={ + "PipelineFailurePolicy", + }, +) + + +class PipelineFailurePolicy(proto.Enum): + r"""Represents the failure policy of a pipeline. Currently, the default + of a pipeline is that the pipeline will continue to run until no + more tasks can be executed, also known as + PIPELINE_FAILURE_POLICY_FAIL_SLOW. However, if a pipeline is set to + PIPELINE_FAILURE_POLICY_FAIL_FAST, it will stop scheduling any new + tasks when a task has failed. Any scheduled tasks will continue to + completion. + """ + PIPELINE_FAILURE_POLICY_UNSPECIFIED = 0 + PIPELINE_FAILURE_POLICY_FAIL_SLOW = 1 + PIPELINE_FAILURE_POLICY_FAIL_FAST = 2 + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/aiplatform_v1/types/pipeline_job.py b/google/cloud/aiplatform_v1/types/pipeline_job.py index cbfd7335e7..3e2152b65e 100644 --- a/google/cloud/aiplatform_v1/types/pipeline_job.py +++ b/google/cloud/aiplatform_v1/types/pipeline_job.py @@ -19,6 +19,7 @@ from google.cloud.aiplatform_v1.types import context from google.cloud.aiplatform_v1.types import encryption_spec as gca_encryption_spec from google.cloud.aiplatform_v1.types import execution as gca_execution +from google.cloud.aiplatform_v1.types import pipeline_failure_policy from google.cloud.aiplatform_v1.types import pipeline_state from google.cloud.aiplatform_v1.types import value as gca_value from google.protobuf import struct_pb2 # type: ignore @@ -30,6 +31,7 @@ package="google.cloud.aiplatform.v1", manifest={ "PipelineJob", + "PipelineTemplateMetadata", "PipelineJobDetail", "PipelineTaskDetail", "PipelineTaskExecutorDetail", @@ -109,6 +111,15 @@ class PipelineJob(proto.Message): to the GCP resources being launched, if applied, such as Vertex AI Training or Dataflow job. If left unspecified, the workload is not peered with any network. + template_uri (str): + A template uri from where the + [PipelineJob.pipeline_spec][google.cloud.aiplatform.v1.PipelineJob.pipeline_spec], + if empty, will be downloaded. + template_metadata (google.cloud.aiplatform_v1.types.PipelineTemplateMetadata): + Output only. Pipeline template metadata. Will fill up fields + if + [PipelineJob.template_uri][google.cloud.aiplatform.v1.PipelineJob.template_uri] + is from supported template registry. 
""" class RuntimeConfig(proto.Message): @@ -144,6 +155,14 @@ class RuntimeConfig(proto.Message): ``PipelineJob.pipeline_spec.schema_version`` 2.1.0, such as pipelines built using Kubeflow Pipelines SDK 1.9 or higher and the v2 DSL. + failure_policy (google.cloud.aiplatform_v1.types.PipelineFailurePolicy): + Represents the failure policy of a pipeline. Currently, the + default of a pipeline is that the pipeline will continue to + run until no more tasks can be executed, also known as + PIPELINE_FAILURE_POLICY_FAIL_SLOW. However, if a pipeline is + set to PIPELINE_FAILURE_POLICY_FAIL_FAST, it will stop + scheduling any new tasks when a task has failed. Any + scheduled tasks will continue to completion. """ parameters = proto.MapField( @@ -162,6 +181,11 @@ class RuntimeConfig(proto.Message): number=3, message=struct_pb2.Value, ) + failure_policy = proto.Field( + proto.ENUM, + number=4, + enum=pipeline_failure_policy.PipelineFailurePolicy, + ) name = proto.Field( proto.STRING, @@ -234,6 +258,38 @@ class RuntimeConfig(proto.Message): proto.STRING, number=18, ) + template_uri = proto.Field( + proto.STRING, + number=19, + ) + template_metadata = proto.Field( + proto.MESSAGE, + number=20, + message="PipelineTemplateMetadata", + ) + + +class PipelineTemplateMetadata(proto.Message): + r"""Pipeline template metadata if + [PipelineJob.template_uri][google.cloud.aiplatform.v1.PipelineJob.template_uri] + is from supported template registry. Currently, the only supported + registry is Artifact Registry. + + Attributes: + version (str): + The version_name in artifact registry. + + Will always be presented in output if the + [PipelineJob.template_uri][google.cloud.aiplatform.v1.PipelineJob.template_uri] + is from supported template registry. + + Format is "sha256:abcdef123456...". + """ + + version = proto.Field( + proto.STRING, + number=3, + ) class PipelineJobDetail(proto.Message): diff --git a/google/cloud/aiplatform_v1/types/pipeline_service.py b/google/cloud/aiplatform_v1/types/pipeline_service.py index 1b02ad9ce3..4e197a08e1 100644 --- a/google/cloud/aiplatform_v1/types/pipeline_service.py +++ b/google/cloud/aiplatform_v1/types/pipeline_service.py @@ -286,8 +286,8 @@ class ListPipelineJobsRequest(proto.Message): - ``labels``: Supports key-value equality and key presence. - ``template_uri``: Supports ``=``, ``!=`` comparisons, and ``:`` wildcard. - - ``template_metadata.version_name``: Supports ``=``, - ``!=`` comparisons, and ``:`` wildcard. + - ``template_metadata.version``: Supports ``=``, ``!=`` + comparisons, and ``:`` wildcard. Filter expressions can be combined together using logical operators (``AND`` & ``OR``). 
For example: diff --git a/google/cloud/aiplatform_v1beta1/__init__.py b/google/cloud/aiplatform_v1beta1/__init__.py index b04a7bcd87..47b70735ad 100644 --- a/google/cloud/aiplatform_v1beta1/__init__.py +++ b/google/cloud/aiplatform_v1beta1/__init__.py @@ -115,6 +115,8 @@ from .types.explanation import Attribution from .types.explanation import BlurBaselineConfig from .types.explanation import Examples +from .types.explanation import ExamplesOverride +from .types.explanation import ExamplesRestrictionsNamespace from .types.explanation import Explanation from .types.explanation import ExplanationMetadataOverride from .types.explanation import ExplanationParameters @@ -123,6 +125,8 @@ from .types.explanation import FeatureNoiseSigma from .types.explanation import IntegratedGradientsAttribution from .types.explanation import ModelExplanation +from .types.explanation import Neighbor +from .types.explanation import Presets from .types.explanation import SampledShapleyAttribution from .types.explanation import SmoothGradConfig from .types.explanation import XraiAttribution @@ -361,12 +365,16 @@ from .types.model_service import ListModelVersionsRequest from .types.model_service import ListModelVersionsResponse from .types.model_service import MergeVersionAliasesRequest +from .types.model_service import UpdateExplanationDatasetOperationMetadata +from .types.model_service import UpdateExplanationDatasetRequest +from .types.model_service import UpdateExplanationDatasetResponse from .types.model_service import UpdateModelRequest from .types.model_service import UploadModelOperationMetadata from .types.model_service import UploadModelRequest from .types.model_service import UploadModelResponse from .types.operation import DeleteOperationMetadata from .types.operation import GenericOperationMetadata +from .types.pipeline_failure_policy import PipelineFailurePolicy from .types.pipeline_job import PipelineJob from .types.pipeline_job import PipelineJobDetail from .types.pipeline_job import PipelineTaskDetail @@ -654,6 +662,8 @@ "EnvVar", "Event", "Examples", + "ExamplesOverride", + "ExamplesRestrictionsNamespace", "Execution", "ExplainRequest", "ExplainResponse", @@ -838,8 +848,10 @@ "MutateDeployedIndexRequest", "MutateDeployedIndexResponse", "NearestNeighborSearchOperationMetadata", + "Neighbor", "NfsMount", "PauseModelDeploymentMonitoringJobRequest", + "PipelineFailurePolicy", "PipelineJob", "PipelineJobDetail", "PipelineServiceClient", @@ -854,6 +866,7 @@ "PredictResponse", "PredictSchemata", "PredictionServiceClient", + "Presets", "PrivateEndpoints", "PurgeArtifactsMetadata", "PurgeArtifactsRequest", @@ -929,6 +942,9 @@ "UpdateEndpointRequest", "UpdateEntityTypeRequest", "UpdateExecutionRequest", + "UpdateExplanationDatasetOperationMetadata", + "UpdateExplanationDatasetRequest", + "UpdateExplanationDatasetResponse", "UpdateFeatureRequest", "UpdateFeaturestoreOperationMetadata", "UpdateFeaturestoreRequest", diff --git a/google/cloud/aiplatform_v1beta1/gapic_metadata.json b/google/cloud/aiplatform_v1beta1/gapic_metadata.json index 8786a28c47..777e311ce7 100644 --- a/google/cloud/aiplatform_v1beta1/gapic_metadata.json +++ b/google/cloud/aiplatform_v1beta1/gapic_metadata.json @@ -1326,6 +1326,11 @@ "merge_version_aliases" ] }, + "UpdateExplanationDataset": { + "methods": [ + "update_explanation_dataset" + ] + }, "UpdateModel": { "methods": [ "update_model" @@ -1401,6 +1406,11 @@ "merge_version_aliases" ] }, + "UpdateExplanationDataset": { + "methods": [ + "update_explanation_dataset" + ] + }, 
"UpdateModel": { "methods": [ "update_model" diff --git a/google/cloud/aiplatform_v1beta1/services/model_service/async_client.py b/google/cloud/aiplatform_v1beta1/services/model_service/async_client.py index 961278f9e0..5183fbed2e 100644 --- a/google/cloud/aiplatform_v1beta1/services/model_service/async_client.py +++ b/google/cloud/aiplatform_v1beta1/services/model_service/async_client.py @@ -394,6 +394,17 @@ async def sample_get_model(): Required. The name of the Model resource. Format: ``projects/{project}/locations/{location}/models/{model}`` + In order to retrieve a specific version of the model, + also provide the version ID or version alias. Example: + ``projects/{project}/locations/{location}/models/{model}@2`` + or + ``projects/{project}/locations/{location}/models/{model}@golden`` + If no version ID or alias is specified, the "default" + version will be returned. The "default" version alias is + created for the first version of the model, and can be + moved to other versions later on. There will be exactly + one default version. + This corresponds to the ``name`` field on the ``request`` instance; if ``request`` is provided, this should not be set. @@ -796,6 +807,120 @@ async def sample_update_model(): # Done; return the response. return response + async def update_explanation_dataset( + self, + request: Union[model_service.UpdateExplanationDatasetRequest, dict] = None, + *, + model: str = None, + retry: OptionalRetry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> operation_async.AsyncOperation: + r"""Incrementally update the dataset used for an examples + model. + + .. code-block:: python + + from google.cloud import aiplatform_v1beta1 + + async def sample_update_explanation_dataset(): + # Create a client + client = aiplatform_v1beta1.ModelServiceAsyncClient() + + # Initialize request argument(s) + request = aiplatform_v1beta1.UpdateExplanationDatasetRequest( + model="model_value", + ) + + # Make the request + operation = client.update_explanation_dataset(request=request) + + print("Waiting for operation to complete...") + + response = await operation.result() + + # Handle the response + print(response) + + Args: + request (Union[google.cloud.aiplatform_v1beta1.types.UpdateExplanationDatasetRequest, dict]): + The request object. Request message for + [ModelService.UpdateExplanationDataset][google.cloud.aiplatform.v1beta1.ModelService.UpdateExplanationDataset]. + model (:class:`str`): + Required. The resource name of the Model to update. + Format: + ``projects/{project}/locations/{location}/models/{model}`` + + This corresponds to the ``model`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + google.api_core.operation_async.AsyncOperation: + An object representing a long-running operation. + + The result type for the operation will be + :class:`google.cloud.aiplatform_v1beta1.types.UpdateExplanationDatasetResponse` + Response message of + [ModelService.UpdateExplanationDataset][google.cloud.aiplatform.v1beta1.ModelService.UpdateExplanationDataset] + operation. + + """ + # Create or coerce a protobuf request object. 
+ # Quick check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + has_flattened_params = any([model]) + if request is not None and has_flattened_params: + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + request = model_service.UpdateExplanationDatasetRequest(request) + + # If we have keyword arguments corresponding to fields on the + # request, apply these. + if model is not None: + request.model = model + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = gapic_v1.method_async.wrap_method( + self._client._transport.update_explanation_dataset, + default_timeout=None, + client_info=DEFAULT_CLIENT_INFO, + ) + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata((("model", request.model),)), + ) + + # Send the request. + response = await rpc( + request, + retry=retry, + timeout=timeout, + metadata=metadata, + ) + + # Wrap the response in an operation future. + response = operation_async.from_gapic( + response, + self._client._transport.operations_client, + model_service.UpdateExplanationDatasetResponse, + metadata_type=model_service.UpdateExplanationDatasetOperationMetadata, + ) + + # Done; return the response. + return response + async def delete_model( self, request: Union[model_service.DeleteModelRequest, dict] = None, @@ -1218,7 +1343,11 @@ async def sample_export_model(): [ModelService.ExportModel][google.cloud.aiplatform.v1beta1.ModelService.ExportModel]. name (:class:`str`): Required. The resource name of the - Model to export. + Model to export. The resource name may + contain version id or version alias to + specify the version, if no version is + specified, the default version will be + exported. This corresponds to the ``name`` field on the ``request`` instance; if ``request`` is provided, this diff --git a/google/cloud/aiplatform_v1beta1/services/model_service/client.py b/google/cloud/aiplatform_v1beta1/services/model_service/client.py index 02acd097e1..1cecdc485c 100644 --- a/google/cloud/aiplatform_v1beta1/services/model_service/client.py +++ b/google/cloud/aiplatform_v1beta1/services/model_service/client.py @@ -698,6 +698,17 @@ def sample_get_model(): Required. The name of the Model resource. Format: ``projects/{project}/locations/{location}/models/{model}`` + In order to retrieve a specific version of the model, + also provide the version ID or version alias. Example: + ``projects/{project}/locations/{location}/models/{model}@2`` + or + ``projects/{project}/locations/{location}/models/{model}@golden`` + If no version ID or alias is specified, the "default" + version will be returned. The "default" version alias is + created for the first version of the model, and can be + moved to other versions later on. There will be exactly + one default version. + This corresponds to the ``name`` field on the ``request`` instance; if ``request`` is provided, this should not be set. @@ -1100,6 +1111,122 @@ def sample_update_model(): # Done; return the response. 
return response + def update_explanation_dataset( + self, + request: Union[model_service.UpdateExplanationDatasetRequest, dict] = None, + *, + model: str = None, + retry: OptionalRetry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> gac_operation.Operation: + r"""Incrementally update the dataset used for an examples + model. + + .. code-block:: python + + from google.cloud import aiplatform_v1beta1 + + def sample_update_explanation_dataset(): + # Create a client + client = aiplatform_v1beta1.ModelServiceClient() + + # Initialize request argument(s) + request = aiplatform_v1beta1.UpdateExplanationDatasetRequest( + model="model_value", + ) + + # Make the request + operation = client.update_explanation_dataset(request=request) + + print("Waiting for operation to complete...") + + response = operation.result() + + # Handle the response + print(response) + + Args: + request (Union[google.cloud.aiplatform_v1beta1.types.UpdateExplanationDatasetRequest, dict]): + The request object. Request message for + [ModelService.UpdateExplanationDataset][google.cloud.aiplatform.v1beta1.ModelService.UpdateExplanationDataset]. + model (str): + Required. The resource name of the Model to update. + Format: + ``projects/{project}/locations/{location}/models/{model}`` + + This corresponds to the ``model`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + google.api_core.operation.Operation: + An object representing a long-running operation. + + The result type for the operation will be + :class:`google.cloud.aiplatform_v1beta1.types.UpdateExplanationDatasetResponse` + Response message of + [ModelService.UpdateExplanationDataset][google.cloud.aiplatform.v1beta1.ModelService.UpdateExplanationDataset] + operation. + + """ + # Create or coerce a protobuf request object. + # Quick check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + has_flattened_params = any([model]) + if request is not None and has_flattened_params: + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + # Minor optimization to avoid making a copy if the user passes + # in a model_service.UpdateExplanationDatasetRequest. + # There's no risk of modifying the input as we've already verified + # there are no flattened fields. + if not isinstance(request, model_service.UpdateExplanationDatasetRequest): + request = model_service.UpdateExplanationDatasetRequest(request) + # If we have keyword arguments corresponding to fields on the + # request, apply these. + if model is not None: + request.model = model + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = self._transport._wrapped_methods[ + self._transport.update_explanation_dataset + ] + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata((("model", request.model),)), + ) + + # Send the request. + response = rpc( + request, + retry=retry, + timeout=timeout, + metadata=metadata, + ) + + # Wrap the response in an operation future. 
+ response = gac_operation.from_gapic( + response, + self._transport.operations_client, + model_service.UpdateExplanationDatasetResponse, + metadata_type=model_service.UpdateExplanationDatasetOperationMetadata, + ) + + # Done; return the response. + return response + def delete_model( self, request: Union[model_service.DeleteModelRequest, dict] = None, @@ -1522,7 +1649,11 @@ def sample_export_model(): [ModelService.ExportModel][google.cloud.aiplatform.v1beta1.ModelService.ExportModel]. name (str): Required. The resource name of the - Model to export. + Model to export. The resource name may + contain version id or version alias to + specify the version, if no version is + specified, the default version will be + exported. This corresponds to the ``name`` field on the ``request`` instance; if ``request`` is provided, this diff --git a/google/cloud/aiplatform_v1beta1/services/model_service/transports/base.py b/google/cloud/aiplatform_v1beta1/services/model_service/transports/base.py index 232a8945bd..60718e8ef9 100644 --- a/google/cloud/aiplatform_v1beta1/services/model_service/transports/base.py +++ b/google/cloud/aiplatform_v1beta1/services/model_service/transports/base.py @@ -155,6 +155,11 @@ def _prep_wrapped_messages(self, client_info): default_timeout=5.0, client_info=client_info, ), + self.update_explanation_dataset: gapic_v1.method.wrap_method( + self.update_explanation_dataset, + default_timeout=None, + client_info=client_info, + ), self.delete_model: gapic_v1.method.wrap_method( self.delete_model, default_timeout=5.0, @@ -266,6 +271,15 @@ def update_model( ]: raise NotImplementedError() + @property + def update_explanation_dataset( + self, + ) -> Callable[ + [model_service.UpdateExplanationDatasetRequest], + Union[operations_pb2.Operation, Awaitable[operations_pb2.Operation]], + ]: + raise NotImplementedError() + @property def delete_model( self, diff --git a/google/cloud/aiplatform_v1beta1/services/model_service/transports/grpc.py b/google/cloud/aiplatform_v1beta1/services/model_service/transports/grpc.py index 9e7c5971a4..55aeb13351 100644 --- a/google/cloud/aiplatform_v1beta1/services/model_service/transports/grpc.py +++ b/google/cloud/aiplatform_v1beta1/services/model_service/transports/grpc.py @@ -382,6 +382,35 @@ def update_model( ) return self._stubs["update_model"] + @property + def update_explanation_dataset( + self, + ) -> Callable[ + [model_service.UpdateExplanationDatasetRequest], operations_pb2.Operation + ]: + r"""Return a callable for the update explanation dataset method over gRPC. + + Incrementally update the dataset used for an examples + model. + + Returns: + Callable[[~.UpdateExplanationDatasetRequest], + ~.Operation]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. 
+ if "update_explanation_dataset" not in self._stubs: + self._stubs["update_explanation_dataset"] = self.grpc_channel.unary_unary( + "/google.cloud.aiplatform.v1beta1.ModelService/UpdateExplanationDataset", + request_serializer=model_service.UpdateExplanationDatasetRequest.serialize, + response_deserializer=operations_pb2.Operation.FromString, + ) + return self._stubs["update_explanation_dataset"] + @property def delete_model( self, diff --git a/google/cloud/aiplatform_v1beta1/services/model_service/transports/grpc_asyncio.py b/google/cloud/aiplatform_v1beta1/services/model_service/transports/grpc_asyncio.py index e88c9094aa..eafeb60964 100644 --- a/google/cloud/aiplatform_v1beta1/services/model_service/transports/grpc_asyncio.py +++ b/google/cloud/aiplatform_v1beta1/services/model_service/transports/grpc_asyncio.py @@ -393,6 +393,36 @@ def update_model( ) return self._stubs["update_model"] + @property + def update_explanation_dataset( + self, + ) -> Callable[ + [model_service.UpdateExplanationDatasetRequest], + Awaitable[operations_pb2.Operation], + ]: + r"""Return a callable for the update explanation dataset method over gRPC. + + Incrementally update the dataset used for an examples + model. + + Returns: + Callable[[~.UpdateExplanationDatasetRequest], + Awaitable[~.Operation]]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "update_explanation_dataset" not in self._stubs: + self._stubs["update_explanation_dataset"] = self.grpc_channel.unary_unary( + "/google.cloud.aiplatform.v1beta1.ModelService/UpdateExplanationDataset", + request_serializer=model_service.UpdateExplanationDatasetRequest.serialize, + response_deserializer=operations_pb2.Operation.FromString, + ) + return self._stubs["update_explanation_dataset"] + @property def delete_model( self, diff --git a/google/cloud/aiplatform_v1beta1/types/__init__.py b/google/cloud/aiplatform_v1beta1/types/__init__.py index be1d312971..3a46e1f5c2 100644 --- a/google/cloud/aiplatform_v1beta1/types/__init__.py +++ b/google/cloud/aiplatform_v1beta1/types/__init__.py @@ -119,6 +119,8 @@ Attribution, BlurBaselineConfig, Examples, + ExamplesOverride, + ExamplesRestrictionsNamespace, Explanation, ExplanationMetadataOverride, ExplanationParameters, @@ -127,6 +129,8 @@ FeatureNoiseSigma, IntegratedGradientsAttribution, ModelExplanation, + Neighbor, + Presets, SampledShapleyAttribution, SmoothGradConfig, XraiAttribution, @@ -414,6 +418,9 @@ ListModelVersionsRequest, ListModelVersionsResponse, MergeVersionAliasesRequest, + UpdateExplanationDatasetOperationMetadata, + UpdateExplanationDatasetRequest, + UpdateExplanationDatasetResponse, UpdateModelRequest, UploadModelOperationMetadata, UploadModelRequest, @@ -650,6 +657,8 @@ "Attribution", "BlurBaselineConfig", "Examples", + "ExamplesOverride", + "ExamplesRestrictionsNamespace", "Explanation", "ExplanationMetadataOverride", "ExplanationParameters", @@ -658,6 +667,8 @@ "FeatureNoiseSigma", "IntegratedGradientsAttribution", "ModelExplanation", + "Neighbor", + "Presets", "SampledShapleyAttribution", "SmoothGradConfig", "XraiAttribution", @@ -888,12 +899,16 @@ "ListModelVersionsRequest", "ListModelVersionsResponse", "MergeVersionAliasesRequest", + "UpdateExplanationDatasetOperationMetadata", + "UpdateExplanationDatasetRequest", + "UpdateExplanationDatasetResponse", 
"UpdateModelRequest", "UploadModelOperationMetadata", "UploadModelRequest", "UploadModelResponse", "DeleteOperationMetadata", "GenericOperationMetadata", + "PipelineFailurePolicy", "PipelineJob", "PipelineJobDetail", "PipelineTaskDetail", diff --git a/google/cloud/aiplatform_v1beta1/types/batch_prediction_job.py b/google/cloud/aiplatform_v1beta1/types/batch_prediction_job.py index 2b54b55692..7c0a3a02ce 100644 --- a/google/cloud/aiplatform_v1beta1/types/batch_prediction_job.py +++ b/google/cloud/aiplatform_v1beta1/types/batch_prediction_job.py @@ -64,6 +64,10 @@ class BatchPredictionJob(proto.Message): Starting this job has no impact on any existing deployments of the Model and their resources. Exactly one of model and unmanaged_container_model must be set. + + The model resource name may contain version id or version + alias to specify the version, if no version is specified, + the default version will be used. model_version_id (str): Output only. The version ID of the Model that produces the predictions via this job. diff --git a/google/cloud/aiplatform_v1beta1/types/completion_stats.py b/google/cloud/aiplatform_v1beta1/types/completion_stats.py index 5a58d6a06b..3ab6b8dc1a 100644 --- a/google/cloud/aiplatform_v1beta1/types/completion_stats.py +++ b/google/cloud/aiplatform_v1beta1/types/completion_stats.py @@ -44,6 +44,11 @@ class CompletionStats(proto.Message): Set to -1 if the number is unknown (for example, the operation failed before the total entity number could be collected). + successful_forecast_point_count (int): + Output only. The number of the successful + forecast points that are generated by the + forecasting model. This is ONLY used by the + forecasting batch prediction. """ successful_count = proto.Field( @@ -58,6 +63,10 @@ class CompletionStats(proto.Message): proto.INT64, number=3, ) + successful_forecast_point_count = proto.Field( + proto.INT64, + number=5, + ) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/aiplatform_v1beta1/types/endpoint.py b/google/cloud/aiplatform_v1beta1/types/endpoint.py index 85ff783e64..fc4fabb8e0 100644 --- a/google/cloud/aiplatform_v1beta1/types/endpoint.py +++ b/google/cloud/aiplatform_v1beta1/types/endpoint.py @@ -223,10 +223,14 @@ class DeployedModel(proto.Message): This value should be 1-10 characters, and valid characters are /[0-9]/. model (str): - Required. The name of the Model that this is - the deployment of. Note that the Model may be in - a different location than the DeployedModel's - Endpoint. + Required. The resource name of the Model that + this is the deployment of. Note that the Model + may be in a different location than the + DeployedModel's Endpoint. + The resource name may contain version id or + version alias to specify the version, if no + version is specified, the default version will + be deployed. model_version_id (str): Output only. The version ID of the model that is deployed. 
diff --git a/google/cloud/aiplatform_v1beta1/types/explanation.py b/google/cloud/aiplatform_v1beta1/types/explanation.py index 2a81fe19ca..a276c39b41 100644 --- a/google/cloud/aiplatform_v1beta1/types/explanation.py +++ b/google/cloud/aiplatform_v1beta1/types/explanation.py @@ -26,6 +26,7 @@ "Explanation", "ModelExplanation", "Attribution", + "Neighbor", "ExplanationSpec", "ExplanationParameters", "SampledShapleyAttribution", @@ -35,8 +36,11 @@ "FeatureNoiseSigma", "BlurBaselineConfig", "Examples", + "Presets", "ExplanationSpecOverride", "ExplanationMetadataOverride", + "ExamplesOverride", + "ExamplesRestrictionsNamespace", }, ) @@ -71,6 +75,13 @@ class Explanation(proto.Message): is specified, the attributions are stored by [Attribution.output_index][google.cloud.aiplatform.v1beta1.Attribution.output_index] in the same order as they appear in the output_indices. + neighbors (Sequence[google.cloud.aiplatform_v1beta1.types.Neighbor]): + Output only. List of the nearest neighbors + for example-based explanations. + For models deployed with the examples + explanations feature enabled, the attributions + field is empty and instead the neighbors field + is populated. """ attributions = proto.RepeatedField( @@ -78,6 +89,11 @@ class Explanation(proto.Message): number=1, message="Attribution", ) + neighbors = proto.RepeatedField( + proto.MESSAGE, + number=2, + message="Neighbor", + ) class ModelExplanation(proto.Message): @@ -268,6 +284,26 @@ class Attribution(proto.Message): ) +class Neighbor(proto.Message): + r"""Neighbors for example-based explanations. + + Attributes: + neighbor_id (str): + Output only. The neighbor id. + neighbor_distance (float): + Output only. The neighbor distance. + """ + + neighbor_id = proto.Field( + proto.STRING, + number=1, + ) + neighbor_distance = proto.Field( + proto.DOUBLE, + number=2, + ) + + class ExplanationSpec(proto.Message): r"""Specification of Model explanation. @@ -656,35 +692,97 @@ class Examples(proto.Message): r"""Example-based explainability that returns the nearest neighbors from the provided dataset. + This message has `oneof`_ fields (mutually exclusive fields). + For each oneof, at most one member field can be set at the same time. + Setting any member of the oneof automatically clears all other + members. + + .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields + Attributes: - gcs_source (google.cloud.aiplatform_v1beta1.types.GcsSource): - The Cloud Storage location for the input - instances. nearest_neighbor_search_config (google.protobuf.struct_pb2.Value): The configuration for the generated index, the semantics are the same as [metadata][google.cloud.aiplatform.v1beta1.Index.metadata] and should match NearestNeighborSearchConfig. + + This field is a member of `oneof`_ ``config``. + presets (google.cloud.aiplatform_v1beta1.types.Presets): + Preset config based on the desired query + speed-precision trade-off and modality + + This field is a member of `oneof`_ ``config``. + gcs_source (google.cloud.aiplatform_v1beta1.types.GcsSource): + The Cloud Storage location for the input + instances. neighbor_count (int): The number of neighbors to return. 
""" - gcs_source = proto.Field( - proto.MESSAGE, - number=1, - message=io.GcsSource, - ) nearest_neighbor_search_config = proto.Field( proto.MESSAGE, number=2, + oneof="config", message=struct_pb2.Value, ) + presets = proto.Field( + proto.MESSAGE, + number=4, + oneof="config", + message="Presets", + ) + gcs_source = proto.Field( + proto.MESSAGE, + number=1, + message=io.GcsSource, + ) neighbor_count = proto.Field( proto.INT32, number=3, ) +class Presets(proto.Message): + r"""Preset configuration for example-based explanations + + Attributes: + query (google.cloud.aiplatform_v1beta1.types.Presets.Query): + Preset option controlling parameters for + query speed-precision trade-off + + This field is a member of `oneof`_ ``_query``. + modality (google.cloud.aiplatform_v1beta1.types.Presets.Modality): + Preset option controlling parameters for + different modalities + """ + + class Query(proto.Enum): + r"""Preset option controlling parameters for query + speed-precision trade-off + """ + PRECISE = 0 + FAST = 1 + + class Modality(proto.Enum): + r"""Preset option controlling parameters for different modalities""" + MODALITY_UNSPECIFIED = 0 + IMAGE = 1 + TEXT = 2 + TABULAR = 3 + + query = proto.Field( + proto.ENUM, + number=1, + optional=True, + enum=Query, + ) + modality = proto.Field( + proto.ENUM, + number=2, + enum=Modality, + ) + + class ExplanationSpecOverride(proto.Message): r"""The [ExplanationSpec][google.cloud.aiplatform.v1beta1.ExplanationSpec] @@ -701,6 +799,9 @@ class ExplanationSpecOverride(proto.Message): metadata (google.cloud.aiplatform_v1beta1.types.ExplanationMetadataOverride): The metadata to be overridden. If not specified, no metadata is overridden. + examples_override (google.cloud.aiplatform_v1beta1.types.ExamplesOverride): + The example-based explanations parameter + overrides. """ parameters = proto.Field( @@ -713,6 +814,11 @@ class ExplanationSpecOverride(proto.Message): number=2, message="ExplanationMetadataOverride", ) + examples_override = proto.Field( + proto.MESSAGE, + number=3, + message="ExamplesOverride", + ) class ExplanationMetadataOverride(proto.Message): @@ -763,4 +869,81 @@ class InputMetadataOverride(proto.Message): ) +class ExamplesOverride(proto.Message): + r"""Overrides for example-based explanations. + + Attributes: + neighbor_count (int): + The number of neighbors to return. + crowding_count (int): + The number of neighbors to return that have + the same crowding tag. + restrictions (Sequence[google.cloud.aiplatform_v1beta1.types.ExamplesRestrictionsNamespace]): + Restrict the resulting nearest neighbors to + respect these constraints. + return_embeddings (bool): + If true, return the embeddings instead of + neighbors. + data_format (google.cloud.aiplatform_v1beta1.types.ExamplesOverride.DataFormat): + The format of the data being provided with + each call. + """ + + class DataFormat(proto.Enum): + r"""Data format enum.""" + DATA_FORMAT_UNSPECIFIED = 0 + INSTANCES = 1 + EMBEDDINGS = 2 + + neighbor_count = proto.Field( + proto.INT32, + number=1, + ) + crowding_count = proto.Field( + proto.INT32, + number=2, + ) + restrictions = proto.RepeatedField( + proto.MESSAGE, + number=3, + message="ExamplesRestrictionsNamespace", + ) + return_embeddings = proto.Field( + proto.BOOL, + number=4, + ) + data_format = proto.Field( + proto.ENUM, + number=5, + enum=DataFormat, + ) + + +class ExamplesRestrictionsNamespace(proto.Message): + r"""Restrictions namespace for example-based explanations + overrides. + + Attributes: + namespace_name (str): + The namespace name. 
+ allow (Sequence[str]): + The list of allowed tags. + deny (Sequence[str]): + The list of deny tags. + """ + + namespace_name = proto.Field( + proto.STRING, + number=1, + ) + allow = proto.RepeatedField( + proto.STRING, + number=2, + ) + deny = proto.RepeatedField( + proto.STRING, + number=3, + ) + + __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/aiplatform_v1beta1/types/explanation_metadata.py b/google/cloud/aiplatform_v1beta1/types/explanation_metadata.py index 230b9b0af1..e70c882d50 100644 --- a/google/cloud/aiplatform_v1beta1/types/explanation_metadata.py +++ b/google/cloud/aiplatform_v1beta1/types/explanation_metadata.py @@ -69,6 +69,9 @@ class ExplanationMetadata(proto.Message): including the URI scheme, than the one given on input. The output URI will point to a location where the user only has a read access. + latent_space_source (str): + Name of the source to generate embeddings for + example based explanations. """ class InputMetadata(proto.Message): @@ -457,6 +460,10 @@ class OutputMetadata(proto.Message): proto.STRING, number=3, ) + latent_space_source = proto.Field( + proto.STRING, + number=5, + ) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/aiplatform_v1beta1/types/featurestore_online_service.py b/google/cloud/aiplatform_v1beta1/types/featurestore_online_service.py index f6cb04de3f..b153d6be49 100644 --- a/google/cloud/aiplatform_v1beta1/types/featurestore_online_service.py +++ b/google/cloud/aiplatform_v1beta1/types/featurestore_online_service.py @@ -238,7 +238,6 @@ class StreamingReadFeatureValuesRequest(proto.Message): class FeatureValue(proto.Message): r"""Value for a feature. - NEXT ID: 15 This message has `oneof`_ fields (mutually exclusive fields). For each oneof, at most one member field can be set at the same time. diff --git a/google/cloud/aiplatform_v1beta1/types/manual_batch_tuning_parameters.py b/google/cloud/aiplatform_v1beta1/types/manual_batch_tuning_parameters.py index d1c6a65a7f..13fc2ace35 100644 --- a/google/cloud/aiplatform_v1beta1/types/manual_batch_tuning_parameters.py +++ b/google/cloud/aiplatform_v1beta1/types/manual_batch_tuning_parameters.py @@ -38,7 +38,7 @@ class ManualBatchTuningParameters(proto.Message): value will result in a whole batch not fitting in a machine's memory, and the whole operation will fail. - The default value is 4. + The default value is 64. """ batch_size = proto.Field( diff --git a/google/cloud/aiplatform_v1beta1/types/model_evaluation.py b/google/cloud/aiplatform_v1beta1/types/model_evaluation.py index 3ca2944a50..f1da0263e7 100644 --- a/google/cloud/aiplatform_v1beta1/types/model_evaluation.py +++ b/google/cloud/aiplatform_v1beta1/types/model_evaluation.py @@ -37,6 +37,8 @@ class ModelEvaluation(proto.Message): name (str): Output only. The resource name of the ModelEvaluation. + display_name (str): + The display name of the ModelEvaluation. metrics_schema_uri (str): Points to a YAML file stored on Google Cloud Storage describing the @@ -70,6 +72,11 @@ class ModelEvaluation(proto.Message): [ExplanationSpec][google.cloud.aiplatform.v1beta1.ExplanationSpec] that are used for explaining the predicted values on the evaluated data. + metadata (google.protobuf.struct_pb2.Value): + The metadata of the ModelEvaluation. For the ModelEvaluation + uploaded from Managed Pipeline, metadata contains a + structured value with keys of "pipeline_job_id", + "evaluation_dataset_type", "evaluation_dataset_path". 
""" class ModelEvaluationExplanationSpec(proto.Message): @@ -101,6 +108,10 @@ class ModelEvaluationExplanationSpec(proto.Message): proto.STRING, number=1, ) + display_name = proto.Field( + proto.STRING, + number=10, + ) metrics_schema_uri = proto.Field( proto.STRING, number=2, @@ -129,6 +140,11 @@ class ModelEvaluationExplanationSpec(proto.Message): number=9, message=ModelEvaluationExplanationSpec, ) + metadata = proto.Field( + proto.MESSAGE, + number=11, + message=struct_pb2.Value, + ) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/aiplatform_v1beta1/types/model_monitoring.py b/google/cloud/aiplatform_v1beta1/types/model_monitoring.py index e347fe7785..8ec55cce08 100644 --- a/google/cloud/aiplatform_v1beta1/types/model_monitoring.py +++ b/google/cloud/aiplatform_v1beta1/types/model_monitoring.py @@ -84,6 +84,8 @@ class TrainingDataset(proto.Message): "csv" The source file is a CSV file. + "jsonl" + The source file is a JSONL file. target_field (str): The target field name the model is to predict. This field will be excluded when doing diff --git a/google/cloud/aiplatform_v1beta1/types/model_service.py b/google/cloud/aiplatform_v1beta1/types/model_service.py index 5aaadd0376..88560ebb84 100644 --- a/google/cloud/aiplatform_v1beta1/types/model_service.py +++ b/google/cloud/aiplatform_v1beta1/types/model_service.py @@ -15,6 +15,7 @@ # import proto # type: ignore +from google.cloud.aiplatform_v1beta1.types import explanation from google.cloud.aiplatform_v1beta1.types import io from google.cloud.aiplatform_v1beta1.types import model as gca_model from google.cloud.aiplatform_v1beta1.types import ( @@ -37,11 +38,14 @@ "ListModelVersionsRequest", "ListModelVersionsResponse", "UpdateModelRequest", + "UpdateExplanationDatasetRequest", + "UpdateExplanationDatasetOperationMetadata", "DeleteModelRequest", "DeleteModelVersionRequest", "MergeVersionAliasesRequest", "ExportModelRequest", "ExportModelOperationMetadata", + "UpdateExplanationDatasetResponse", "ExportModelResponse", "ImportModelEvaluationRequest", "GetModelEvaluationRequest", @@ -146,6 +150,17 @@ class GetModelRequest(proto.Message): name (str): Required. The name of the Model resource. Format: ``projects/{project}/locations/{location}/models/{model}`` + + In order to retrieve a specific version of the model, also + provide the version ID or version alias. Example: + ``projects/{project}/locations/{location}/models/{model}@2`` + or + ``projects/{project}/locations/{location}/models/{model}@golden`` + If no version ID or alias is specified, the "default" + version will be returned. The "default" version alias is + created for the first version of the model, and can be moved + to other versions later on. There will be exactly one + default version. """ name = proto.Field( @@ -377,6 +392,46 @@ class UpdateModelRequest(proto.Message): ) +class UpdateExplanationDatasetRequest(proto.Message): + r"""Request message for + [ModelService.UpdateExplanationDataset][google.cloud.aiplatform.v1beta1.ModelService.UpdateExplanationDataset]. + + Attributes: + model (str): + Required. The resource name of the Model to update. Format: + ``projects/{project}/locations/{location}/models/{model}`` + examples (google.cloud.aiplatform_v1beta1.types.Examples): + The example config containing the location of + the dataset. 
+ """ + + model = proto.Field( + proto.STRING, + number=1, + ) + examples = proto.Field( + proto.MESSAGE, + number=2, + message=explanation.Examples, + ) + + +class UpdateExplanationDatasetOperationMetadata(proto.Message): + r"""Runtime operation information for + [ModelService.UpdateExplanationDataset][google.cloud.aiplatform.v1beta1.ModelService.UpdateExplanationDataset]. + + Attributes: + generic_metadata (google.cloud.aiplatform_v1beta1.types.GenericOperationMetadata): + The common part of the operation metadata. + """ + + generic_metadata = proto.Field( + proto.MESSAGE, + number=1, + message=operation.GenericOperationMetadata, + ) + + class DeleteModelRequest(proto.Message): r"""Request message for [ModelService.DeleteModel][google.cloud.aiplatform.v1beta1.ModelService.DeleteModel]. @@ -457,7 +512,10 @@ class ExportModelRequest(proto.Message): Attributes: name (str): Required. The resource name of the Model to - export. + export. The resource name may contain version id + or version alias to specify the version, if no + version is specified, the default version will + be exported. output_config (google.cloud.aiplatform_v1beta1.types.ExportModelRequest.OutputConfig): Required. The desired output location and configuration. @@ -569,6 +627,14 @@ class OutputInfo(proto.Message): ) +class UpdateExplanationDatasetResponse(proto.Message): + r"""Response message of + [ModelService.UpdateExplanationDataset][google.cloud.aiplatform.v1beta1.ModelService.UpdateExplanationDataset] + operation. + + """ + + class ExportModelResponse(proto.Message): r"""Response message of [ModelService.ExportModel][google.cloud.aiplatform.v1beta1.ModelService.ExportModel] diff --git a/google/cloud/aiplatform_v1beta1/types/pipeline_failure_policy.py b/google/cloud/aiplatform_v1beta1/types/pipeline_failure_policy.py new file mode 100644 index 0000000000..d528bce8a1 --- /dev/null +++ b/google/cloud/aiplatform_v1beta1/types/pipeline_failure_policy.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import proto # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.aiplatform.v1beta1", + manifest={ + "PipelineFailurePolicy", + }, +) + + +class PipelineFailurePolicy(proto.Enum): + r"""Represents the failure policy of a pipeline. Currently, the default + of a pipeline is that the pipeline will continue to run until no + more tasks can be executed, also known as + PIPELINE_FAILURE_POLICY_FAIL_SLOW. However, if a pipeline is set to + PIPELINE_FAILURE_POLICY_FAIL_FAST, it will stop scheduling any new + tasks when a task has failed. Any scheduled tasks will continue to + completion. 
+ """ + PIPELINE_FAILURE_POLICY_UNSPECIFIED = 0 + PIPELINE_FAILURE_POLICY_FAIL_SLOW = 1 + PIPELINE_FAILURE_POLICY_FAIL_FAST = 2 + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/aiplatform_v1beta1/types/pipeline_job.py b/google/cloud/aiplatform_v1beta1/types/pipeline_job.py index a43e876a5f..8b0e7d37a1 100644 --- a/google/cloud/aiplatform_v1beta1/types/pipeline_job.py +++ b/google/cloud/aiplatform_v1beta1/types/pipeline_job.py @@ -19,6 +19,7 @@ from google.cloud.aiplatform_v1beta1.types import context from google.cloud.aiplatform_v1beta1.types import encryption_spec as gca_encryption_spec from google.cloud.aiplatform_v1beta1.types import execution as gca_execution +from google.cloud.aiplatform_v1beta1.types import pipeline_failure_policy from google.cloud.aiplatform_v1beta1.types import pipeline_state from google.cloud.aiplatform_v1beta1.types import value as gca_value from google.protobuf import struct_pb2 # type: ignore @@ -154,6 +155,14 @@ class RuntimeConfig(proto.Message): ``PipelineJob.pipeline_spec.schema_version`` 2.1.0, such as pipelines built using Kubeflow Pipelines SDK 1.9 or higher and the v2 DSL. + failure_policy (google.cloud.aiplatform_v1beta1.types.PipelineFailurePolicy): + Represents the failure policy of a pipeline. Currently, the + default of a pipeline is that the pipeline will continue to + run until no more tasks can be executed, also known as + PIPELINE_FAILURE_POLICY_FAIL_SLOW. However, if a pipeline is + set to PIPELINE_FAILURE_POLICY_FAIL_FAST, it will stop + scheduling any new tasks when a task has failed. Any + scheduled tasks will continue to completion. """ parameters = proto.MapField( @@ -172,6 +181,11 @@ class RuntimeConfig(proto.Message): number=3, message=struct_pb2.Value, ) + failure_policy = proto.Field( + proto.ENUM, + number=4, + enum=pipeline_failure_policy.PipelineFailurePolicy, + ) name = proto.Field( proto.STRING, diff --git a/google/cloud/aiplatform_v1beta1/types/pipeline_service.py b/google/cloud/aiplatform_v1beta1/types/pipeline_service.py index f236fa402f..3a084f10b6 100644 --- a/google/cloud/aiplatform_v1beta1/types/pipeline_service.py +++ b/google/cloud/aiplatform_v1beta1/types/pipeline_service.py @@ -288,8 +288,8 @@ class ListPipelineJobsRequest(proto.Message): - ``labels``: Supports key-value equality and key presence. - ``template_uri``: Supports ``=``, ``!=`` comparisons, and ``:`` wildcard. - - ``template_metadata.version_name``: Supports ``=``, - ``!=`` comparisons, and ``:`` wildcard. + - ``template_metadata.version``: Supports ``=``, ``!=`` + comparisons, and ``:`` wildcard. Filter expressions can be combined together using logical operators (``AND`` & ``OR``). For example: diff --git a/samples/generated_samples/aiplatform_v1beta1_generated_model_service_update_explanation_dataset_async.py b/samples/generated_samples/aiplatform_v1beta1_generated_model_service_update_explanation_dataset_async.py new file mode 100644 index 0000000000..cc070a4f30 --- /dev/null +++ b/samples/generated_samples/aiplatform_v1beta1_generated_model_service_update_explanation_dataset_async.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Generated code. DO NOT EDIT! +# +# Snippet for UpdateExplanationDataset +# NOTE: This snippet has been automatically generated for illustrative purposes only. +# It may require modifications to work in your environment. + +# To install the latest published package dependency, execute the following: +# python3 -m pip install google-cloud-aiplatform + + +# [START aiplatform_v1beta1_generated_ModelService_UpdateExplanationDataset_async] +from google.cloud import aiplatform_v1beta1 + + +async def sample_update_explanation_dataset(): + # Create a client + client = aiplatform_v1beta1.ModelServiceAsyncClient() + + # Initialize request argument(s) + request = aiplatform_v1beta1.UpdateExplanationDatasetRequest( + model="model_value", + ) + + # Make the request + operation = client.update_explanation_dataset(request=request) + + print("Waiting for operation to complete...") + + response = await operation.result() + + # Handle the response + print(response) + +# [END aiplatform_v1beta1_generated_ModelService_UpdateExplanationDataset_async] diff --git a/samples/generated_samples/aiplatform_v1beta1_generated_model_service_update_explanation_dataset_sync.py b/samples/generated_samples/aiplatform_v1beta1_generated_model_service_update_explanation_dataset_sync.py new file mode 100644 index 0000000000..3f17d7ca8d --- /dev/null +++ b/samples/generated_samples/aiplatform_v1beta1_generated_model_service_update_explanation_dataset_sync.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Generated code. DO NOT EDIT! +# +# Snippet for UpdateExplanationDataset +# NOTE: This snippet has been automatically generated for illustrative purposes only. +# It may require modifications to work in your environment. 
+ +# To install the latest published package dependency, execute the following: +# python3 -m pip install google-cloud-aiplatform + + +# [START aiplatform_v1beta1_generated_ModelService_UpdateExplanationDataset_sync] +from google.cloud import aiplatform_v1beta1 + + +def sample_update_explanation_dataset(): + # Create a client + client = aiplatform_v1beta1.ModelServiceClient() + + # Initialize request argument(s) + request = aiplatform_v1beta1.UpdateExplanationDatasetRequest( + model="model_value", + ) + + # Make the request + operation = client.update_explanation_dataset(request=request) + + print("Waiting for operation to complete...") + + response = operation.result() + + # Handle the response + print(response) + +# [END aiplatform_v1beta1_generated_ModelService_UpdateExplanationDataset_sync] diff --git a/samples/generated_samples/snippet_metadata_aiplatform_v1beta1.json b/samples/generated_samples/snippet_metadata_aiplatform_v1beta1.json index 99cdb25b68..0d4c8a0532 100644 --- a/samples/generated_samples/snippet_metadata_aiplatform_v1beta1.json +++ b/samples/generated_samples/snippet_metadata_aiplatform_v1beta1.json @@ -20571,6 +20571,167 @@ ], "title": "aiplatform_v1beta1_generated_model_service_merge_version_aliases_sync.py" }, + { + "canonical": true, + "clientMethod": { + "async": true, + "client": { + "fullName": "google.cloud.aiplatform_v1beta1.ModelServiceAsyncClient", + "shortName": "ModelServiceAsyncClient" + }, + "fullName": "google.cloud.aiplatform_v1beta1.ModelServiceAsyncClient.update_explanation_dataset", + "method": { + "fullName": "google.cloud.aiplatform.v1beta1.ModelService.UpdateExplanationDataset", + "service": { + "fullName": "google.cloud.aiplatform.v1beta1.ModelService", + "shortName": "ModelService" + }, + "shortName": "UpdateExplanationDataset" + }, + "parameters": [ + { + "name": "request", + "type": "google.cloud.aiplatform_v1beta1.types.UpdateExplanationDatasetRequest" + }, + { + "name": "model", + "type": "str" + }, + { + "name": "retry", + "type": "google.api_core.retry.Retry" + }, + { + "name": "timeout", + "type": "float" + }, + { + "name": "metadata", + "type": "Sequence[Tuple[str, str]" + } + ], + "resultType": "google.api_core.operation_async.AsyncOperation", + "shortName": "update_explanation_dataset" + }, + "description": "Sample for UpdateExplanationDataset", + "file": "aiplatform_v1beta1_generated_model_service_update_explanation_dataset_async.py", + "language": "PYTHON", + "origin": "API_DEFINITION", + "regionTag": "aiplatform_v1beta1_generated_ModelService_UpdateExplanationDataset_async", + "segments": [ + { + "end": 48, + "start": 27, + "type": "FULL" + }, + { + "end": 48, + "start": 27, + "type": "SHORT" + }, + { + "end": 33, + "start": 31, + "type": "CLIENT_INITIALIZATION" + }, + { + "end": 38, + "start": 34, + "type": "REQUEST_INITIALIZATION" + }, + { + "end": 45, + "start": 39, + "type": "REQUEST_EXECUTION" + }, + { + "end": 49, + "start": 46, + "type": "RESPONSE_HANDLING" + } + ], + "title": "aiplatform_v1beta1_generated_model_service_update_explanation_dataset_async.py" + }, + { + "canonical": true, + "clientMethod": { + "client": { + "fullName": "google.cloud.aiplatform_v1beta1.ModelServiceClient", + "shortName": "ModelServiceClient" + }, + "fullName": "google.cloud.aiplatform_v1beta1.ModelServiceClient.update_explanation_dataset", + "method": { + "fullName": "google.cloud.aiplatform.v1beta1.ModelService.UpdateExplanationDataset", + "service": { + "fullName": "google.cloud.aiplatform.v1beta1.ModelService", + "shortName": "ModelService" + 
}, + "shortName": "UpdateExplanationDataset" + }, + "parameters": [ + { + "name": "request", + "type": "google.cloud.aiplatform_v1beta1.types.UpdateExplanationDatasetRequest" + }, + { + "name": "model", + "type": "str" + }, + { + "name": "retry", + "type": "google.api_core.retry.Retry" + }, + { + "name": "timeout", + "type": "float" + }, + { + "name": "metadata", + "type": "Sequence[Tuple[str, str]" + } + ], + "resultType": "google.api_core.operation.Operation", + "shortName": "update_explanation_dataset" + }, + "description": "Sample for UpdateExplanationDataset", + "file": "aiplatform_v1beta1_generated_model_service_update_explanation_dataset_sync.py", + "language": "PYTHON", + "origin": "API_DEFINITION", + "regionTag": "aiplatform_v1beta1_generated_ModelService_UpdateExplanationDataset_sync", + "segments": [ + { + "end": 48, + "start": 27, + "type": "FULL" + }, + { + "end": 48, + "start": 27, + "type": "SHORT" + }, + { + "end": 33, + "start": 31, + "type": "CLIENT_INITIALIZATION" + }, + { + "end": 38, + "start": 34, + "type": "REQUEST_INITIALIZATION" + }, + { + "end": 45, + "start": 39, + "type": "REQUEST_EXECUTION" + }, + { + "end": 49, + "start": 46, + "type": "RESPONSE_HANDLING" + } + ], + "title": "aiplatform_v1beta1_generated_model_service_update_explanation_dataset_sync.py" + }, { "canonical": true, "clientMethod": { diff --git a/samples/model-builder/conftest.py b/samples/model-builder/conftest.py index c871a72607..73a4ee5f2d 100644 --- a/samples/model-builder/conftest.py +++ b/samples/model-builder/conftest.py @@ -45,6 +45,12 @@ def mock_tabular_dataset(): yield mock +@pytest.fixture +def mock_time_series_dataset(): + mock = MagicMock(aiplatform.datasets.TimeSeriesDataset) + yield mock + + @pytest.fixture def mock_text_dataset(): mock = MagicMock(aiplatform.datasets.TextDataset) @@ -74,6 +80,13 @@ def mock_get_tabular_dataset(mock_tabular_dataset): yield mock_get_tabular_dataset +@pytest.fixture +def mock_get_time_series_dataset(mock_time_series_dataset): + with patch.object(aiplatform, "TimeSeriesDataset") as mock_get_time_series_dataset: + mock_get_time_series_dataset.return_value = mock_time_series_dataset + yield mock_get_time_series_dataset + + @pytest.fixture def mock_get_text_dataset(mock_text_dataset): with patch.object(aiplatform, "TextDataset") as mock_get_text_dataset: @@ -107,6 +120,15 @@ def mock_create_tabular_dataset(mock_tabular_dataset): yield mock_create_tabular_dataset +@pytest.fixture +def mock_create_time_series_dataset(mock_time_series_dataset): + with patch.object( + aiplatform.TimeSeriesDataset, "create" + ) as mock_create_time_series_dataset: + mock_create_time_series_dataset.return_value = mock_time_series_dataset + yield mock_create_time_series_dataset + + @pytest.fixture def mock_create_text_dataset(mock_text_dataset): with patch.object(aiplatform.TextDataset, "create") as mock_create_text_dataset: @@ -183,6 +205,12 @@ def mock_tabular_training_job(): yield mock +@pytest.fixture +def mock_forecasting_training_job(): + mock = MagicMock(aiplatform.training_jobs.AutoMLForecastingTrainingJob) + yield mock + + @pytest.fixture def mock_text_training_job(): mock = MagicMock(aiplatform.training_jobs.AutoMLTextTrainingJob) @@ -208,6 +236,19 @@ def mock_run_automl_tabular_training_job(mock_tabular_training_job): yield mock +@pytest.fixture +def mock_get_automl_forecasting_training_job(mock_forecasting_training_job): + with patch.object(aiplatform, "AutoMLForecastingTrainingJob") as mock: + mock.return_value = mock_forecasting_training_job + yield mock + + 
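A side note on the forecasting-job fixtures being added to conftest.py here: mock_get_automl_forecasting_training_job swaps the AutoMLForecastingTrainingJob class for the shared MagicMock, while the companion mock_run_automl_forecasting_training_job fixture (just below) patches that mock's run method so tests can assert on the exact keyword arguments. A minimal sketch of a test consuming both fixtures, not part of this change and with placeholder values, could look like:

    def test_forecasting_job_runs(
        mock_get_automl_forecasting_training_job,
        mock_run_automl_forecasting_training_job,
    ):
        from google.cloud import aiplatform

        # The constructor resolves to the patched class and returns the shared mock.
        job = aiplatform.AutoMLForecastingTrainingJob(display_name="demo-job")
        job.run(dataset=None, target_column="sales")

        mock_get_automl_forecasting_training_job.assert_called_once_with(
            display_name="demo-job"
        )
        mock_run_automl_forecasting_training_job.assert_called_once_with(
            dataset=None, target_column="sales"
        )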
+@pytest.fixture +def mock_run_automl_forecasting_training_job(mock_forecasting_training_job): + with patch.object(mock_forecasting_training_job, "run") as mock: + yield mock + + @pytest.fixture def mock_get_automl_image_training_job(mock_image_training_job): with patch.object(aiplatform, "AutoMLImageTrainingJob") as mock: diff --git a/samples/model-builder/create_and_import_dataset_tabular_bigquery_sample.py b/samples/model-builder/create_and_import_dataset_tabular_bigquery_sample.py index 38328ab652..5942ef1f3f 100644 --- a/samples/model-builder/create_and_import_dataset_tabular_bigquery_sample.py +++ b/samples/model-builder/create_and_import_dataset_tabular_bigquery_sample.py @@ -18,13 +18,17 @@ # [START aiplatform_sdk_create_and_import_dataset_tabular_bigquery_sample] def create_and_import_dataset_tabular_bigquery_sample( - display_name: str, project: str, location: str, bq_source: str, + display_name: str, + project: str, + location: str, + bigquery_source: str, ): aiplatform.init(project=project, location=location) dataset = aiplatform.TabularDataset.create( - display_name=display_name, bq_source=bq_source, + display_name=display_name, + bigquery_source=bigquery_source, ) dataset.wait() diff --git a/samples/model-builder/create_and_import_dataset_tabular_bigquery_sample_test.py b/samples/model-builder/create_and_import_dataset_tabular_bigquery_sample_test.py index 6917eee9cf..448fcf9c44 100644 --- a/samples/model-builder/create_and_import_dataset_tabular_bigquery_sample_test.py +++ b/samples/model-builder/create_and_import_dataset_tabular_bigquery_sample_test.py @@ -24,7 +24,7 @@ def test_create_and_import_dataset_tabular_bigquery_sample( create_and_import_dataset_tabular_bigquery_sample.create_and_import_dataset_tabular_bigquery_sample( project=constants.PROJECT, location=constants.LOCATION, - bq_source=constants.BIGQUERY_SOURCE, + bigquery_source=constants.BIGQUERY_SOURCE, display_name=constants.DISPLAY_NAME, ) @@ -32,5 +32,6 @@ def test_create_and_import_dataset_tabular_bigquery_sample( project=constants.PROJECT, location=constants.LOCATION ) mock_create_tabular_dataset.assert_called_once_with( - display_name=constants.DISPLAY_NAME, bq_source=constants.BIGQUERY_SOURCE, + display_name=constants.DISPLAY_NAME, + bigquery_source=constants.BIGQUERY_SOURCE, ) diff --git a/samples/model-builder/create_and_import_dataset_time_series_bigquery_sample.py b/samples/model-builder/create_and_import_dataset_time_series_bigquery_sample.py new file mode 100644 index 0000000000..76c17a9cd3 --- /dev/null +++ b/samples/model-builder/create_and_import_dataset_time_series_bigquery_sample.py @@ -0,0 +1,40 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
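A quick note on the rename in the tabular BigQuery sample above: the source table is now passed as bigquery_source rather than bq_source, and the sample test asserts on the new keyword. A caller migrating an existing script would mirror the updated sample; the project and table below are placeholders:

    from google.cloud import aiplatform

    aiplatform.init(project="my-project", location="us-central1")

    dataset = aiplatform.TabularDataset.create(
        display_name="my-tabular-dataset",
        bigquery_source="bq://my-project.my_dataset.my_table",  # placeholder table
    )
    dataset.wait()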
+ + +from google.cloud import aiplatform + + +# [START aiplatform_sdk_create_and_import_dataset_time_series_bigquery_sample] +def create_and_import_dataset_time_series_bigquery_sample( + display_name: str, + project: str, + location: str, + bigquery_source: str, +): + + aiplatform.init(project=project, location=location) + + dataset = aiplatform.TimeSeriesDataset.create( + display_name=display_name, + bigquery_source=bigquery_source, + ) + + dataset.wait() + + print(f'\tDataset: "{dataset.display_name}"') + print(f'\tname: "{dataset.resource_name}"') + + +# [END aiplatform_sdk_create_and_import_dataset_time_series_bigquery_sample] diff --git a/samples/model-builder/create_and_import_dataset_time_series_bigquery_sample_test.py b/samples/model-builder/create_and_import_dataset_time_series_bigquery_sample_test.py new file mode 100644 index 0000000000..21436da8ee --- /dev/null +++ b/samples/model-builder/create_and_import_dataset_time_series_bigquery_sample_test.py @@ -0,0 +1,37 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import create_and_import_dataset_time_series_bigquery_sample +import test_constants as constants + + +def test_create_and_import_dataset_time_series_bigquery_sample( + mock_sdk_init, mock_create_time_series_dataset +): + + create_and_import_dataset_time_series_bigquery_sample.create_and_import_dataset_time_series_bigquery_sample( + project=constants.PROJECT, + location=constants.LOCATION, + bigquery_source=constants.BIGQUERY_SOURCE, + display_name=constants.DISPLAY_NAME, + ) + + mock_sdk_init.assert_called_once_with( + project=constants.PROJECT, location=constants.LOCATION + ) + mock_create_time_series_dataset.assert_called_once_with( + display_name=constants.DISPLAY_NAME, + bigquery_source=constants.BIGQUERY_SOURCE, + ) diff --git a/samples/model-builder/create_and_import_dataset_time_series_gcs_sample.py b/samples/model-builder/create_and_import_dataset_time_series_gcs_sample.py new file mode 100644 index 0000000000..a2fde124d8 --- /dev/null +++ b/samples/model-builder/create_and_import_dataset_time_series_gcs_sample.py @@ -0,0 +1,41 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
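For orientation, a hypothetical invocation of the new time-series BigQuery sample above, reusing the public table that the test constants point at; the project and display name are placeholders:

    import create_and_import_dataset_time_series_bigquery_sample as sample

    sample.create_and_import_dataset_time_series_bigquery_sample(
        display_name="iris-time-series",   # placeholder
        project="my-project",              # placeholder
        location="us-central1",
        bigquery_source="bq://bigquery-public-data.ml_datasets.iris",
    )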
+ +from typing import List, Union + +from google.cloud import aiplatform + + +# [START aiplatform_sdk_create_and_import_dataset_time_series_gcs_sample] +def create_and_import_dataset_time_series_gcs_sample( + display_name: str, + project: str, + location: str, + gcs_source: Union[str, List[str]], +): + + aiplatform.init(project=project, location=location) + + dataset = aiplatform.TimeSeriesDataset.create( + display_name=display_name, + gcs_source=gcs_source, + ) + + dataset.wait() + + print(f'\tDataset: "{dataset.display_name}"') + print(f'\tname: "{dataset.resource_name}"') + + +# [END aiplatform_sdk_create_and_import_dataset_time_series_gcs_sample] diff --git a/samples/model-builder/create_and_import_dataset_time_series_gcs_sample_test.py b/samples/model-builder/create_and_import_dataset_time_series_gcs_sample_test.py new file mode 100644 index 0000000000..90c1f9711b --- /dev/null +++ b/samples/model-builder/create_and_import_dataset_time_series_gcs_sample_test.py @@ -0,0 +1,37 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import create_and_import_dataset_time_series_gcs_sample +import test_constants as constants + + +def test_create_and_import_dataset_time_series_gcs_sample( + mock_sdk_init, mock_create_time_series_dataset +): + + create_and_import_dataset_time_series_gcs_sample.create_and_import_dataset_time_series_gcs_sample( + project=constants.PROJECT, + location=constants.LOCATION, + gcs_source=constants.GCS_SOURCES, + display_name=constants.DISPLAY_NAME, + ) + + mock_sdk_init.assert_called_once_with( + project=constants.PROJECT, location=constants.LOCATION + ) + mock_create_time_series_dataset.assert_called_once_with( + display_name=constants.DISPLAY_NAME, + gcs_source=constants.GCS_SOURCES, + ) diff --git a/samples/model-builder/create_batch_prediction_job_bigquery_sample.py b/samples/model-builder/create_batch_prediction_job_bigquery_sample.py new file mode 100644 index 0000000000..94f66fd04c --- /dev/null +++ b/samples/model-builder/create_batch_prediction_job_bigquery_sample.py @@ -0,0 +1,47 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
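The GCS variant above types gcs_source as Union[str, List[str]], so sharded source files can be registered in a single call, as the GCS_SOURCES test constant also suggests. A hedged sketch with placeholder project, bucket, and object names:

    from google.cloud import aiplatform

    aiplatform.init(project="my-project", location="us-central1")  # placeholders

    dataset = aiplatform.TimeSeriesDataset.create(
        display_name="demand-history",
        gcs_source=[
            "gs://my-bucket/exports/part-0001.jsonl",
            "gs://my-bucket/exports/part-0002.jsonl",
        ],
    )
    dataset.wait()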
+ +from google.cloud import aiplatform + + +# [START aiplatform_sdk_create_batch_prediction_job_bigquery_sample] +def create_batch_prediction_job_bigquery_sample( + project: str, + location: str, + model_resource_name: str, + job_display_name: str, + bigquery_source: str, + bigquery_destination_prefix: str, + sync: bool = True, +): + aiplatform.init(project=project, location=location) + + my_model = aiplatform.Model(model_resource_name) + + batch_prediction_job = my_model.batch_predict( + job_display_name=job_display_name, + bigquery_source=bigquery_source, + bigquery_destination_prefix=bigquery_destination_prefix, + sync=sync, + ) + + batch_prediction_job.wait() + + print(batch_prediction_job.display_name) + print(batch_prediction_job.resource_name) + print(batch_prediction_job.state) + return batch_prediction_job + + +# [END aiplatform_sdk_create_batch_prediction_job_bigquery_sample] diff --git a/samples/model-builder/create_batch_prediction_job_bigquery_sample_test.py b/samples/model-builder/create_batch_prediction_job_bigquery_sample_test.py new file mode 100644 index 0000000000..6b5c509d95 --- /dev/null +++ b/samples/model-builder/create_batch_prediction_job_bigquery_sample_test.py @@ -0,0 +1,42 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import create_batch_prediction_job_bigquery_sample +import test_constants as constants + + +def test_create_batch_prediction_job_bigquery_sample( + mock_sdk_init, mock_model, mock_init_model, mock_batch_predict_model +): + + create_batch_prediction_job_bigquery_sample.create_batch_prediction_job_bigquery_sample( + project=constants.PROJECT, + location=constants.LOCATION, + model_resource_name=constants.MODEL_NAME, + job_display_name=constants.DISPLAY_NAME, + bigquery_source=constants.BIGQUERY_SOURCE, + bigquery_destination_prefix=constants.BIGQUERY_DESTINATION_PREFIX, + ) + + mock_sdk_init.assert_called_once_with( + project=constants.PROJECT, location=constants.LOCATION + ) + mock_init_model.assert_called_once_with(constants.MODEL_NAME) + mock_batch_predict_model.assert_called_once_with( + job_display_name=constants.DISPLAY_NAME, + bigquery_source=constants.BIGQUERY_SOURCE, + bigquery_destination_prefix=constants.BIGQUERY_DESTINATION_PREFIX, + sync=True, + ) diff --git a/samples/model-builder/create_batch_prediction_job_dedicated_resources_bigquery_sample.py b/samples/model-builder/create_batch_prediction_job_dedicated_resources_bigquery_sample.py new file mode 100644 index 0000000000..6c2cdd8953 --- /dev/null +++ b/samples/model-builder/create_batch_prediction_job_dedicated_resources_bigquery_sample.py @@ -0,0 +1,59 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Union + +from google.cloud import aiplatform, aiplatform_v1 + + +# [START aiplatform_sdk_create_batch_prediction_job_dedicated_resources_bigquery_sample] +def create_batch_prediction_job_dedicated_resources_bigquery_sample( + project: str, + location: str, + model_resource_name: str, + job_display_name: str, + bigquery_source: str, + bigquery_destination_prefix: str, + machine_type: str = "n1-standard-2", + accelerator_count: int = 1, + accelerator_type: Union[str, aiplatform_v1.AcceleratorType] = "NVIDIA_TESLA_K80", + starting_replica_count: int = 1, + max_replica_count: int = 1, + sync: bool = True, +): + aiplatform.init(project=project, location=location) + + my_model = aiplatform.Model(model_resource_name) + + batch_prediction_job = my_model.batch_predict( + job_display_name=job_display_name, + bigquery_source=bigquery_source, + bigquery_destination_prefix=bigquery_destination_prefix, + machine_type=machine_type, + accelerator_count=accelerator_count, + accelerator_type=accelerator_type, + starting_replica_count=starting_replica_count, + max_replica_count=max_replica_count, + sync=sync, + ) + + batch_prediction_job.wait() + + print(batch_prediction_job.display_name) + print(batch_prediction_job.resource_name) + print(batch_prediction_job.state) + return batch_prediction_job + + +# [END aiplatform_sdk_create_batch_prediction_job_dedicated_resources_bigquery_sample] diff --git a/samples/model-builder/create_batch_prediction_job_dedicated_resources_sample_bigquery_test.py b/samples/model-builder/create_batch_prediction_job_dedicated_resources_sample_bigquery_test.py new file mode 100644 index 0000000000..d40b91d31e --- /dev/null +++ b/samples/model-builder/create_batch_prediction_job_dedicated_resources_sample_bigquery_test.py @@ -0,0 +1,55 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
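For reference, a hypothetical call to the dedicated-resources batch prediction sample above; every resource name is a placeholder and the machine/accelerator arguments are left at the sample's defaults. Since the sample passes sync=True and then calls wait(), the call should block until the batch prediction job finishes.

    import create_batch_prediction_job_dedicated_resources_bigquery_sample as sample

    sample.create_batch_prediction_job_dedicated_resources_bigquery_sample(
        project="my-project",
        location="us-central1",
        model_resource_name="projects/my-project/locations/us-central1/models/1234567890",
        job_display_name="weekly-scoring",
        bigquery_source="bq://my-project.my_dataset.input_rows",
        bigquery_destination_prefix="bq://my-project.my_dataset",
    )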
+ + +import pytest + +import create_batch_prediction_job_dedicated_resources_bigquery_sample +import test_constants as constants + + +@pytest.mark.usefixtures("mock_model") +def test_create_batch_prediction_job_bigquery_sample( + mock_sdk_init, mock_init_model, mock_batch_predict_model +): + + create_batch_prediction_job_dedicated_resources_bigquery_sample.create_batch_prediction_job_dedicated_resources_bigquery_sample( + project=constants.PROJECT, + location=constants.LOCATION, + model_resource_name=constants.MODEL_NAME, + job_display_name=constants.DISPLAY_NAME, + bigquery_source=constants.BIGQUERY_SOURCE, + bigquery_destination_prefix=constants.BIGQUERY_DESTINATION_PREFIX, + machine_type=constants.ACCELERATOR_TYPE, + accelerator_count=constants.ACCELERATOR_COUNT, + accelerator_type=constants.ACCELERATOR_TYPE, + starting_replica_count=constants.MIN_REPLICA_COUNT, + max_replica_count=constants.MAX_REPLICA_COUNT, + ) + + mock_sdk_init.assert_called_once_with( + project=constants.PROJECT, location=constants.LOCATION + ) + mock_init_model.assert_called_once_with(constants.MODEL_NAME) + mock_batch_predict_model.assert_called_once_with( + job_display_name=constants.DISPLAY_NAME, + bigquery_source=constants.BIGQUERY_SOURCE, + bigquery_destination_prefix=constants.BIGQUERY_DESTINATION_PREFIX, + machine_type=constants.ACCELERATOR_TYPE, + accelerator_count=constants.ACCELERATOR_COUNT, + accelerator_type=constants.ACCELERATOR_TYPE, + starting_replica_count=constants.MIN_REPLICA_COUNT, + max_replica_count=constants.MAX_REPLICA_COUNT, + sync=True, + ) diff --git a/samples/model-builder/create_training_pipeline_forecasting_sample.py b/samples/model-builder/create_training_pipeline_forecasting_sample.py new file mode 100644 index 0000000000..0b710e894b --- /dev/null +++ b/samples/model-builder/create_training_pipeline_forecasting_sample.py @@ -0,0 +1,98 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
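Circling back to the pipeline changes earlier in this diff: the new PipelineFailurePolicy enum can be supplied through the failure_policy field that PipelineJob.RuntimeConfig now carries. A minimal, hedged sketch of constructing just that piece with the v1beta1 types (all other RuntimeConfig settings omitted):

    from google.cloud import aiplatform_v1beta1

    runtime_config = aiplatform_v1beta1.PipelineJob.RuntimeConfig(
        failure_policy=(
            aiplatform_v1beta1.PipelineFailurePolicy.PIPELINE_FAILURE_POLICY_FAIL_FAST
        ),
    )
    print(runtime_config)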
+ +from typing import List, Optional + +from google.cloud import aiplatform + + +# [START aiplatform_sdk_create_training_pipeline_forecasting_sample] +def create_training_pipeline_forecasting_sample( + project: str, + display_name: str, + dataset_id: str, + location: str = "us-central1", + model_display_name: str = None, + target_column: str = "target_column", + time_column: str = "date", + time_series_identifier_column: str = "time_series_id", + unavailable_at_forecast_columns: List[str] = [], + available_at_forecast_columns: List[str] = [], + forecast_horizon: int = 1, + data_granularity_unit: str = "week", + data_granularity_count: int = 1, + training_fraction_split: float = 0.8, + validation_fraction_split: float = 0.1, + test_fraction_split: float = 0.1, + budget_milli_node_hours: int = 8000, + timestamp_split_column_name: str = "timestamp_split", + weight_column: str = "weight", + time_series_attribute_columns: List[str] = [], + context_window: int = 0, + export_evaluated_data_items: bool = False, + export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None, + export_evaluated_data_items_override_destination: bool = False, + quantiles: Optional[List[float]] = None, + validation_options: Optional[str] = None, + predefined_split_column_name: Optional[str] = None, + sync: bool = True, +): + aiplatform.init(project=project, location=location) + + # Create training job + forecasting_job = aiplatform.AutoMLForecastingTrainingJob( + display_name=display_name, optimization_objective="minimize-rmse" + ) + + # Retrieve existing dataset + dataset = aiplatform.TimeSeriesDataset(dataset_id) + + # Run training job + model = forecasting_job.run( + dataset=dataset, + target_column=target_column, + time_column=time_column, + time_series_identifier_column=time_series_identifier_column, + unavailable_at_forecast_columns=unavailable_at_forecast_columns, + available_at_forecast_columns=available_at_forecast_columns, + forecast_horizon=forecast_horizon, + data_granularity_unit=data_granularity_unit, + data_granularity_count=data_granularity_count, + training_fraction_split=training_fraction_split, + validation_fraction_split=validation_fraction_split, + test_fraction_split=test_fraction_split, + predefined_split_column_name=predefined_split_column_name, + timestamp_split_column_name=timestamp_split_column_name, + weight_column=weight_column, + time_series_attribute_columns=time_series_attribute_columns, + context_window=context_window, + export_evaluated_data_items=export_evaluated_data_items, + export_evaluated_data_items_bigquery_destination_uri=export_evaluated_data_items_bigquery_destination_uri, + export_evaluated_data_items_override_destination=export_evaluated_data_items_override_destination, + quantiles=quantiles, + validation_options=validation_options, + budget_milli_node_hours=budget_milli_node_hours, + model_display_name=model_display_name, + sync=sync, + ) + + model.wait() + + print(model.display_name) + print(model.resource_name) + print(model.uri) + return model + + +# [END aiplatform_sdk_create_training_pipeline_forecasting_sample] diff --git a/samples/model-builder/create_training_pipeline_forecasting_sample_test.py b/samples/model-builder/create_training_pipeline_forecasting_sample_test.py new file mode 100644 index 0000000000..2aa6eaa76d --- /dev/null +++ b/samples/model-builder/create_training_pipeline_forecasting_sample_test.py @@ -0,0 +1,85 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this 
file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import create_training_pipeline_forecasting_sample +import test_constants as constants + + +def test_create_training_pipeline_forecasting_sample( + mock_sdk_init, + mock_time_series_dataset, + mock_get_automl_forecasting_training_job, + mock_run_automl_forecasting_training_job, + mock_get_time_series_dataset, +): + + create_training_pipeline_forecasting_sample.create_training_pipeline_forecasting_sample( + project=constants.PROJECT, + display_name=constants.DISPLAY_NAME, + dataset_id=constants.RESOURCE_ID, + model_display_name=constants.DISPLAY_NAME_2, + target_column=constants.TABULAR_TARGET_COLUMN, + training_fraction_split=constants.TRAINING_FRACTION_SPLIT, + validation_fraction_split=constants.VALIDATION_FRACTION_SPLIT, + test_fraction_split=constants.TEST_FRACTION_SPLIT, + budget_milli_node_hours=constants.BUDGET_MILLI_NODE_HOURS_8000, + timestamp_split_column_name=constants.TIMESTAMP_SPLIT_COLUMN_NAME, + weight_column=constants.WEIGHT_COLUMN, + time_series_attribute_columns=constants.TIME_SERIES_ATTRIBUTE_COLUMNS, + context_window=constants.CONTEXT_WINDOW, + export_evaluated_data_items=constants.EXPORT_EVALUATED_DATA_ITEMS, + export_evaluated_data_items_bigquery_destination_uri=constants.EXPORT_EVALUATED_DATA_ITEMS_BIGQUERY_DESTINATION_URI, + export_evaluated_data_items_override_destination=constants.EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION, + quantiles=constants.QUANTILES, + validation_options=constants.VALIDATION_OPTIONS, + predefined_split_column_name=constants.PREDEFINED_SPLIT_COLUMN_NAME, + ) + + mock_get_time_series_dataset.assert_called_once_with(constants.RESOURCE_ID) + + mock_sdk_init.assert_called_once_with( + project=constants.PROJECT, location=constants.LOCATION + ) + mock_get_automl_forecasting_training_job.assert_called_once_with( + display_name=constants.DISPLAY_NAME, + optimization_objective="minimize-rmse", + ) + mock_run_automl_forecasting_training_job.assert_called_once_with( + dataset=mock_time_series_dataset, + target_column=constants.TABULAR_TARGET_COLUMN, + time_column=constants.FORECASTNG_TIME_COLUMN, + time_series_identifier_column=constants.FORECASTNG_TIME_SERIES_IDENTIFIER_COLUMN, + unavailable_at_forecast_columns=constants.FORECASTNG_UNAVAILABLE_AT_FORECAST_COLUMNS, + available_at_forecast_columns=constants.FORECASTNG_AVAILABLE_AT_FORECAST_COLUMNS, + forecast_horizon=constants.FORECASTNG_FORECAST_HORIZON, + data_granularity_unit=constants.DATA_GRANULARITY_UNIT, + data_granularity_count=constants.DATA_GRANULARITY_COUNT, + training_fraction_split=constants.TRAINING_FRACTION_SPLIT, + validation_fraction_split=constants.VALIDATION_FRACTION_SPLIT, + test_fraction_split=constants.TEST_FRACTION_SPLIT, + budget_milli_node_hours=constants.BUDGET_MILLI_NODE_HOURS_8000, + model_display_name=constants.DISPLAY_NAME_2, + timestamp_split_column_name=constants.TIMESTAMP_SPLIT_COLUMN_NAME, + weight_column=constants.WEIGHT_COLUMN, + time_series_attribute_columns=constants.TIME_SERIES_ATTRIBUTE_COLUMNS, + context_window=constants.CONTEXT_WINDOW, + 
export_evaluated_data_items=constants.EXPORT_EVALUATED_DATA_ITEMS, + export_evaluated_data_items_bigquery_destination_uri=constants.EXPORT_EVALUATED_DATA_ITEMS_BIGQUERY_DESTINATION_URI, + export_evaluated_data_items_override_destination=constants.EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION, + quantiles=constants.QUANTILES, + validation_options=constants.VALIDATION_OPTIONS, + predefined_split_column_name=constants.PREDEFINED_SPLIT_COLUMN_NAME, + sync=True, + ) diff --git a/samples/model-builder/create_training_pipeline_image_classification_sample.py b/samples/model-builder/create_training_pipeline_image_classification_sample.py index 417cfa43b0..ccf95ec746 100644 --- a/samples/model-builder/create_training_pipeline_image_classification_sample.py +++ b/samples/model-builder/create_training_pipeline_image_classification_sample.py @@ -22,7 +22,7 @@ def create_training_pipeline_image_classification_sample( project: str, location: str, display_name: str, - dataset_id: int, + dataset_id: str, model_display_name: Optional[str] = None, model_type: str = "CLOUD", multi_label: bool = False, @@ -38,8 +38,8 @@ def create_training_pipeline_image_classification_sample( job = aiplatform.AutoMLImageTrainingJob( display_name=display_name, model_type=model_type, - prediction_type='classification', - multi_label=multi_label + prediction_type="classification", + multi_label=multi_label, ) my_image_ds = aiplatform.ImageDataset(dataset_id) diff --git a/samples/model-builder/create_training_pipeline_tabular_classification_sample.py b/samples/model-builder/create_training_pipeline_tabular_classification_sample.py index 317a1472b6..6e9e520317 100644 --- a/samples/model-builder/create_training_pipeline_tabular_classification_sample.py +++ b/samples/model-builder/create_training_pipeline_tabular_classification_sample.py @@ -19,9 +19,10 @@ def create_training_pipeline_tabular_classification_sample( project: str, display_name: str, - dataset_id: int, + dataset_id: str, location: str = "us-central1", model_display_name: str = None, + target_column: str = "target_column", training_fraction_split: float = 0.8, validation_fraction_split: float = 0.1, test_fraction_split: float = 0.1, @@ -32,14 +33,14 @@ def create_training_pipeline_tabular_classification_sample( aiplatform.init(project=project, location=location) tabular_classification_job = aiplatform.AutoMLTabularTrainingJob( - display_name=display_name, - optimization_prediction_type="classification" + display_name=display_name, optimization_prediction_type="classification" ) - my_tabular_dataset = aiplatform.TabularDataset(dataset_id) + my_tabular_dataset = aiplatform.TabularDataset(dataset_name=dataset_id) model = tabular_classification_job.run( dataset=my_tabular_dataset, + target_column=target_column, training_fraction_split=training_fraction_split, validation_fraction_split=validation_fraction_split, test_fraction_split=test_fraction_split, diff --git a/samples/model-builder/create_training_pipeline_tabular_classification_sample_test.py b/samples/model-builder/create_training_pipeline_tabular_classification_sample_test.py index 1cfbb02202..cb0de0320d 100644 --- a/samples/model-builder/create_training_pipeline_tabular_classification_sample_test.py +++ b/samples/model-builder/create_training_pipeline_tabular_classification_sample_test.py @@ -30,6 +30,7 @@ def test_create_training_pipeline_tabular_classification_sample( display_name=constants.DISPLAY_NAME, dataset_id=constants.RESOURCE_ID, model_display_name=constants.DISPLAY_NAME_2, + 
target_column=constants.TABULAR_TARGET_COLUMN, training_fraction_split=constants.TRAINING_FRACTION_SPLIT, validation_fraction_split=constants.VALIDATION_FRACTION_SPLIT, test_fraction_split=constants.TEST_FRACTION_SPLIT, @@ -37,18 +38,19 @@ def test_create_training_pipeline_tabular_classification_sample( disable_early_stopping=False, ) - mock_get_tabular_dataset.assert_called_once_with(constants.RESOURCE_ID) + mock_get_tabular_dataset.assert_called_once_with(dataset_name=constants.RESOURCE_ID) mock_sdk_init.assert_called_once_with( project=constants.PROJECT, location=constants.LOCATION ) mock_get_automl_tabular_training_job.assert_called_once_with( display_name=constants.DISPLAY_NAME, - optimization_prediction_type="classification" + optimization_prediction_type="classification", ) mock_run_automl_tabular_training_job.assert_called_once_with( dataset=mock_tabular_dataset, model_display_name=constants.DISPLAY_NAME_2, + target_column=constants.TABULAR_TARGET_COLUMN, training_fraction_split=constants.TRAINING_FRACTION_SPLIT, validation_fraction_split=constants.VALIDATION_FRACTION_SPLIT, test_fraction_split=constants.TEST_FRACTION_SPLIT, diff --git a/samples/model-builder/create_training_pipeline_tabular_regression_sample.py b/samples/model-builder/create_training_pipeline_tabular_regression_sample.py index f7edcce1d9..9a3524f234 100644 --- a/samples/model-builder/create_training_pipeline_tabular_regression_sample.py +++ b/samples/model-builder/create_training_pipeline_tabular_regression_sample.py @@ -19,9 +19,10 @@ def create_training_pipeline_tabular_regression_sample( project: str, display_name: str, - dataset_id: int, + dataset_id: str, location: str = "us-central1", model_display_name: str = None, + target_column: str = "target_column", training_fraction_split: float = 0.8, validation_fraction_split: float = 0.1, test_fraction_split: float = 0.1, @@ -32,14 +33,14 @@ def create_training_pipeline_tabular_regression_sample( aiplatform.init(project=project, location=location) tabular_regression_job = aiplatform.AutoMLTabularTrainingJob( - display_name=display_name, - optimization_prediction_type="regression" + display_name=display_name, optimization_prediction_type="regression" ) - my_tabular_dataset = aiplatform.TabularDataset(dataset_id) + my_tabular_dataset = aiplatform.TabularDataset(dataset_name=dataset_id) model = tabular_regression_job.run( dataset=my_tabular_dataset, + target_column=target_column, training_fraction_split=training_fraction_split, validation_fraction_split=validation_fraction_split, test_fraction_split=test_fraction_split, diff --git a/samples/model-builder/create_training_pipeline_tabular_regression_sample_test.py b/samples/model-builder/create_training_pipeline_tabular_regression_sample_test.py index d9a6b386e0..c39ebcfa4f 100644 --- a/samples/model-builder/create_training_pipeline_tabular_regression_sample_test.py +++ b/samples/model-builder/create_training_pipeline_tabular_regression_sample_test.py @@ -30,6 +30,7 @@ def test_create_training_pipeline_tabular_regression_sample( display_name=constants.DISPLAY_NAME, dataset_id=constants.RESOURCE_ID, model_display_name=constants.DISPLAY_NAME_2, + target_column=constants.TABULAR_TARGET_COLUMN, training_fraction_split=constants.TRAINING_FRACTION_SPLIT, validation_fraction_split=constants.VALIDATION_FRACTION_SPLIT, test_fraction_split=constants.TEST_FRACTION_SPLIT, @@ -37,18 +38,18 @@ def test_create_training_pipeline_tabular_regression_sample( disable_early_stopping=False, ) - 
mock_get_tabular_dataset.assert_called_once_with(constants.RESOURCE_ID) + mock_get_tabular_dataset.assert_called_once_with(dataset_name=constants.RESOURCE_ID) mock_sdk_init.assert_called_once_with( project=constants.PROJECT, location=constants.LOCATION ) mock_get_automl_tabular_training_job.assert_called_once_with( - display_name=constants.DISPLAY_NAME, - optimization_prediction_type="regression" + display_name=constants.DISPLAY_NAME, optimization_prediction_type="regression" ) mock_run_automl_tabular_training_job.assert_called_once_with( dataset=mock_tabular_dataset, model_display_name=constants.DISPLAY_NAME_2, + target_column=constants.TABULAR_TARGET_COLUMN, training_fraction_split=constants.TRAINING_FRACTION_SPLIT, validation_fraction_split=constants.VALIDATION_FRACTION_SPLIT, test_fraction_split=constants.TEST_FRACTION_SPLIT, diff --git a/samples/model-builder/create_training_pipeline_text_classification_sample.py b/samples/model-builder/create_training_pipeline_text_classification_sample.py index 9306a82084..b27a2f18d4 100644 --- a/samples/model-builder/create_training_pipeline_text_classification_sample.py +++ b/samples/model-builder/create_training_pipeline_text_classification_sample.py @@ -22,7 +22,7 @@ def create_training_pipeline_text_classification_sample( project: str, location: str, display_name: str, - dataset_id: int, + dataset_id: str, model_display_name: Optional[str] = None, multi_label: bool = False, training_fraction_split: float = 0.8, diff --git a/samples/model-builder/create_training_pipeline_text_entity_extraction_sample.py b/samples/model-builder/create_training_pipeline_text_entity_extraction_sample.py index 2d53cb2d63..8e5c25d1b2 100644 --- a/samples/model-builder/create_training_pipeline_text_entity_extraction_sample.py +++ b/samples/model-builder/create_training_pipeline_text_entity_extraction_sample.py @@ -22,7 +22,7 @@ def create_training_pipeline_text_entity_extraction_sample( project: str, location: str, display_name: str, - dataset_id: int, + dataset_id: str, model_display_name: Optional[str] = None, training_fraction_split: float = 0.8, validation_fraction_split: float = 0.1, diff --git a/samples/model-builder/create_training_pipeline_text_sentiment_analysis_sample.py b/samples/model-builder/create_training_pipeline_text_sentiment_analysis_sample.py index 685bed6feb..57a829f237 100644 --- a/samples/model-builder/create_training_pipeline_text_sentiment_analysis_sample.py +++ b/samples/model-builder/create_training_pipeline_text_sentiment_analysis_sample.py @@ -22,7 +22,7 @@ def create_training_pipeline_text_sentiment_analysis_sample( project: str, location: str, display_name: str, - dataset_id: int, + dataset_id: str, model_display_name: Optional[str] = None, sentiment_max: int = 10, training_fraction_split: float = 0.8, diff --git a/samples/model-builder/explain_sample.py b/samples/model-builder/explain_sample.py new file mode 100644 index 0000000000..2f61e0cd9c --- /dev/null +++ b/samples/model-builder/explain_sample.py @@ -0,0 +1,48 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Dict + +from google.cloud import aiplatform + + +# [START aiplatform_sdk_explain_sample] +def explain_sample(project: str, location: str, endpoint_id: str, instance_dict: Dict): + + aiplatform.init(project=project, location=location) + + endpoint = aiplatform.Endpoint(endpoint_id) + + response = endpoint.explain(instances=[instance_dict], parameters={}) + + for explanation in response.explanations: + print(" explanation") + # Feature attributions. + attributions = explanation.attributions + for attribution in attributions: + print(" attribution") + print(" baseline_output_value:", attribution.baseline_output_value) + print(" instance_output_value:", attribution.instance_output_value) + print(" output_display_name:", attribution.output_display_name) + print(" approximation_error:", attribution.approximation_error) + print(" output_name:", attribution.output_name) + output_index = attribution.output_index + for output_index in output_index: + print(" output_index:", output_index) + + for prediction in response.predictions: + print(prediction) + + +# [END aiplatform_sdk_explain_sample] diff --git a/samples/model-builder/explain_sample_test.py b/samples/model-builder/explain_sample_test.py new file mode 100644 index 0000000000..83434edae8 --- /dev/null +++ b/samples/model-builder/explain_sample_test.py @@ -0,0 +1,41 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import explain_sample +import test_constants as constants + + +def test_explain_sample( + mock_sdk_init, mock_endpoint, mock_get_endpoint, mock_endpoint_explain +): + + explain_sample.explain_sample( + project=constants.PROJECT, + location=constants.LOCATION, + endpoint_id=constants.ENDPOINT_NAME, + instance_dict=constants.PREDICTION_TABULAR_INSTANCE, + ) + + mock_sdk_init.assert_called_once_with( + project=constants.PROJECT, location=constants.LOCATION + ) + + mock_get_endpoint.assert_called_once_with( + constants.ENDPOINT_NAME, + ) + + mock_endpoint_explain.assert_called_once_with( + instances=[constants.PREDICTION_TABULAR_INSTANCE], parameters={} + ) diff --git a/samples/model-builder/test_constants.py b/samples/model-builder/test_constants.py index fc7959292e..2e50ef62e6 100644 --- a/samples/model-builder/test_constants.py +++ b/samples/model-builder/test_constants.py @@ -41,6 +41,7 @@ TRAINING_JOB_NAME = f"{PARENT}/trainingJobs/{RESOURCE_ID}" BIGQUERY_SOURCE = f"bq://{PROJECT}.{DATASET_NAME}.table1" +BIGQUERY_DESTINATION_PREFIX = "bq://bigquery-public-data.ml_datasets.iris" GCS_SOURCES = ["gs://bucket1/source1.jsonl", "gs://bucket7/source4.jsonl"] BIGQUERY_SOURCE = "bq://bigquery-public-data.ml_datasets.iris" @@ -211,14 +212,8 @@ FEATURE_NAME = f"projects/{PROJECT}/locations/{LOCATION}/featurestores/{FEATURESTORE_ID}/entityTypes/{ENTITY_TYPE_ID}/features/{FEATURE_ID}" FEATURE_VALUE_TYPE = "INT64" FEATURE_CONFIGS = { - "age": { - "value_type": "INT64", - "description": "User age" - }, - "gender": { - "value_type": "STRING", - "description": "User gender" - }, + "age": {"value_type": "INT64", "description": "User age"}, + "gender": {"value_type": "STRING", "description": "User gender"}, "liked_genres": { "value_type": "STRING_ARRAY", "description": "An array of genres this user liked", @@ -240,3 +235,23 @@ ) GCS_SOURCE_TYPE = "avro" WORKER_COUNT = 1 + +TABULAR_TARGET_COLUMN = "target_column" +FORECASTNG_TIME_COLUMN = "date" +FORECASTNG_TIME_SERIES_IDENTIFIER_COLUMN = "time_series_id" +FORECASTNG_UNAVAILABLE_AT_FORECAST_COLUMNS = [] +FORECASTNG_AVAILABLE_AT_FORECAST_COLUMNS = [] +FORECASTNG_FORECAST_HORIZON = 1 +DATA_GRANULARITY_UNIT = "week" +DATA_GRANULARITY_COUNT = 1 + +TIMESTAMP_SPLIT_COLUMN_NAME = "timestamp_split_column_name" +WEIGHT_COLUMN = "weight" +TIME_SERIES_ATTRIBUTE_COLUMNS = [] +CONTEXT_WINDOW = 0 +EXPORT_EVALUATED_DATA_ITEMS = True +EXPORT_EVALUATED_DATA_ITEMS_BIGQUERY_DESTINATION_URI = "bq://test:test:test" +EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION = True +QUANTILES = [0, 0.5, 1] +VALIDATION_OPTIONS = "fail-pipeline" +PREDEFINED_SPLIT_COLUMN_NAME = "predefined" diff --git a/setup.py b/setup.py index 7db8ad5f27..d1d01a6bde 100644 --- a/setup.py +++ b/setup.py @@ -103,8 +103,9 @@ # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 "google-api-core[grpc] >= 1.31.5, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", - "proto-plus >= 1.15.0", - "packaging >= 14.3", + "proto-plus >= 1.15.0, <2.0.0dev", + "protobuf >= 3.19.0, <4.0.0dev", + "packaging >= 14.3, <22.0.0dev", "google-cloud-storage >= 1.32.0, < 3.0.0dev", "google-cloud-bigquery >= 1.15.0, < 3.0.0dev", "google-cloud-resource-manager >= 1.3.3, < 3.0.0dev", diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index 0380c54f59..acf8ecba0c 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -8,6 +8,7 @@ google-api-core==1.31.5 libcst==0.2.5 proto-plus==1.15.0 +protobuf==3.19.0 mock==4.0.2 google-cloud-storage==1.32.0 
google-auth==1.25.0 # TODO: Remove when google-api-core >= 1.26.0 is required diff --git a/tests/system/aiplatform/test_dataset.py b/tests/system/aiplatform/test_dataset.py index 54e2528e1f..7cd3c0416c 100644 --- a/tests/system/aiplatform/test_dataset.py +++ b/tests/system/aiplatform/test_dataset.py @@ -51,6 +51,9 @@ ) _TEST_DATASET_DISPLAY_NAME = "permanent_50_flowers_dataset" _TEST_TABULAR_CLASSIFICATION_GCS_SOURCE = "gs://ucaip-sample-resources/iris_1000.csv" +_TEST_FORECASTING_BQ_SOURCE = ( + "bq://ucaip-sample-tests:ucaip_test_us_central1.2020_sales_train" +) _TEST_TEXT_ENTITY_EXTRACTION_GCS_SOURCE = f"gs://{TEST_BUCKET}/ai-platform-unified/sdk/datasets/text_entity_extraction_dataset.jsonl" _TEST_IMAGE_OBJECT_DETECTION_GCS_SOURCE = ( "gs://ucaip-test-us-central1/dataset/salads_oid_ml_use_public_unassigned.jsonl" @@ -306,6 +309,30 @@ def test_create_tabular_dataset_from_dataframe_with_provided_schema( finally: tabular_dataset.delete() + def test_create_time_series_dataset(self): + """Use the Dataset.create() method to create a new time series dataset. + Then confirm the dataset was successfully created and references the BigQuery source.""" + + try: + time_series_dataset = aiplatform.TimeSeriesDataset.create( + display_name=self._make_display_name(key="create_time_series_dataset"), + bq_source=[_TEST_FORECASTING_BQ_SOURCE], + create_request_timeout=None, + ) + + gapic_metadata = time_series_dataset.to_dict()["metadata"] + bq_source_uri = gapic_metadata["inputConfig"]["bigquerySource"]["uri"] + + assert _TEST_FORECASTING_BQ_SOURCE == bq_source_uri + assert ( + time_series_dataset.metadata_schema_uri + == aiplatform.schema.dataset.metadata.time_series + ) + + finally: + if time_series_dataset is not None: + time_series_dataset.delete() + def test_export_data(self, storage_client, staging_bucket): """Get an existing dataset, export data to a newly created folder in Google Cloud Storage, then verify data was successfully exported.""" diff --git a/tests/system/aiplatform/test_e2e_forecasting.py b/tests/system/aiplatform/test_e2e_forecasting.py new file mode 100644 index 0000000000..024946b91b --- /dev/null +++ b/tests/system/aiplatform/test_e2e_forecasting.py @@ -0,0 +1,124 @@ +# -*- coding: utf-8 -*- + +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# + +from google.cloud import aiplatform +from google.cloud.aiplatform import training_jobs +from google.cloud.aiplatform.compat.types import job_state +from google.cloud.aiplatform.compat.types import pipeline_state +import pytest +from tests.system.aiplatform import e2e_base + +_TRAINING_DATASET_BQ_PATH = ( + "bq://ucaip-sample-tests:ucaip_test_us_central1.2020_sales_train" +) +_PREDICTION_DATASET_BQ_PATH = ( + "bq://ucaip-sample-tests:ucaip_test_us_central1.2021_sales_predict" +) + + +@pytest.mark.usefixtures("prepare_staging_bucket", "delete_staging_bucket") +class TestEndToEndForecasting(e2e_base.TestEndToEnd): + """End to end system test of the Vertex SDK with forecasting data.""" + + _temp_prefix = "temp-vertex-sdk-e2e-forecasting" + + @pytest.mark.parametrize( + "training_job", + [ + training_jobs.AutoMLForecastingTrainingJob, + pytest.param( + training_jobs.SequenceToSequencePlusForecastingTrainingJob, + marks=pytest.mark.skip(reason="Seq2Seq not yet released."), + ), + ], + ) + def test_end_to_end_forecasting(self, shared_state, training_job): + """Builds a dataset, trains models, and gets batch predictions.""" + resources = [] + + aiplatform.init( + project=e2e_base._PROJECT, + location=e2e_base._LOCATION, + staging_bucket=shared_state["staging_bucket_name"], + ) + try: + ds = aiplatform.TimeSeriesDataset.create( + display_name=self._make_display_name("dataset"), + bq_source=[_TRAINING_DATASET_BQ_PATH], + sync=False, + create_request_timeout=180.0, + ) + resources.append(ds) + + time_column = "date" + time_series_identifier_column = "store_name" + target_column = "sale_dollars" + column_specs = { + time_column: "timestamp", + target_column: "numeric", + "city": "categorical", + "zip_code": "categorical", + "county": "categorical", + } + + job = training_job( + display_name=self._make_display_name("train-housing-forecasting"), + optimization_objective="minimize-rmse", + column_specs=column_specs, + ) + resources.append(job) + + model = job.run( + dataset=ds, + target_column=target_column, + time_column=time_column, + time_series_identifier_column=time_series_identifier_column, + available_at_forecast_columns=[time_column], + unavailable_at_forecast_columns=[target_column], + time_series_attribute_columns=["city", "zip_code", "county"], + forecast_horizon=30, + context_window=30, + data_granularity_unit="day", + data_granularity_count=1, + budget_milli_node_hours=1000, + holiday_regions=["GLOBAL"], + hierarchy_group_total_weight=1, + window_stride_length=1, + model_display_name=self._make_display_name("forecasting-liquor-model"), + sync=False, + ) + resources.append(model) + + batch_prediction_job = model.batch_predict( + job_display_name=self._make_display_name("forecasting-liquor-model"), + instances_format="bigquery", + predictions_format="csv", + machine_type="n1-standard-4", + bigquery_source=_PREDICTION_DATASET_BQ_PATH, + gcs_destination_prefix=( + f'gs://{shared_state["staging_bucket_name"]}/bp_results/' + ), + sync=False, + ) + resources.append(batch_prediction_job) + + batch_prediction_job.wait() + assert job.state == pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED + assert batch_prediction_job.state == job_state.JobState.JOB_STATE_SUCCEEDED + finally: + for resource in resources: + resource.delete() diff --git a/tests/system/aiplatform/test_model_upload.py b/tests/system/aiplatform/test_model_upload.py index 3187453561..48e6169af4 100644 --- a/tests/system/aiplatform/test_model_upload.py +++ b/tests/system/aiplatform/test_model_upload.py @@ -77,9 +77,9 @@ def 
test_upload_and_deploy_xgboost_model(self, shared_state): assert model.description == "new_description" assert model.labels == {"my_label": "updated"} - assert len(endpoint.list_models) == 1 + assert len(endpoint.list_models()) == 1 endpoint.deploy(model, traffic_percentage=100) - assert len(endpoint.list_models) == 2 + assert len(endpoint.list_models()) == 2 traffic_split = { deployed_model.id: 50 for deployed_model in endpoint.list_models() } diff --git a/tests/unit/aiplatform/test_automl_forecasting_training_jobs.py b/tests/unit/aiplatform/test_automl_forecasting_training_jobs.py index 4861470244..64e85befa6 100644 --- a/tests/unit/aiplatform/test_automl_forecasting_training_jobs.py +++ b/tests/unit/aiplatform/test_automl_forecasting_training_jobs.py @@ -24,7 +24,7 @@ from google.cloud.aiplatform import datasets from google.cloud.aiplatform import initializer from google.cloud.aiplatform import schema -from google.cloud.aiplatform.training_jobs import AutoMLForecastingTrainingJob +from google.cloud.aiplatform import training_jobs from google.cloud.aiplatform.compat.services import ( model_service_client, @@ -81,6 +81,14 @@ _TEST_TRAINING_WEIGHT_COLUMN = "weight" _TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME = "minimize-rmse" _TEST_ADDITIONAL_EXPERIMENTS = ["exp1", "exp2"] +_TEST_HIERARCHY_GROUP_COLUMNS = [] +_TEST_HIERARCHY_GROUP_TOTAL_WEIGHT = 1 +_TEST_HIERARCHY_TEMPORAL_TOTAL_WEIGHT = None +_TEST_HIERARCHY_GROUP_TEMPORAL_TOTAL_WEIGHT = None +_TEST_WINDOW_COLUMN = None +_TEST_WINDOW_STRIDE_LENGTH = 1 +_TEST_WINDOW_MAX_COUNT = None +_TEST_TRAINING_HOLIDAY_REGIONS = ["GLOBAL"] _TEST_TRAINING_TASK_INPUTS_DICT = { # required inputs "targetColumn": _TEST_TRAINING_TARGET_COLUMN, @@ -106,6 +114,16 @@ "quantiles": _TEST_TRAINING_QUANTILES, "validationOptions": _TEST_TRAINING_VALIDATION_OPTIONS, "optimizationObjective": _TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, + "hierarchyConfig": { + "groupColumns": _TEST_HIERARCHY_GROUP_COLUMNS, + "groupTotalWeight": _TEST_HIERARCHY_GROUP_TOTAL_WEIGHT, + "temporalTotalWeight": _TEST_HIERARCHY_TEMPORAL_TOTAL_WEIGHT, + "groupTemporalTotalWeight": _TEST_HIERARCHY_GROUP_TEMPORAL_TOTAL_WEIGHT, + }, + "windowConfig": { + "strideLength": _TEST_WINDOW_STRIDE_LENGTH, + }, + "holidayRegions": _TEST_TRAINING_HOLIDAY_REGIONS, } _TEST_TRAINING_TASK_INPUTS_WITH_ADDITIONAL_EXPERIMENTS = json_format.ParseDict( @@ -248,7 +266,7 @@ def mock_dataset_nontimeseries(): @pytest.mark.usefixtures("google_auth_mock") -class TestAutoMLForecastingTrainingJob: +class TestForecastingTrainingJob: def setup_method(self): importlib.reload(initializer) importlib.reload(aiplatform) @@ -257,6 +275,13 @@ def teardown_method(self): initializer.global_pool.shutdown(wait=True) @pytest.mark.parametrize("sync", [True, False]) + @pytest.mark.parametrize( + "training_job", + [ + training_jobs.AutoMLForecastingTrainingJob, + training_jobs.SequenceToSequencePlusForecastingTrainingJob, + ], + ) def test_run_call_pipeline_service_create( self, mock_pipeline_service_create, @@ -264,10 +289,11 @@ def test_run_call_pipeline_service_create( mock_dataset_time_series, mock_model_service_get, sync, + training_job, ): aiplatform.init(project=_TEST_PROJECT, staging_bucket=_TEST_BUCKET_NAME) - job = AutoMLForecastingTrainingJob( + job = training_job( display_name=_TEST_DISPLAY_NAME, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, @@ -297,8 +323,16 @@ def test_run_call_pipeline_service_create( quantiles=_TEST_TRAINING_QUANTILES, 
validation_options=_TEST_TRAINING_VALIDATION_OPTIONS, additional_experiments=_TEST_ADDITIONAL_EXPERIMENTS, + hierarchy_group_columns=_TEST_HIERARCHY_GROUP_COLUMNS, + hierarchy_group_total_weight=_TEST_HIERARCHY_GROUP_TOTAL_WEIGHT, + hierarchy_temporal_total_weight=_TEST_HIERARCHY_TEMPORAL_TOTAL_WEIGHT, + hierarchy_group_temporal_total_weight=_TEST_HIERARCHY_GROUP_TEMPORAL_TOTAL_WEIGHT, + window_column=_TEST_WINDOW_COLUMN, + window_stride_length=_TEST_WINDOW_STRIDE_LENGTH, + window_max_count=_TEST_WINDOW_MAX_COUNT, sync=sync, create_request_timeout=None, + holiday_regions=_TEST_TRAINING_HOLIDAY_REGIONS, ) if not sync: @@ -318,7 +352,7 @@ def test_run_call_pipeline_service_create( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, labels=_TEST_LABELS, - training_task_definition=schema.training_job.definition.automl_forecasting, + training_task_definition=training_job._training_task_definition, training_task_inputs=_TEST_TRAINING_TASK_INPUTS_WITH_ADDITIONAL_EXPERIMENTS, model_to_upload=true_managed_model, input_data_config=true_input_data_config, @@ -345,6 +379,13 @@ def test_run_call_pipeline_service_create( assert job.state == gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED @pytest.mark.parametrize("sync", [True, False]) + @pytest.mark.parametrize( + "training_job", + [ + training_jobs.AutoMLForecastingTrainingJob, + training_jobs.SequenceToSequencePlusForecastingTrainingJob, + ], + ) def test_run_call_pipeline_service_create_with_timeout( self, mock_pipeline_service_create, @@ -352,10 +393,11 @@ def test_run_call_pipeline_service_create_with_timeout( mock_dataset_time_series, mock_model_service_get, sync, + training_job, ): aiplatform.init(project=_TEST_PROJECT, staging_bucket=_TEST_BUCKET_NAME) - job = AutoMLForecastingTrainingJob( + job = training_job( display_name=_TEST_DISPLAY_NAME, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, @@ -385,8 +427,16 @@ def test_run_call_pipeline_service_create_with_timeout( quantiles=_TEST_TRAINING_QUANTILES, validation_options=_TEST_TRAINING_VALIDATION_OPTIONS, additional_experiments=_TEST_ADDITIONAL_EXPERIMENTS, + hierarchy_group_columns=_TEST_HIERARCHY_GROUP_COLUMNS, + hierarchy_group_total_weight=_TEST_HIERARCHY_GROUP_TOTAL_WEIGHT, + hierarchy_temporal_total_weight=_TEST_HIERARCHY_TEMPORAL_TOTAL_WEIGHT, + hierarchy_group_temporal_total_weight=_TEST_HIERARCHY_GROUP_TEMPORAL_TOTAL_WEIGHT, + window_column=_TEST_WINDOW_COLUMN, + window_stride_length=_TEST_WINDOW_STRIDE_LENGTH, + window_max_count=_TEST_WINDOW_MAX_COUNT, sync=sync, create_request_timeout=180.0, + holiday_regions=_TEST_TRAINING_HOLIDAY_REGIONS, ) if not sync: @@ -406,7 +456,7 @@ def test_run_call_pipeline_service_create_with_timeout( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, labels=_TEST_LABELS, - training_task_definition=schema.training_job.definition.automl_forecasting, + training_task_definition=training_job._training_task_definition, training_task_inputs=_TEST_TRAINING_TASK_INPUTS_WITH_ADDITIONAL_EXPERIMENTS, model_to_upload=true_managed_model, input_data_config=true_input_data_config, @@ -420,16 +470,24 @@ def test_run_call_pipeline_service_create_with_timeout( @pytest.mark.usefixtures("mock_pipeline_service_get") @pytest.mark.parametrize("sync", [True, False]) + @pytest.mark.parametrize( + "training_job", + [ + training_jobs.AutoMLForecastingTrainingJob, + 
training_jobs.SequenceToSequencePlusForecastingTrainingJob, + ], + ) def test_run_call_pipeline_if_no_model_display_name_nor_model_labels( self, mock_pipeline_service_create, mock_dataset_time_series, mock_model_service_get, sync, + training_job, ): aiplatform.init(project=_TEST_PROJECT, staging_bucket=_TEST_BUCKET_NAME) - job = AutoMLForecastingTrainingJob( + job = training_job( display_name=_TEST_DISPLAY_NAME, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, @@ -455,8 +513,16 @@ def test_run_call_pipeline_if_no_model_display_name_nor_model_labels( export_evaluated_data_items_override_destination=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION, quantiles=_TEST_TRAINING_QUANTILES, validation_options=_TEST_TRAINING_VALIDATION_OPTIONS, + hierarchy_group_columns=_TEST_HIERARCHY_GROUP_COLUMNS, + hierarchy_group_total_weight=_TEST_HIERARCHY_GROUP_TOTAL_WEIGHT, + hierarchy_temporal_total_weight=_TEST_HIERARCHY_TEMPORAL_TOTAL_WEIGHT, + hierarchy_group_temporal_total_weight=_TEST_HIERARCHY_GROUP_TEMPORAL_TOTAL_WEIGHT, + window_column=_TEST_WINDOW_COLUMN, + window_stride_length=_TEST_WINDOW_STRIDE_LENGTH, + window_max_count=_TEST_WINDOW_MAX_COUNT, sync=sync, create_request_timeout=None, + holiday_regions=_TEST_TRAINING_HOLIDAY_REGIONS, ) if not sync: @@ -475,7 +541,7 @@ def test_run_call_pipeline_if_no_model_display_name_nor_model_labels( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, labels=_TEST_LABELS, - training_task_definition=schema.training_job.definition.automl_forecasting, + training_task_definition=training_job._training_task_definition, training_task_inputs=_TEST_TRAINING_TASK_INPUTS, model_to_upload=true_managed_model, input_data_config=true_input_data_config, @@ -489,16 +555,24 @@ def test_run_call_pipeline_if_no_model_display_name_nor_model_labels( @pytest.mark.usefixtures("mock_pipeline_service_get") @pytest.mark.parametrize("sync", [True, False]) + @pytest.mark.parametrize( + "training_job", + [ + training_jobs.AutoMLForecastingTrainingJob, + training_jobs.SequenceToSequencePlusForecastingTrainingJob, + ], + ) def test_run_call_pipeline_if_set_additional_experiments( self, mock_pipeline_service_create, mock_dataset_time_series, mock_model_service_get, sync, + training_job, ): aiplatform.init(project=_TEST_PROJECT, staging_bucket=_TEST_BUCKET_NAME) - job = AutoMLForecastingTrainingJob( + job = training_job( display_name=_TEST_DISPLAY_NAME, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, @@ -525,8 +599,16 @@ def test_run_call_pipeline_if_set_additional_experiments( export_evaluated_data_items_override_destination=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION, quantiles=_TEST_TRAINING_QUANTILES, validation_options=_TEST_TRAINING_VALIDATION_OPTIONS, + hierarchy_group_columns=_TEST_HIERARCHY_GROUP_COLUMNS, + hierarchy_group_total_weight=_TEST_HIERARCHY_GROUP_TOTAL_WEIGHT, + hierarchy_temporal_total_weight=_TEST_HIERARCHY_TEMPORAL_TOTAL_WEIGHT, + hierarchy_group_temporal_total_weight=_TEST_HIERARCHY_GROUP_TEMPORAL_TOTAL_WEIGHT, + window_column=_TEST_WINDOW_COLUMN, + window_stride_length=_TEST_WINDOW_STRIDE_LENGTH, + window_max_count=_TEST_WINDOW_MAX_COUNT, sync=sync, create_request_timeout=None, + holiday_regions=_TEST_TRAINING_HOLIDAY_REGIONS, ) if not sync: @@ -541,7 +623,7 @@ def test_run_call_pipeline_if_set_additional_experiments( 
true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, - training_task_definition=schema.training_job.definition.automl_forecasting, + training_task_definition=training_job._training_task_definition, training_task_inputs=_TEST_TRAINING_TASK_INPUTS_WITH_ADDITIONAL_EXPERIMENTS, model_to_upload=true_managed_model, input_data_config=true_input_data_config, @@ -559,14 +641,22 @@ def test_run_call_pipeline_if_set_additional_experiments( "mock_model_service_get", ) @pytest.mark.parametrize("sync", [True, False]) + @pytest.mark.parametrize( + "training_job", + [ + training_jobs.AutoMLForecastingTrainingJob, + training_jobs.SequenceToSequencePlusForecastingTrainingJob, + ], + ) def test_run_called_twice_raises( self, mock_dataset_time_series, sync, + training_job, ): aiplatform.init(project=_TEST_PROJECT, staging_bucket=_TEST_BUCKET_NAME) - job = AutoMLForecastingTrainingJob( + job = training_job( display_name=_TEST_DISPLAY_NAME, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, @@ -592,7 +682,15 @@ def test_run_called_twice_raises( export_evaluated_data_items_override_destination=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION, quantiles=_TEST_TRAINING_QUANTILES, validation_options=_TEST_TRAINING_VALIDATION_OPTIONS, + hierarchy_group_columns=_TEST_HIERARCHY_GROUP_COLUMNS, + hierarchy_group_total_weight=_TEST_HIERARCHY_GROUP_TOTAL_WEIGHT, + hierarchy_temporal_total_weight=_TEST_HIERARCHY_TEMPORAL_TOTAL_WEIGHT, + hierarchy_group_temporal_total_weight=_TEST_HIERARCHY_GROUP_TEMPORAL_TOTAL_WEIGHT, + window_column=_TEST_WINDOW_COLUMN, + window_stride_length=_TEST_WINDOW_STRIDE_LENGTH, + window_max_count=_TEST_WINDOW_MAX_COUNT, sync=sync, + holiday_regions=_TEST_TRAINING_HOLIDAY_REGIONS, ) with pytest.raises(RuntimeError): @@ -616,20 +714,36 @@ def test_run_called_twice_raises( export_evaluated_data_items_override_destination=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION, quantiles=_TEST_TRAINING_QUANTILES, validation_options=_TEST_TRAINING_VALIDATION_OPTIONS, + hierarchy_group_columns=_TEST_HIERARCHY_GROUP_COLUMNS, + hierarchy_group_total_weight=_TEST_HIERARCHY_GROUP_TOTAL_WEIGHT, + hierarchy_temporal_total_weight=_TEST_HIERARCHY_TEMPORAL_TOTAL_WEIGHT, + hierarchy_group_temporal_total_weight=_TEST_HIERARCHY_GROUP_TEMPORAL_TOTAL_WEIGHT, + window_column=_TEST_WINDOW_COLUMN, + window_stride_length=_TEST_WINDOW_STRIDE_LENGTH, + window_max_count=_TEST_WINDOW_MAX_COUNT, sync=sync, + holiday_regions=_TEST_TRAINING_HOLIDAY_REGIONS, ) @pytest.mark.parametrize("sync", [True, False]) + @pytest.mark.parametrize( + "training_job", + [ + training_jobs.AutoMLForecastingTrainingJob, + training_jobs.SequenceToSequencePlusForecastingTrainingJob, + ], + ) def test_run_raises_if_pipeline_fails( self, mock_pipeline_service_create_and_get_with_fail, mock_dataset_time_series, sync, + training_job, ): aiplatform.init(project=_TEST_PROJECT, staging_bucket=_TEST_BUCKET_NAME) - job = AutoMLForecastingTrainingJob( + job = training_job( display_name=_TEST_DISPLAY_NAME, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, @@ -656,7 +770,15 @@ def test_run_raises_if_pipeline_fails( export_evaluated_data_items_override_destination=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION, quantiles=_TEST_TRAINING_QUANTILES, validation_options=_TEST_TRAINING_VALIDATION_OPTIONS, + 
hierarchy_group_columns=_TEST_HIERARCHY_GROUP_COLUMNS, + hierarchy_group_total_weight=_TEST_HIERARCHY_GROUP_TOTAL_WEIGHT, + hierarchy_temporal_total_weight=_TEST_HIERARCHY_TEMPORAL_TOTAL_WEIGHT, + hierarchy_group_temporal_total_weight=_TEST_HIERARCHY_GROUP_TEMPORAL_TOTAL_WEIGHT, + window_column=_TEST_WINDOW_COLUMN, + window_stride_length=_TEST_WINDOW_STRIDE_LENGTH, + window_max_count=_TEST_WINDOW_MAX_COUNT, sync=sync, + holiday_regions=_TEST_TRAINING_HOLIDAY_REGIONS, ) if not sync: @@ -665,10 +787,21 @@ def test_run_raises_if_pipeline_fails( with pytest.raises(RuntimeError): job.get_model() - def test_raises_before_run_is_called(self, mock_pipeline_service_create): + @pytest.mark.parametrize( + "training_job", + [ + training_jobs.AutoMLForecastingTrainingJob, + training_jobs.SequenceToSequencePlusForecastingTrainingJob, + ], + ) + def test_raises_before_run_is_called( + self, + mock_pipeline_service_create, + training_job, + ): aiplatform.init(project=_TEST_PROJECT, staging_bucket=_TEST_BUCKET_NAME) - job = AutoMLForecastingTrainingJob( + job = training_job( display_name=_TEST_DISPLAY_NAME, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, @@ -684,6 +817,13 @@ def test_raises_before_run_is_called(self, mock_pipeline_service_create): job.state @pytest.mark.parametrize("sync", [True, False]) + @pytest.mark.parametrize( + "training_job", + [ + training_jobs.AutoMLForecastingTrainingJob, + training_jobs.SequenceToSequencePlusForecastingTrainingJob, + ], + ) def test_splits_fraction( self, mock_pipeline_service_create, @@ -691,10 +831,11 @@ def test_splits_fraction( mock_dataset_time_series, mock_model_service_get, sync, + training_job, ): """ Initiate aiplatform with encryption key name. 
- Create and run an AutoML Video Classification training job, verify calls and return value + Create and run a Forecasting training job, verify calls and return value """ aiplatform.init( @@ -702,7 +843,7 @@ encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, ) - job = AutoMLForecastingTrainingJob( + job = training_job( display_name=_TEST_DISPLAY_NAME, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, @@ -731,8 +872,16 @@ export_evaluated_data_items_override_destination=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION, quantiles=_TEST_TRAINING_QUANTILES, validation_options=_TEST_TRAINING_VALIDATION_OPTIONS, + hierarchy_group_columns=_TEST_HIERARCHY_GROUP_COLUMNS, + hierarchy_group_total_weight=_TEST_HIERARCHY_GROUP_TOTAL_WEIGHT, + hierarchy_temporal_total_weight=_TEST_HIERARCHY_TEMPORAL_TOTAL_WEIGHT, + hierarchy_group_temporal_total_weight=_TEST_HIERARCHY_GROUP_TEMPORAL_TOTAL_WEIGHT, + window_column=_TEST_WINDOW_COLUMN, + window_stride_length=_TEST_WINDOW_STRIDE_LENGTH, + window_max_count=_TEST_WINDOW_MAX_COUNT, sync=sync, create_request_timeout=None, + holiday_regions=_TEST_TRAINING_HOLIDAY_REGIONS, ) if not sync: @@ -756,7 +905,7 @@ true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, - training_task_definition=schema.training_job.definition.automl_forecasting, + training_task_definition=training_job._training_task_definition, training_task_inputs=_TEST_TRAINING_TASK_INPUTS, model_to_upload=true_managed_model, input_data_config=true_input_data_config, @@ -770,6 +919,13 @@ ) @pytest.mark.parametrize("sync", [True, False]) + @pytest.mark.parametrize( + "training_job", + [ + training_jobs.AutoMLForecastingTrainingJob, + training_jobs.SequenceToSequencePlusForecastingTrainingJob, + ], + ) def test_splits_timestamp( self, mock_pipeline_service_create, @@ -777,10 +933,11 @@ mock_dataset_time_series, mock_model_service_get, sync, + training_job, ): """Initiate aiplatform with encryption key name.
- Create and run an AutoML Forecasting training job, verify calls and + Create and run a Forecasting training job, verify calls and return value """ @@ -789,7 +946,7 @@ encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, ) - job = AutoMLForecastingTrainingJob( + job = training_job( display_name=_TEST_DISPLAY_NAME, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, @@ -819,8 +976,16 @@ export_evaluated_data_items_override_destination=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION, quantiles=_TEST_TRAINING_QUANTILES, validation_options=_TEST_TRAINING_VALIDATION_OPTIONS, + hierarchy_group_columns=_TEST_HIERARCHY_GROUP_COLUMNS, + hierarchy_group_total_weight=_TEST_HIERARCHY_GROUP_TOTAL_WEIGHT, + hierarchy_temporal_total_weight=_TEST_HIERARCHY_TEMPORAL_TOTAL_WEIGHT, + hierarchy_group_temporal_total_weight=_TEST_HIERARCHY_GROUP_TEMPORAL_TOTAL_WEIGHT, + window_column=_TEST_WINDOW_COLUMN, + window_stride_length=_TEST_WINDOW_STRIDE_LENGTH, + window_max_count=_TEST_WINDOW_MAX_COUNT, sync=sync, create_request_timeout=None, + holiday_regions=_TEST_TRAINING_HOLIDAY_REGIONS, ) if not sync: @@ -844,9 +1009,7 @@ true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, - training_task_definition=( - schema.training_job.definition.automl_forecasting - ), + training_task_definition=training_job._training_task_definition, training_task_inputs=_TEST_TRAINING_TASK_INPUTS, model_to_upload=true_managed_model, input_data_config=true_input_data_config, @@ -860,6 +1023,13 @@ ) @pytest.mark.parametrize("sync", [True, False]) + @pytest.mark.parametrize( + "training_job", + [ + training_jobs.AutoMLForecastingTrainingJob, + training_jobs.SequenceToSequencePlusForecastingTrainingJob, + ], + ) def test_splits_predefined( self, mock_pipeline_service_create, @@ -867,10 +1037,11 @@ mock_dataset_time_series, mock_model_service_get, sync, + training_job, ): """ Initiate aiplatform with encryption key name.
- Create and run an AutoML Video Classification training job, verify calls and return value + Create and run a Forecasting training job, verify calls and return value """ aiplatform.init( @@ -878,7 +1049,7 @@ encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, ) - job = AutoMLForecastingTrainingJob( + job = training_job( display_name=_TEST_DISPLAY_NAME, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, @@ -905,8 +1076,16 @@ export_evaluated_data_items_override_destination=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION, quantiles=_TEST_TRAINING_QUANTILES, validation_options=_TEST_TRAINING_VALIDATION_OPTIONS, + hierarchy_group_columns=_TEST_HIERARCHY_GROUP_COLUMNS, + hierarchy_group_total_weight=_TEST_HIERARCHY_GROUP_TOTAL_WEIGHT, + hierarchy_temporal_total_weight=_TEST_HIERARCHY_TEMPORAL_TOTAL_WEIGHT, + hierarchy_group_temporal_total_weight=_TEST_HIERARCHY_GROUP_TEMPORAL_TOTAL_WEIGHT, + window_column=_TEST_WINDOW_COLUMN, + window_stride_length=_TEST_WINDOW_STRIDE_LENGTH, + window_max_count=_TEST_WINDOW_MAX_COUNT, sync=sync, create_request_timeout=None, + holiday_regions=_TEST_TRAINING_HOLIDAY_REGIONS, ) if not sync: @@ -928,7 +1107,7 @@ true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, - training_task_definition=schema.training_job.definition.automl_forecasting, + training_task_definition=training_job._training_task_definition, training_task_inputs=_TEST_TRAINING_TASK_INPUTS, model_to_upload=true_managed_model, input_data_config=true_input_data_config, @@ -942,6 +1121,13 @@ ) @pytest.mark.parametrize("sync", [True, False]) + @pytest.mark.parametrize( + "training_job", + [ + training_jobs.AutoMLForecastingTrainingJob, + training_jobs.SequenceToSequencePlusForecastingTrainingJob, + ], + ) def test_splits_default( self, mock_pipeline_service_create, @@ -949,10 +1135,11 @@ mock_dataset_time_series, mock_model_service_get, sync, + training_job, ): """ Initiate aiplatform with encryption key name.
- Create and run an AutoML Video Classification training job, verify calls and return value + Create and run a Forecasting training job, verify calls and return value """ aiplatform.init( @@ -960,7 +1147,7 @@ encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, ) - job = AutoMLForecastingTrainingJob( + job = training_job( display_name=_TEST_DISPLAY_NAME, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, @@ -986,8 +1173,16 @@ export_evaluated_data_items_override_destination=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION, quantiles=_TEST_TRAINING_QUANTILES, validation_options=_TEST_TRAINING_VALIDATION_OPTIONS, + hierarchy_group_columns=_TEST_HIERARCHY_GROUP_COLUMNS, + hierarchy_group_total_weight=_TEST_HIERARCHY_GROUP_TOTAL_WEIGHT, + hierarchy_temporal_total_weight=_TEST_HIERARCHY_TEMPORAL_TOTAL_WEIGHT, + hierarchy_group_temporal_total_weight=_TEST_HIERARCHY_GROUP_TEMPORAL_TOTAL_WEIGHT, + window_column=_TEST_WINDOW_COLUMN, + window_stride_length=_TEST_WINDOW_STRIDE_LENGTH, + window_max_count=_TEST_WINDOW_MAX_COUNT, sync=sync, create_request_timeout=None, + holiday_regions=_TEST_TRAINING_HOLIDAY_REGIONS, ) if not sync: @@ -1004,7 +1199,7 @@ true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, - training_task_definition=schema.training_job.definition.automl_forecasting, + training_task_definition=training_job._training_task_definition, training_task_inputs=_TEST_TRAINING_TASK_INPUTS, model_to_upload=true_managed_model, input_data_config=true_input_data_config, diff --git a/tests/unit/aiplatform/test_cloud_profiler.py b/tests/unit/aiplatform/test_cloud_profiler.py index 6306fcd76f..388405d034 100644 --- a/tests/unit/aiplatform/test_cloud_profiler.py +++ b/tests/unit/aiplatform/test_cloud_profiler.py @@ -175,13 +175,13 @@ def tf_import_mock(name, *args, **kwargs): def testCanInitializeTFVersion(self): import tensorflow - with mock.patch.dict(tensorflow.__dict__, {"__version__": "1.2.3.4"}): + with mock.patch.object(tensorflow, "__version__", new="1.2.3.4"): assert not TFProfiler.can_initialize() def testCanInitializeOldTFVersion(self): import tensorflow - with mock.patch.dict(tensorflow.__dict__, {"__version__": "2.3.0"}): + with mock.patch.object(tensorflow, "__version__", new="2.3.0"): assert not TFProfiler.can_initialize() def testCanInitializeNoProfilePlugin(self): diff --git a/tests/unit/aiplatform/test_endpoints.py b/tests/unit/aiplatform/test_endpoints.py index 3e79328b34..50a6c93303 100644 --- a/tests/unit/aiplatform/test_endpoints.py +++ b/tests/unit/aiplatform/test_endpoints.py @@ -279,8 +279,11 @@ def update_endpoint_mock(): with mock.patch.object( endpoint_service_client.EndpointServiceClient, "update_endpoint" ) as update_endpoint_mock: - update_endpoint_lro_mock = mock.Mock(ga_operation.Operation) - update_endpoint_mock.return_value = update_endpoint_lro_mock + update_endpoint_mock.return_value = gca_endpoint.Endpoint( + display_name=_TEST_DISPLAY_NAME, + name=_TEST_ENDPOINT_NAME, + encryption_spec=_TEST_ENCRYPTION_SPEC, + ) yield update_endpoint_mock @@ -768,9 +771,18 @@ def test_update_endpoint(self, update_endpoint_mock): timeout=_TEST_TIMEOUT, ) + update_endpoint_mock.return_value = gca_endpoint.Endpoint( + name=_TEST_ENDPOINT_NAME, + display_name=_TEST_DISPLAY_NAME, + description=_TEST_DESCRIPTION, + labels=_TEST_LABELS, +
encryption_spec=_TEST_ENCRYPTION_SPEC, + ) + @pytest.mark.usefixtures("get_endpoint_with_models_mock") def test_update_traffic_split(self, update_endpoint_mock): endpoint = models.Endpoint(_TEST_ENDPOINT_NAME) + endpoint.update(traffic_split={_TEST_ID: 10, _TEST_ID_2: 80, _TEST_ID_3: 10}) expected_endpoint = gca_endpoint.Endpoint( @@ -788,6 +800,12 @@ def test_update_traffic_split(self, update_endpoint_mock): timeout=_TEST_TIMEOUT, ) + update_endpoint_mock.return_value = gca_endpoint.Endpoint( + display_name=_TEST_DISPLAY_NAME, + name=_TEST_ENDPOINT_NAME, + traffic_split={_TEST_ID: 10, _TEST_ID_2: 80, _TEST_ID_3: 10}, + ) + @pytest.mark.usefixtures("get_endpoint_mock", "get_model_mock") @pytest.mark.parametrize("sync", [True, False]) def test_deploy(self, deploy_model_mock, sync): diff --git a/tests/unit/aiplatform/test_explain_saved_model_metadata_builder_tf1_test.py b/tests/unit/aiplatform/test_explain_saved_model_metadata_builder_tf1_test.py index 1300df5968..fc178b1ce5 100644 --- a/tests/unit/aiplatform/test_explain_saved_model_metadata_builder_tf1_test.py +++ b/tests/unit/aiplatform/test_explain_saved_model_metadata_builder_tf1_test.py @@ -26,6 +26,7 @@ from test_models import upload_model_mock, get_model_mock # noqa: F401 +@pytest.mark.usefixtures("google_auth_mock") class SavedModelMetadataBuilderTF1Test(tf.test.TestCase): def _set_up(self): self.sess = tf.Session(graph=tf.Graph()) diff --git a/tests/unit/aiplatform/test_explain_saved_model_metadata_builder_tf2_test.py b/tests/unit/aiplatform/test_explain_saved_model_metadata_builder_tf2_test.py index a18eed243c..82493db24e 100644 --- a/tests/unit/aiplatform/test_explain_saved_model_metadata_builder_tf2_test.py +++ b/tests/unit/aiplatform/test_explain_saved_model_metadata_builder_tf2_test.py @@ -27,6 +27,7 @@ from test_models import upload_model_mock, get_model_mock # noqa: F401 +@pytest.mark.usefixtures("google_auth_mock") class SavedModelMetadataBuilderTF2Test(tf.test.TestCase): def _set_up_sequential(self): # Set up for the sequential. 
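Two of the SDK behavior changes covered by these test updates are easiest to see in plain usage: `Endpoint.update()` now returns the updated `Endpoint` resource instead of a long-running-operation handle (exercised by the endpoint tests above), and the `test_pipeline_jobs.py` diff that follows exercises the new `PipelineJob.clone()` method. The sketch below is illustrative only; the project, location, endpoint ID, label values, template path, and parameter values are placeholders, not values taken from this change.

```python
from google.cloud import aiplatform

# Placeholder project/location/bucket values, for illustration only.
aiplatform.init(
    project="my-project",
    location="us-central1",
    staging_bucket="gs://my-bucket",
)

# Endpoint.update() now returns the refreshed Endpoint resource, so callers
# can use the result directly instead of re-fetching the endpoint.
endpoint = aiplatform.Endpoint("1234567890")  # placeholder endpoint ID
updated = endpoint.update(labels={"env": "test"})
print(updated.display_name, updated.labels)

# PipelineJob.clone() returns a new, not-yet-submitted PipelineJob that reuses
# the original job's pipeline spec and runtime configuration; overrides such as
# job_id, display_name, or pipeline_root can be passed as keyword arguments.
job = aiplatform.PipelineJob(
    display_name="my-pipeline",
    template_path="gs://my-bucket/pipeline.json",  # placeholder compiled spec
    parameter_values={"learning_rate": 0.01},
)
cloned = job.clone(job_id="cloned-run-1")  # placeholder job ID
cloned.submit()
```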
diff --git a/tests/unit/aiplatform/test_pipeline_jobs.py b/tests/unit/aiplatform/test_pipeline_jobs.py index 159400f8ce..1f6f2bb50c 100644 --- a/tests/unit/aiplatform/test_pipeline_jobs.py +++ b/tests/unit/aiplatform/test_pipeline_jobs.py @@ -1038,3 +1038,166 @@ def test_pipeline_failure_raises(self, mock_load_yaml_and_json, sync): if not sync: job.wait() + + @pytest.mark.parametrize( + "job_spec", + [_TEST_PIPELINE_SPEC_JSON, _TEST_PIPELINE_SPEC_YAML, _TEST_PIPELINE_JOB], + ) + def test_clone_pipeline_job( + self, + mock_pipeline_service_create, + mock_pipeline_service_get, + job_spec, + mock_load_yaml_and_json, + ): + aiplatform.init( + project=_TEST_PROJECT, + staging_bucket=_TEST_GCS_BUCKET_NAME, + location=_TEST_LOCATION, + credentials=_TEST_CREDENTIALS, + ) + + job = pipeline_jobs.PipelineJob( + display_name=_TEST_PIPELINE_JOB_DISPLAY_NAME, + template_path=_TEST_TEMPLATE_PATH, + job_id=_TEST_PIPELINE_JOB_ID, + parameter_values=_TEST_PIPELINE_PARAMETER_VALUES, + enable_caching=True, + ) + + cloned = job.clone(job_id=f"cloned-{_TEST_PIPELINE_JOB_ID}") + + cloned.submit( + service_account=_TEST_SERVICE_ACCOUNT, + network=_TEST_NETWORK, + create_request_timeout=None, + ) + + expected_runtime_config_dict = { + "gcsOutputDirectory": _TEST_GCS_BUCKET_NAME, + "parameterValues": _TEST_PIPELINE_PARAMETER_VALUES, + } + runtime_config = gca_pipeline_job.PipelineJob.RuntimeConfig()._pb + json_format.ParseDict(expected_runtime_config_dict, runtime_config) + + job_spec = yaml.safe_load(job_spec) + pipeline_spec = job_spec.get("pipelineSpec") or job_spec + + # Construct expected request + expected_gapic_pipeline_job = gca_pipeline_job.PipelineJob( + display_name=_TEST_PIPELINE_JOB_DISPLAY_NAME, + pipeline_spec={ + "components": {}, + "pipelineInfo": pipeline_spec["pipelineInfo"], + "root": pipeline_spec["root"], + "schemaVersion": "2.1.0", + }, + runtime_config=runtime_config, + service_account=_TEST_SERVICE_ACCOUNT, + network=_TEST_NETWORK, + ) + + mock_pipeline_service_create.assert_called_once_with( + parent=_TEST_PARENT, + pipeline_job=expected_gapic_pipeline_job, + pipeline_job_id=f"cloned-{_TEST_PIPELINE_JOB_ID}", + timeout=None, + ) + + assert not mock_pipeline_service_get.called + + cloned.wait() + + mock_pipeline_service_get.assert_called_with( + name=_TEST_PIPELINE_JOB_NAME, retry=base._DEFAULT_RETRY + ) + + assert cloned._gca_resource == make_pipeline_job( + gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED + ) + + @pytest.mark.parametrize( + "job_spec", + [_TEST_PIPELINE_SPEC_JSON, _TEST_PIPELINE_SPEC_YAML, _TEST_PIPELINE_JOB], + ) + def test_clone_pipeline_job_with_all_args( + self, + mock_pipeline_service_create, + mock_pipeline_service_get, + job_spec, + mock_load_yaml_and_json, + ): + aiplatform.init( + project=_TEST_PROJECT, + staging_bucket=_TEST_GCS_BUCKET_NAME, + location=_TEST_LOCATION, + credentials=_TEST_CREDENTIALS, + ) + + job = pipeline_jobs.PipelineJob( + display_name=_TEST_PIPELINE_JOB_DISPLAY_NAME, + template_path=_TEST_TEMPLATE_PATH, + job_id=_TEST_PIPELINE_JOB_ID, + parameter_values=_TEST_PIPELINE_PARAMETER_VALUES, + enable_caching=True, + ) + + cloned = job.clone( + display_name=f"cloned-{_TEST_PIPELINE_JOB_DISPLAY_NAME}", + job_id=f"cloned-{_TEST_PIPELINE_JOB_ID}", + pipeline_root=f"cloned-{_TEST_GCS_BUCKET_NAME}", + parameter_values=_TEST_PIPELINE_PARAMETER_VALUES, + enable_caching=True, + credentials=_TEST_CREDENTIALS, + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + + cloned.submit( + service_account=_TEST_SERVICE_ACCOUNT, + network=_TEST_NETWORK, 
+ create_request_timeout=None, + ) + + expected_runtime_config_dict = { + "gcsOutputDirectory": f"cloned-{_TEST_GCS_BUCKET_NAME}", + "parameterValues": _TEST_PIPELINE_PARAMETER_VALUES, + } + runtime_config = gca_pipeline_job.PipelineJob.RuntimeConfig()._pb + json_format.ParseDict(expected_runtime_config_dict, runtime_config) + + job_spec = yaml.safe_load(job_spec) + pipeline_spec = job_spec.get("pipelineSpec") or job_spec + + # Construct expected request + expected_gapic_pipeline_job = gca_pipeline_job.PipelineJob( + display_name=f"cloned-{_TEST_PIPELINE_JOB_DISPLAY_NAME}", + pipeline_spec={ + "components": {}, + "pipelineInfo": pipeline_spec["pipelineInfo"], + "root": pipeline_spec["root"], + "schemaVersion": "2.1.0", + }, + runtime_config=runtime_config, + service_account=_TEST_SERVICE_ACCOUNT, + network=_TEST_NETWORK, + ) + + mock_pipeline_service_create.assert_called_once_with( + parent=_TEST_PARENT, + pipeline_job=expected_gapic_pipeline_job, + pipeline_job_id=f"cloned-{_TEST_PIPELINE_JOB_ID}", + timeout=None, + ) + + assert not mock_pipeline_service_get.called + + cloned.wait() + + mock_pipeline_service_get.assert_called_with( + name=_TEST_PIPELINE_JOB_NAME, retry=base._DEFAULT_RETRY + ) + + assert cloned._gca_resource == make_pipeline_job( + gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED + ) diff --git a/tests/unit/aiplatform/test_training_jobs.py b/tests/unit/aiplatform/test_training_jobs.py index a956584663..47a5ae01a2 100644 --- a/tests/unit/aiplatform/test_training_jobs.py +++ b/tests/unit/aiplatform/test_training_jobs.py @@ -330,6 +330,9 @@ def mock_get_backing_custom_job_with_enable_web_access(): yield get_custom_job_mock +@pytest.mark.skipif( + sys.executable is None, reason="requires python path to invoke subprocess" +) @pytest.mark.usefixtures("google_auth_mock") class TestTrainingScriptPythonPackagerHelpers: def setup_method(self): @@ -446,6 +449,9 @@ def test_get_python_executable_returns_python_executable(self): assert "python" in source_utils._get_python_executable().lower() +@pytest.mark.skipif( + sys.executable is None, reason="requires python path to invoke subprocess" +) @pytest.mark.usefixtures("google_auth_mock") class TestTrainingScriptPythonPackager: def setup_method(self): diff --git a/tests/unit/aiplatform/test_uploader.py b/tests/unit/aiplatform/test_uploader.py index 44cb1bb11a..3d064075b9 100644 --- a/tests/unit/aiplatform/test_uploader.py +++ b/tests/unit/aiplatform/test_uploader.py @@ -44,7 +44,7 @@ from google.api_core import datetime_helpers from google.cloud.aiplatform.tensorboard import uploader_utils from google.cloud.aiplatform.tensorboard.plugins.tf_profiler import profile_uploader -import google.cloud.aiplatform.tensorboard.uploader as uploader_lib +from google.cloud.aiplatform.tensorboard import uploader as uploader_lib from google.cloud import storage from google.cloud.aiplatform.compat.services import ( tensorboard_service_client, diff --git a/tests/unit/gapic/aiplatform_v1/test_dataset_service.py b/tests/unit/gapic/aiplatform_v1/test_dataset_service.py index f46c4ec2e1..15b5143fbb 100644 --- a/tests/unit/gapic/aiplatform_v1/test_dataset_service.py +++ b/tests/unit/gapic/aiplatform_v1/test_dataset_service.py @@ -14,7 +14,13 @@ # limitations under the License. 
# import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio diff --git a/tests/unit/gapic/aiplatform_v1/test_endpoint_service.py b/tests/unit/gapic/aiplatform_v1/test_endpoint_service.py index 7a6b3792af..c1e7478183 100644 --- a/tests/unit/gapic/aiplatform_v1/test_endpoint_service.py +++ b/tests/unit/gapic/aiplatform_v1/test_endpoint_service.py @@ -14,7 +14,13 @@ # limitations under the License. # import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio diff --git a/tests/unit/gapic/aiplatform_v1/test_featurestore_online_serving_service.py b/tests/unit/gapic/aiplatform_v1/test_featurestore_online_serving_service.py index 906e745604..23a073890c 100644 --- a/tests/unit/gapic/aiplatform_v1/test_featurestore_online_serving_service.py +++ b/tests/unit/gapic/aiplatform_v1/test_featurestore_online_serving_service.py @@ -14,7 +14,13 @@ # limitations under the License. # import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio diff --git a/tests/unit/gapic/aiplatform_v1/test_featurestore_service.py b/tests/unit/gapic/aiplatform_v1/test_featurestore_service.py index ac236b5eb5..1a8c648044 100644 --- a/tests/unit/gapic/aiplatform_v1/test_featurestore_service.py +++ b/tests/unit/gapic/aiplatform_v1/test_featurestore_service.py @@ -14,7 +14,13 @@ # limitations under the License. # import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio diff --git a/tests/unit/gapic/aiplatform_v1/test_index_endpoint_service.py b/tests/unit/gapic/aiplatform_v1/test_index_endpoint_service.py index 3a2f967be0..19b768a38c 100644 --- a/tests/unit/gapic/aiplatform_v1/test_index_endpoint_service.py +++ b/tests/unit/gapic/aiplatform_v1/test_index_endpoint_service.py @@ -14,7 +14,13 @@ # limitations under the License. # import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio diff --git a/tests/unit/gapic/aiplatform_v1/test_index_service.py b/tests/unit/gapic/aiplatform_v1/test_index_service.py index f26c29e405..50635f9d66 100644 --- a/tests/unit/gapic/aiplatform_v1/test_index_service.py +++ b/tests/unit/gapic/aiplatform_v1/test_index_service.py @@ -14,7 +14,13 @@ # limitations under the License. # import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio diff --git a/tests/unit/gapic/aiplatform_v1/test_job_service.py b/tests/unit/gapic/aiplatform_v1/test_job_service.py index c02439e56a..658b29d607 100644 --- a/tests/unit/gapic/aiplatform_v1/test_job_service.py +++ b/tests/unit/gapic/aiplatform_v1/test_job_service.py @@ -14,7 +14,13 @@ # limitations under the License. 
# import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio diff --git a/tests/unit/gapic/aiplatform_v1/test_metadata_service.py b/tests/unit/gapic/aiplatform_v1/test_metadata_service.py index 150b4c8ab1..1136c8c23b 100644 --- a/tests/unit/gapic/aiplatform_v1/test_metadata_service.py +++ b/tests/unit/gapic/aiplatform_v1/test_metadata_service.py @@ -14,7 +14,13 @@ # limitations under the License. # import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio diff --git a/tests/unit/gapic/aiplatform_v1/test_migration_service.py b/tests/unit/gapic/aiplatform_v1/test_migration_service.py index 2d38e9a189..0072bf2e0c 100644 --- a/tests/unit/gapic/aiplatform_v1/test_migration_service.py +++ b/tests/unit/gapic/aiplatform_v1/test_migration_service.py @@ -14,7 +14,13 @@ # limitations under the License. # import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio @@ -1933,19 +1939,22 @@ def test_parse_annotated_dataset_path(): def test_dataset_path(): project = "cuttlefish" - dataset = "mussel" - expected = "projects/{project}/datasets/{dataset}".format( + location = "mussel" + dataset = "winkle" + expected = "projects/{project}/locations/{location}/datasets/{dataset}".format( project=project, + location=location, dataset=dataset, ) - actual = MigrationServiceClient.dataset_path(project, dataset) + actual = MigrationServiceClient.dataset_path(project, location, dataset) assert expected == actual def test_parse_dataset_path(): expected = { - "project": "winkle", - "dataset": "nautilus", + "project": "nautilus", + "location": "scallop", + "dataset": "abalone", } path = MigrationServiceClient.dataset_path(**expected) @@ -1955,22 +1964,19 @@ def test_parse_dataset_path(): def test_dataset_path(): - project = "scallop" - location = "abalone" - dataset = "squid" - expected = "projects/{project}/locations/{location}/datasets/{dataset}".format( + project = "squid" + dataset = "clam" + expected = "projects/{project}/datasets/{dataset}".format( project=project, - location=location, dataset=dataset, ) - actual = MigrationServiceClient.dataset_path(project, location, dataset) + actual = MigrationServiceClient.dataset_path(project, dataset) assert expected == actual def test_parse_dataset_path(): expected = { - "project": "clam", - "location": "whelk", + "project": "whelk", "dataset": "octopus", } path = MigrationServiceClient.dataset_path(**expected) diff --git a/tests/unit/gapic/aiplatform_v1/test_model_service.py b/tests/unit/gapic/aiplatform_v1/test_model_service.py index a02df4c752..aa6029b483 100644 --- a/tests/unit/gapic/aiplatform_v1/test_model_service.py +++ b/tests/unit/gapic/aiplatform_v1/test_model_service.py @@ -14,7 +14,13 @@ # limitations under the License. 
# import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio @@ -2384,6 +2390,7 @@ def test_import_model_evaluation(request_type, transport: str = "grpc"): # Designate an appropriate return value for the call. call.return_value = gca_model_evaluation.ModelEvaluation( name="name_value", + display_name="display_name_value", metrics_schema_uri="metrics_schema_uri_value", slice_dimensions=["slice_dimensions_value"], data_item_schema_uri="data_item_schema_uri_value", @@ -2399,6 +2406,7 @@ def test_import_model_evaluation(request_type, transport: str = "grpc"): # Establish that the response is the type that we expect. assert isinstance(response, gca_model_evaluation.ModelEvaluation) assert response.name == "name_value" + assert response.display_name == "display_name_value" assert response.metrics_schema_uri == "metrics_schema_uri_value" assert response.slice_dimensions == ["slice_dimensions_value"] assert response.data_item_schema_uri == "data_item_schema_uri_value" @@ -2445,6 +2453,7 @@ async def test_import_model_evaluation_async( call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( gca_model_evaluation.ModelEvaluation( name="name_value", + display_name="display_name_value", metrics_schema_uri="metrics_schema_uri_value", slice_dimensions=["slice_dimensions_value"], data_item_schema_uri="data_item_schema_uri_value", @@ -2461,6 +2470,7 @@ async def test_import_model_evaluation_async( # Establish that the response is the type that we expect. assert isinstance(response, gca_model_evaluation.ModelEvaluation) assert response.name == "name_value" + assert response.display_name == "display_name_value" assert response.metrics_schema_uri == "metrics_schema_uri_value" assert response.slice_dimensions == ["slice_dimensions_value"] assert response.data_item_schema_uri == "data_item_schema_uri_value" @@ -2657,6 +2667,7 @@ def test_get_model_evaluation(request_type, transport: str = "grpc"): # Designate an appropriate return value for the call. call.return_value = model_evaluation.ModelEvaluation( name="name_value", + display_name="display_name_value", metrics_schema_uri="metrics_schema_uri_value", slice_dimensions=["slice_dimensions_value"], data_item_schema_uri="data_item_schema_uri_value", @@ -2672,6 +2683,7 @@ def test_get_model_evaluation(request_type, transport: str = "grpc"): # Establish that the response is the type that we expect. assert isinstance(response, model_evaluation.ModelEvaluation) assert response.name == "name_value" + assert response.display_name == "display_name_value" assert response.metrics_schema_uri == "metrics_schema_uri_value" assert response.slice_dimensions == ["slice_dimensions_value"] assert response.data_item_schema_uri == "data_item_schema_uri_value" @@ -2718,6 +2730,7 @@ async def test_get_model_evaluation_async( call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( model_evaluation.ModelEvaluation( name="name_value", + display_name="display_name_value", metrics_schema_uri="metrics_schema_uri_value", slice_dimensions=["slice_dimensions_value"], data_item_schema_uri="data_item_schema_uri_value", @@ -2734,6 +2747,7 @@ async def test_get_model_evaluation_async( # Establish that the response is the type that we expect. 
assert isinstance(response, model_evaluation.ModelEvaluation) assert response.name == "name_value" + assert response.display_name == "display_name_value" assert response.metrics_schema_uri == "metrics_schema_uri_value" assert response.slice_dimensions == ["slice_dimensions_value"] assert response.data_item_schema_uri == "data_item_schema_uri_value" diff --git a/tests/unit/gapic/aiplatform_v1/test_pipeline_service.py b/tests/unit/gapic/aiplatform_v1/test_pipeline_service.py index 86a01f4d50..c809b80ed2 100644 --- a/tests/unit/gapic/aiplatform_v1/test_pipeline_service.py +++ b/tests/unit/gapic/aiplatform_v1/test_pipeline_service.py @@ -14,7 +14,13 @@ # limitations under the License. # import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio @@ -52,6 +58,7 @@ from google.cloud.aiplatform_v1.types import io from google.cloud.aiplatform_v1.types import model from google.cloud.aiplatform_v1.types import operation as gca_operation +from google.cloud.aiplatform_v1.types import pipeline_failure_policy from google.cloud.aiplatform_v1.types import pipeline_job from google.cloud.aiplatform_v1.types import pipeline_job as gca_pipeline_job from google.cloud.aiplatform_v1.types import pipeline_service @@ -2163,6 +2170,7 @@ def test_create_pipeline_job(request_type, transport: str = "grpc"): state=pipeline_state.PipelineState.PIPELINE_STATE_QUEUED, service_account="service_account_value", network="network_value", + template_uri="template_uri_value", ) response = client.create_pipeline_job(request) @@ -2178,6 +2186,7 @@ def test_create_pipeline_job(request_type, transport: str = "grpc"): assert response.state == pipeline_state.PipelineState.PIPELINE_STATE_QUEUED assert response.service_account == "service_account_value" assert response.network == "network_value" + assert response.template_uri == "template_uri_value" def test_create_pipeline_job_empty_call(): @@ -2224,6 +2233,7 @@ async def test_create_pipeline_job_async( state=pipeline_state.PipelineState.PIPELINE_STATE_QUEUED, service_account="service_account_value", network="network_value", + template_uri="template_uri_value", ) ) response = await client.create_pipeline_job(request) @@ -2240,6 +2250,7 @@ async def test_create_pipeline_job_async( assert response.state == pipeline_state.PipelineState.PIPELINE_STATE_QUEUED assert response.service_account == "service_account_value" assert response.network == "network_value" + assert response.template_uri == "template_uri_value" @pytest.mark.asyncio @@ -2444,6 +2455,7 @@ def test_get_pipeline_job(request_type, transport: str = "grpc"): state=pipeline_state.PipelineState.PIPELINE_STATE_QUEUED, service_account="service_account_value", network="network_value", + template_uri="template_uri_value", ) response = client.get_pipeline_job(request) @@ -2459,6 +2471,7 @@ def test_get_pipeline_job(request_type, transport: str = "grpc"): assert response.state == pipeline_state.PipelineState.PIPELINE_STATE_QUEUED assert response.service_account == "service_account_value" assert response.network == "network_value" + assert response.template_uri == "template_uri_value" def test_get_pipeline_job_empty_call(): @@ -2500,6 +2513,7 @@ async def test_get_pipeline_job_async( state=pipeline_state.PipelineState.PIPELINE_STATE_QUEUED, service_account="service_account_value", network="network_value", + template_uri="template_uri_value", ) ) response = await 
client.get_pipeline_job(request) @@ -2516,6 +2530,7 @@ async def test_get_pipeline_job_async( assert response.state == pipeline_state.PipelineState.PIPELINE_STATE_QUEUED assert response.service_account == "service_account_value" assert response.network == "network_value" + assert response.template_uri == "template_uri_value" @pytest.mark.asyncio diff --git a/tests/unit/gapic/aiplatform_v1/test_prediction_service.py b/tests/unit/gapic/aiplatform_v1/test_prediction_service.py index 2f1813de9f..a6c3c1d5d4 100644 --- a/tests/unit/gapic/aiplatform_v1/test_prediction_service.py +++ b/tests/unit/gapic/aiplatform_v1/test_prediction_service.py @@ -14,7 +14,13 @@ # limitations under the License. # import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio diff --git a/tests/unit/gapic/aiplatform_v1/test_specialist_pool_service.py b/tests/unit/gapic/aiplatform_v1/test_specialist_pool_service.py index 16ac1df1ea..000f24e24a 100644 --- a/tests/unit/gapic/aiplatform_v1/test_specialist_pool_service.py +++ b/tests/unit/gapic/aiplatform_v1/test_specialist_pool_service.py @@ -14,7 +14,13 @@ # limitations under the License. # import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio diff --git a/tests/unit/gapic/aiplatform_v1/test_tensorboard_service.py b/tests/unit/gapic/aiplatform_v1/test_tensorboard_service.py index 5eb0930a18..7d620f095c 100644 --- a/tests/unit/gapic/aiplatform_v1/test_tensorboard_service.py +++ b/tests/unit/gapic/aiplatform_v1/test_tensorboard_service.py @@ -14,7 +14,13 @@ # limitations under the License. # import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio diff --git a/tests/unit/gapic/aiplatform_v1/test_vizier_service.py b/tests/unit/gapic/aiplatform_v1/test_vizier_service.py index 0c9bd53d79..ede60e5893 100644 --- a/tests/unit/gapic/aiplatform_v1/test_vizier_service.py +++ b/tests/unit/gapic/aiplatform_v1/test_vizier_service.py @@ -14,7 +14,13 @@ # limitations under the License. # import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio diff --git a/tests/unit/gapic/aiplatform_v1beta1/test_dataset_service.py b/tests/unit/gapic/aiplatform_v1beta1/test_dataset_service.py index 469624d9b5..b38db766a0 100644 --- a/tests/unit/gapic/aiplatform_v1beta1/test_dataset_service.py +++ b/tests/unit/gapic/aiplatform_v1beta1/test_dataset_service.py @@ -14,7 +14,13 @@ # limitations under the License. 
# import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio diff --git a/tests/unit/gapic/aiplatform_v1beta1/test_endpoint_service.py b/tests/unit/gapic/aiplatform_v1beta1/test_endpoint_service.py index 6900f5853c..10a51e6d92 100644 --- a/tests/unit/gapic/aiplatform_v1beta1/test_endpoint_service.py +++ b/tests/unit/gapic/aiplatform_v1beta1/test_endpoint_service.py @@ -14,7 +14,13 @@ # limitations under the License. # import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio diff --git a/tests/unit/gapic/aiplatform_v1beta1/test_featurestore_online_serving_service.py b/tests/unit/gapic/aiplatform_v1beta1/test_featurestore_online_serving_service.py index b216ab128b..c3d96f8657 100644 --- a/tests/unit/gapic/aiplatform_v1beta1/test_featurestore_online_serving_service.py +++ b/tests/unit/gapic/aiplatform_v1beta1/test_featurestore_online_serving_service.py @@ -14,7 +14,13 @@ # limitations under the License. # import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio diff --git a/tests/unit/gapic/aiplatform_v1beta1/test_featurestore_service.py b/tests/unit/gapic/aiplatform_v1beta1/test_featurestore_service.py index b29bba1435..30ebffc04b 100644 --- a/tests/unit/gapic/aiplatform_v1beta1/test_featurestore_service.py +++ b/tests/unit/gapic/aiplatform_v1beta1/test_featurestore_service.py @@ -14,7 +14,13 @@ # limitations under the License. # import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio diff --git a/tests/unit/gapic/aiplatform_v1beta1/test_index_endpoint_service.py b/tests/unit/gapic/aiplatform_v1beta1/test_index_endpoint_service.py index 41485dea9c..75373a6806 100644 --- a/tests/unit/gapic/aiplatform_v1beta1/test_index_endpoint_service.py +++ b/tests/unit/gapic/aiplatform_v1beta1/test_index_endpoint_service.py @@ -14,7 +14,13 @@ # limitations under the License. # import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio diff --git a/tests/unit/gapic/aiplatform_v1beta1/test_index_service.py b/tests/unit/gapic/aiplatform_v1beta1/test_index_service.py index f163aaa75a..c41fe6afff 100644 --- a/tests/unit/gapic/aiplatform_v1beta1/test_index_service.py +++ b/tests/unit/gapic/aiplatform_v1beta1/test_index_service.py @@ -14,7 +14,13 @@ # limitations under the License. 
# import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio diff --git a/tests/unit/gapic/aiplatform_v1beta1/test_job_service.py b/tests/unit/gapic/aiplatform_v1beta1/test_job_service.py index c615e37c59..68a99c25fa 100644 --- a/tests/unit/gapic/aiplatform_v1beta1/test_job_service.py +++ b/tests/unit/gapic/aiplatform_v1beta1/test_job_service.py @@ -14,7 +14,13 @@ # limitations under the License. # import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio diff --git a/tests/unit/gapic/aiplatform_v1beta1/test_metadata_service.py b/tests/unit/gapic/aiplatform_v1beta1/test_metadata_service.py index 3a97bca5c0..001a8bfbc8 100644 --- a/tests/unit/gapic/aiplatform_v1beta1/test_metadata_service.py +++ b/tests/unit/gapic/aiplatform_v1beta1/test_metadata_service.py @@ -14,7 +14,13 @@ # limitations under the License. # import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio diff --git a/tests/unit/gapic/aiplatform_v1beta1/test_migration_service.py b/tests/unit/gapic/aiplatform_v1beta1/test_migration_service.py index 49fd9ca313..129ca60552 100644 --- a/tests/unit/gapic/aiplatform_v1beta1/test_migration_service.py +++ b/tests/unit/gapic/aiplatform_v1beta1/test_migration_service.py @@ -14,7 +14,13 @@ # limitations under the License. # import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio diff --git a/tests/unit/gapic/aiplatform_v1beta1/test_model_service.py b/tests/unit/gapic/aiplatform_v1beta1/test_model_service.py index 0ca4ae225d..53085570b0 100644 --- a/tests/unit/gapic/aiplatform_v1beta1/test_model_service.py +++ b/tests/unit/gapic/aiplatform_v1beta1/test_model_service.py @@ -14,7 +14,13 @@ # limitations under the License. # import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio @@ -2356,6 +2362,247 @@ async def test_update_model_flattened_error_async(): ) +@pytest.mark.parametrize( + "request_type", + [ + model_service.UpdateExplanationDatasetRequest, + dict, + ], +) +def test_update_explanation_dataset(request_type, transport: str = "grpc"): + client = ModelServiceClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.update_explanation_dataset), "__call__" + ) as call: + # Designate an appropriate return value for the call. 
+ call.return_value = operations_pb2.Operation(name="operations/spam") + response = client.update_explanation_dataset(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + assert args[0] == model_service.UpdateExplanationDatasetRequest() + + # Establish that the response is the type that we expect. + assert isinstance(response, future.Future) + + +def test_update_explanation_dataset_empty_call(): + # This test is a coverage failsafe to make sure that totally empty calls, + # i.e. request == None and no flattened fields passed, work. + client = ModelServiceClient( + credentials=ga_credentials.AnonymousCredentials(), + transport="grpc", + ) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.update_explanation_dataset), "__call__" + ) as call: + client.update_explanation_dataset() + call.assert_called() + _, args, _ = call.mock_calls[0] + assert args[0] == model_service.UpdateExplanationDatasetRequest() + + +@pytest.mark.asyncio +async def test_update_explanation_dataset_async( + transport: str = "grpc_asyncio", + request_type=model_service.UpdateExplanationDatasetRequest, +): + client = ModelServiceAsyncClient( + credentials=ga_credentials.AnonymousCredentials(), + transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.update_explanation_dataset), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + operations_pb2.Operation(name="operations/spam") + ) + response = await client.update_explanation_dataset(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + assert args[0] == model_service.UpdateExplanationDatasetRequest() + + # Establish that the response is the type that we expect. + assert isinstance(response, future.Future) + + +@pytest.mark.asyncio +async def test_update_explanation_dataset_async_from_dict(): + await test_update_explanation_dataset_async(request_type=dict) + + +def test_update_explanation_dataset_field_headers(): + client = ModelServiceClient( + credentials=ga_credentials.AnonymousCredentials(), + ) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = model_service.UpdateExplanationDatasetRequest() + + request.model = "model_value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.update_explanation_dataset), "__call__" + ) as call: + call.return_value = operations_pb2.Operation(name="operations/op") + client.update_explanation_dataset(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. 
+ _, _, kw = call.mock_calls[0] + assert ( + "x-goog-request-params", + "model=model_value", + ) in kw["metadata"] + + +@pytest.mark.asyncio +async def test_update_explanation_dataset_field_headers_async(): + client = ModelServiceAsyncClient( + credentials=ga_credentials.AnonymousCredentials(), + ) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = model_service.UpdateExplanationDatasetRequest() + + request.model = "model_value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.update_explanation_dataset), "__call__" + ) as call: + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + operations_pb2.Operation(name="operations/op") + ) + await client.update_explanation_dataset(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ( + "x-goog-request-params", + "model=model_value", + ) in kw["metadata"] + + +def test_update_explanation_dataset_flattened(): + client = ModelServiceClient( + credentials=ga_credentials.AnonymousCredentials(), + ) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.update_explanation_dataset), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = operations_pb2.Operation(name="operations/op") + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + client.update_explanation_dataset( + model="model_value", + ) + + # Establish that the underlying call was made with the expected + # request object values. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + arg = args[0].model + mock_val = "model_value" + assert arg == mock_val + + +def test_update_explanation_dataset_flattened_error(): + client = ModelServiceClient( + credentials=ga_credentials.AnonymousCredentials(), + ) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + client.update_explanation_dataset( + model_service.UpdateExplanationDatasetRequest(), + model="model_value", + ) + + +@pytest.mark.asyncio +async def test_update_explanation_dataset_flattened_async(): + client = ModelServiceAsyncClient( + credentials=ga_credentials.AnonymousCredentials(), + ) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.update_explanation_dataset), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = operations_pb2.Operation(name="operations/op") + + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + operations_pb2.Operation(name="operations/spam") + ) + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + response = await client.update_explanation_dataset( + model="model_value", + ) + + # Establish that the underlying call was made with the expected + # request object values. 
+ assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + arg = args[0].model + mock_val = "model_value" + assert arg == mock_val + + +@pytest.mark.asyncio +async def test_update_explanation_dataset_flattened_error_async(): + client = ModelServiceAsyncClient( + credentials=ga_credentials.AnonymousCredentials(), + ) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + await client.update_explanation_dataset( + model_service.UpdateExplanationDatasetRequest(), + model="model_value", + ) + + @pytest.mark.parametrize( "request_type", [ @@ -3416,6 +3663,7 @@ def test_import_model_evaluation(request_type, transport: str = "grpc"): # Designate an appropriate return value for the call. call.return_value = gca_model_evaluation.ModelEvaluation( name="name_value", + display_name="display_name_value", metrics_schema_uri="metrics_schema_uri_value", slice_dimensions=["slice_dimensions_value"], ) @@ -3429,6 +3677,7 @@ def test_import_model_evaluation(request_type, transport: str = "grpc"): # Establish that the response is the type that we expect. assert isinstance(response, gca_model_evaluation.ModelEvaluation) assert response.name == "name_value" + assert response.display_name == "display_name_value" assert response.metrics_schema_uri == "metrics_schema_uri_value" assert response.slice_dimensions == ["slice_dimensions_value"] @@ -3473,6 +3722,7 @@ async def test_import_model_evaluation_async( call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( gca_model_evaluation.ModelEvaluation( name="name_value", + display_name="display_name_value", metrics_schema_uri="metrics_schema_uri_value", slice_dimensions=["slice_dimensions_value"], ) @@ -3487,6 +3737,7 @@ async def test_import_model_evaluation_async( # Establish that the response is the type that we expect. assert isinstance(response, gca_model_evaluation.ModelEvaluation) assert response.name == "name_value" + assert response.display_name == "display_name_value" assert response.metrics_schema_uri == "metrics_schema_uri_value" assert response.slice_dimensions == ["slice_dimensions_value"] @@ -3681,6 +3932,7 @@ def test_get_model_evaluation(request_type, transport: str = "grpc"): # Designate an appropriate return value for the call. call.return_value = model_evaluation.ModelEvaluation( name="name_value", + display_name="display_name_value", metrics_schema_uri="metrics_schema_uri_value", slice_dimensions=["slice_dimensions_value"], ) @@ -3694,6 +3946,7 @@ def test_get_model_evaluation(request_type, transport: str = "grpc"): # Establish that the response is the type that we expect. assert isinstance(response, model_evaluation.ModelEvaluation) assert response.name == "name_value" + assert response.display_name == "display_name_value" assert response.metrics_schema_uri == "metrics_schema_uri_value" assert response.slice_dimensions == ["slice_dimensions_value"] @@ -3738,6 +3991,7 @@ async def test_get_model_evaluation_async( call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( model_evaluation.ModelEvaluation( name="name_value", + display_name="display_name_value", metrics_schema_uri="metrics_schema_uri_value", slice_dimensions=["slice_dimensions_value"], ) @@ -3752,6 +4006,7 @@ async def test_get_model_evaluation_async( # Establish that the response is the type that we expect. 
assert isinstance(response, model_evaluation.ModelEvaluation) assert response.name == "name_value" + assert response.display_name == "display_name_value" assert response.metrics_schema_uri == "metrics_schema_uri_value" assert response.slice_dimensions == ["slice_dimensions_value"] @@ -5196,6 +5451,7 @@ def test_model_service_base_transport(): "list_models", "list_model_versions", "update_model", + "update_explanation_dataset", "delete_model", "delete_model_version", "merge_version_aliases", diff --git a/tests/unit/gapic/aiplatform_v1beta1/test_pipeline_service.py b/tests/unit/gapic/aiplatform_v1beta1/test_pipeline_service.py index 19d37fae67..3712934a17 100644 --- a/tests/unit/gapic/aiplatform_v1beta1/test_pipeline_service.py +++ b/tests/unit/gapic/aiplatform_v1beta1/test_pipeline_service.py @@ -14,7 +14,13 @@ # limitations under the License. # import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio @@ -54,6 +60,7 @@ from google.cloud.aiplatform_v1beta1.types import io from google.cloud.aiplatform_v1beta1.types import model from google.cloud.aiplatform_v1beta1.types import operation as gca_operation +from google.cloud.aiplatform_v1beta1.types import pipeline_failure_policy from google.cloud.aiplatform_v1beta1.types import pipeline_job from google.cloud.aiplatform_v1beta1.types import pipeline_job as gca_pipeline_job from google.cloud.aiplatform_v1beta1.types import pipeline_service diff --git a/tests/unit/gapic/aiplatform_v1beta1/test_prediction_service.py b/tests/unit/gapic/aiplatform_v1beta1/test_prediction_service.py index 987d023874..d6bf05ed42 100644 --- a/tests/unit/gapic/aiplatform_v1beta1/test_prediction_service.py +++ b/tests/unit/gapic/aiplatform_v1beta1/test_prediction_service.py @@ -14,7 +14,13 @@ # limitations under the License. # import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio diff --git a/tests/unit/gapic/aiplatform_v1beta1/test_specialist_pool_service.py b/tests/unit/gapic/aiplatform_v1beta1/test_specialist_pool_service.py index cb2d05022d..cf4d765de9 100644 --- a/tests/unit/gapic/aiplatform_v1beta1/test_specialist_pool_service.py +++ b/tests/unit/gapic/aiplatform_v1beta1/test_specialist_pool_service.py @@ -14,7 +14,13 @@ # limitations under the License. # import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio diff --git a/tests/unit/gapic/aiplatform_v1beta1/test_tensorboard_service.py b/tests/unit/gapic/aiplatform_v1beta1/test_tensorboard_service.py index 712168fc3a..0763993f1a 100644 --- a/tests/unit/gapic/aiplatform_v1beta1/test_tensorboard_service.py +++ b/tests/unit/gapic/aiplatform_v1beta1/test_tensorboard_service.py @@ -14,7 +14,13 @@ # limitations under the License. 
# import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio diff --git a/tests/unit/gapic/aiplatform_v1beta1/test_vizier_service.py b/tests/unit/gapic/aiplatform_v1beta1/test_vizier_service.py index a73602e3e0..819f46fb17 100644 --- a/tests/unit/gapic/aiplatform_v1beta1/test_vizier_service.py +++ b/tests/unit/gapic/aiplatform_v1beta1/test_vizier_service.py @@ -14,7 +14,13 @@ # limitations under the License. # import os -import mock + +# try/except added for compatibility with python < 3.8 +try: + from unittest import mock + from unittest.mock import AsyncMock +except ImportError: + import mock import grpc from grpc.experimental import aio
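
Every test module in this patch swaps the bare `import mock` for the guarded import shown in the hunks above. The reason is that `unittest.mock.AsyncMock` only ships with the standard library from Python 3.8 onward; on older interpreters the stdlib import raises `ImportError` and the third-party `mock` package (which also provides `AsyncMock`) is used instead. A minimal, self-contained sketch of the shim and of why `AsyncMock` matters for the async gRPC tests:

```python
# The same guard the patch adds at the top of each GAPIC test module.
try:
    from unittest import mock
    from unittest.mock import AsyncMock  # noqa: F401 -- stdlib only on 3.8+
except ImportError:  # pragma: no cover -- taken on Python < 3.8
    import mock  # third-party package that also provides AsyncMock

import asyncio


async def _demo() -> None:
    # AsyncMock lets a faked async gRPC stub be awaited inside a test.
    fake_rpc = mock.AsyncMock(return_value="operations/spam")
    result = await fake_rpc()
    assert result == "operations/spam"
    fake_rpc.assert_awaited_once()


asyncio.run(_demo())
```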
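
The two duplicated `test_dataset_path` / `test_parse_dataset_path` functions in `test_migration_service.py` swap which resource-name template each one covers: one now exercises the `projects/{project}/locations/{location}/datasets/{dataset}` form, the other the shorter `projects/{project}/datasets/{dataset}` form. The sketch below is illustrative only (it is not the generated client's code); it shows the render/parse pair that those tests round-trip, assuming the usual format-string plus regex pattern:

```python
import re


def dataset_path(project: str, location: str, dataset: str) -> str:
    """Render the location-qualified dataset resource name."""
    return "projects/{project}/locations/{location}/datasets/{dataset}".format(
        project=project, location=location, dataset=dataset
    )


def parse_dataset_path(path: str) -> dict:
    """Invert dataset_path(); return {} when the path does not match."""
    m = re.match(
        r"^projects/(?P<project>.+?)/locations/(?P<location>.+?)"
        r"/datasets/(?P<dataset>.+?)$",
        path,
    )
    return m.groupdict() if m else {}


# Mirrors the round-trip assertion style used by test_parse_dataset_path().
expected = {"project": "nautilus", "location": "scallop", "dataset": "abalone"}
assert parse_dataset_path(dataset_path(**expected)) == expected
```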
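
The new `update_explanation_dataset` tests (like the rest of these files) intercept RPCs with `mock.patch.object(type(client.transport.<rpc>), "__call__")`: patching `__call__` on the *type* of the transport's callable means invoking the stub never touches a channel, while the mock still records the request object for the assertions that follow. A toy, self-contained reproduction of that mechanism (the `_FakeStub` / `_FakeTransport` classes are stand-ins, not library code):

```python
from unittest import mock


class _FakeStub:
    """Stands in for a gRPC multicallable; calling it would hit the network."""

    def __call__(self, request, metadata=()):
        raise RuntimeError("network call attempted")


class _FakeTransport:
    update_explanation_dataset = _FakeStub()


transport = _FakeTransport()

with mock.patch.object(
    type(transport.update_explanation_dataset), "__call__"
) as call:
    call.return_value = {"name": "operations/spam"}  # fake LRO payload
    response = transport.update_explanation_dataset("request")

    # Same assertions the generated tests make: the stub was invoked once,
    # with the request we passed, and the faked value came back.
    call.assert_called_once()
    _, args, _ = call.mock_calls[0]
    assert args[0] == "request"
    assert response == {"name": "operations/spam"}
```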
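
For context, a hedged sketch of how the RPC added in this release would be called from user code. The client construction, project, region, and model resource name are placeholders; the flattened `model=` argument and the long-running-operation return type are the parts confirmed by the tests above.

```python
from google.cloud import aiplatform_v1beta1

# Placeholder endpoint and resource name -- substitute real values.
client = aiplatform_v1beta1.ModelServiceClient(
    client_options={"api_endpoint": "us-central1-aiplatform.googleapis.com"}
)

lro = client.update_explanation_dataset(
    model="projects/my-project/locations/us-central1/models/my-model",
)
lro.result()  # blocks until the dataset update finishes
```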
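
The field-level additions that the updated assertions check, `display_name` on `ModelEvaluation` and `template_uri` on `PipelineJob`, can be verified locally by constructing the v1 message types, assuming google-cloud-aiplatform at this release (1.14.0) or later is installed:

```python
from google.cloud.aiplatform_v1.types import model_evaluation, pipeline_job

evaluation = model_evaluation.ModelEvaluation(
    name="name_value",
    display_name="display_name_value",  # field added in this release
    metrics_schema_uri="metrics_schema_uri_value",
)
job = pipeline_job.PipelineJob(
    name="name_value",
    template_uri="template_uri_value",  # field added in this release
)

assert evaluation.display_name == "display_name_value"
assert job.template_uri == "template_uri_value"
```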