diff --git a/.kokoro/continuous/system.cfg b/.kokoro/continuous/system.cfg index 4b312a0260..a41c56ee6d 100644 --- a/.kokoro/continuous/system.cfg +++ b/.kokoro/continuous/system.cfg @@ -11,5 +11,5 @@ env_vars: { value: "-n=16 --dist=loadscope" } -# Kokoro VM timeout of 12 hours for system tests -timeout_mins: 720 +# Kokoro VM timeout of 5 hours for system tests +timeout_mins: 300 diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 78e48be88a..c5435061da 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "1.51.0" + ".": "1.52.0" } diff --git a/CHANGELOG.md b/CHANGELOG.md index b42c727e4e..7325a6ae20 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,32 @@ # Changelog +## [1.52.0](https://github.com/googleapis/python-aiplatform/compare/v1.51.0...v1.52.0) (2024-05-21) + + +### Features + +* Add FeatureGroup delete ([f9011e0](https://github.com/googleapis/python-aiplatform/commit/f9011e0b1b4ea8470849ecdd5ba9e086c73b778b)) +* Add support for ToolConfig in the LangChain template ([9bda328](https://github.com/googleapis/python-aiplatform/commit/9bda3288b59eb52c18a13c292561cb2c720ff331)) +* Create Vertex Experiment when uploading Tensorboard logs ([339f8b6](https://github.com/googleapis/python-aiplatform/commit/339f8b667952c7302c36605842ba92fa1c7135b8)) +* GenAI - Add BatchPredictionJob for GenAI models ([df4a4f2](https://github.com/googleapis/python-aiplatform/commit/df4a4f2745178a6458bb6dc1f124c8ee60e986c1)) +* GenAI - Add cancel, delete, list methods in BatchPredictionJob ([7ff8071](https://github.com/googleapis/python-aiplatform/commit/7ff80714c2ec55330d5d6a0075366f8f700128af)) +* GenAI - Added the `BatchPredictionJob.submit` method ([4d091c6](https://github.com/googleapis/python-aiplatform/commit/4d091c68b17b3c1b4a912aa38d3a098fdc21238d)) +* Private Endpoints - Added private service connect support to prediction endpoint. ([6bdcfb3](https://github.com/googleapis/python-aiplatform/commit/6bdcfb3c0c6b121d5fbfcdad9dd218a1ddfc3e0d)) + + +### Bug Fixes + +* Add validation for evaluation dataset fields, update logging info for eval api request count ([d6ef500](https://github.com/googleapis/python-aiplatform/commit/d6ef50080f3b2b923ba0fb89eb0a8daebf8f68d4)) +* Fix feature attribution drift visualization for model monitoring SDK ([710f33d](https://github.com/googleapis/python-aiplatform/commit/710f33d87e242a283e8fbe5327ba9fa781d0d8fc)) +* Fix the default value of response_column_name in EvalTask.evaluate() ([98f9b35](https://github.com/googleapis/python-aiplatform/commit/98f9b35ccde7dad7f3e6b9e259a201ee2784d15e)) +* Update get_experiment_df to pass Experiment and allow empty metrics. ([de5d0f3](https://github.com/googleapis/python-aiplatform/commit/de5d0f3a17a77cbc70ada480768d9209c7b02828)) + + +### Documentation + +* Add Vertex Model Monitoring V2 SDK documentation ([b47e6ff](https://github.com/googleapis/python-aiplatform/commit/b47e6ff1f17278a6f1e4c31def05f3a09d981b28)) +* Update docstrings for rapid evaluation library. ([d6d371d](https://github.com/googleapis/python-aiplatform/commit/d6d371d61abd2daa2f222ca82540c5e0c4b3a602)) + ## [1.51.0](https://github.com/googleapis/python-aiplatform/compare/v1.50.0...v1.51.0) (2024-05-10) diff --git a/docs/vertexai/services.rst b/docs/vertexai/services.rst index 20d9439219..817c557590 100644 --- a/docs/vertexai/services.rst +++ b/docs/vertexai/services.rst @@ -54,3 +54,19 @@ Vertex AI SDK :members: :show-inheritance: :inherited-members: + +.. 
automodule:: vertexai.resources + :no-members: + +.. automodule:: vertexai.resources.preview + :no-members: + +.. automodule:: vertexai.resources.preview.ml_monitoring + :members: + :show-inheritance: + :inherited-members: + +.. automodule:: vertexai.resources.preview.ml_monitoring.spec + :members: + :show-inheritance: + :inherited-members: diff --git a/google/cloud/aiplatform/gapic_version.py b/google/cloud/aiplatform/gapic_version.py index 05d77f299d..33b7cbe7a9 100644 --- a/google/cloud/aiplatform/gapic_version.py +++ b/google/cloud/aiplatform/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.51.0" # {x-release-please-version} +__version__ = "1.52.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py index fe856642f5..70d5029013 100644 --- a/google/cloud/aiplatform/jobs.py +++ b/google/cloud/aiplatform/jobs.py @@ -15,16 +15,16 @@ # limitations under the License. # -from typing import Iterable, Optional, Union, Sequence, Dict, List +from typing import Iterable, Optional, Union, Sequence, Dict, List, Tuple import abc import copy import datetime import time import tempfile -import uuid from google.auth import credentials as auth_credentials +from google.api_core import exceptions as api_exceptions from google.protobuf import duration_pb2 # type: ignore from google.protobuf import field_mask_pb2 # type: ignore from google.rpc import status_pb2 @@ -35,6 +35,7 @@ batch_prediction_job as gca_bp_job_compat, completion_stats as gca_completion_stats, custom_job as gca_custom_job_compat, + execution as gca_execution_compat, explanation as gca_explanation_compat, encryption_spec as gca_encryption_spec_compat, io as gca_io_compat, @@ -61,7 +62,6 @@ batch_prediction_job as batch_prediction_job_v1, ) from google.cloud.aiplatform_v1.types import custom_job as custom_job_v1 -from google.cloud.aiplatform_v1.types import execution as execution_v1 _LOGGER = base.Logger(__name__) @@ -1583,8 +1583,6 @@ def _empty_constructor( self._logged_web_access_uris = set() if isinstance(self, CustomJob): - self._experiment = None - self._experiment_run = None self._enable_autolog = False return self @@ -1633,13 +1631,22 @@ def _block_until_complete(self): self._log_job_state() - if isinstance(self, CustomJob) and self._experiment_run: - # sync resource before end run - self._experiment_run = aiplatform.ExperimentRun.get( - self._experiment_run.name, - experiment=self._experiment, - ) - self._experiment_run.end_run() + if isinstance(self, CustomJob): + # End the experiment run associated with the custom job, if exists. + experiment_run = self._gca_resource.job_spec.experiment_run + if experiment_run: + try: + # sync resource before end run + experiment_run_context = aiplatform.Context(experiment_run) + experiment_run_context.update( + metadata={ + metadata_constants._STATE_KEY: gca_execution_compat.Execution.State.COMPLETE.name + } + ) + except (ValueError, api_exceptions.GoogleAPIError) as e: + _LOGGER.warning( + f"Failed to end experiment run {experiment_run} due to: {e}" + ) # Error is only populated when the job state is # JOB_STATE_FAILED or JOB_STATE_CANCELLED. 
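For context on the experiment handling reworked in the hunk above, a minimal usage sketch (not part of this diff; project, bucket, image and experiment names are placeholders):

from google.cloud import aiplatform

aiplatform.init(
    project="my-project",             # placeholder
    location="us-central1",
    staging_bucket="gs://my-bucket",  # placeholder
)

job = aiplatform.CustomJob(
    display_name="my-custom-job",
    worker_pool_specs=[{
        "machine_spec": {"machine_type": "n1-standard-4"},
        "replica_count": 1,
        "container_spec": {"image_uri": "gcr.io/my-project/trainer"},  # placeholder image
    }],
)

# submit() now records only the experiment / run resource names on
# job_spec; when the job reaches a terminal state, _block_until_complete()
# updates the run's Context state to COMPLETE, logging a warning on failure.
job.submit(experiment="my-experiment", experiment_run="my-run")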
@@ -1852,8 +1859,6 @@ def __init__( ), ) - self._experiment = None - self._experiment_run = None self._enable_autolog = False @property @@ -2510,79 +2515,10 @@ def submit( if persistent_resource_id: self._gca_resource.job_spec.persistent_resource_id = persistent_resource_id - # TODO(b/275105711) Update implementation after experiment/run in the proto - if experiment: - # short-term solution to set experiment/experimentRun in SDK - if isinstance(experiment, aiplatform.Experiment): - self._experiment = experiment - # convert the Experiment instance to string to be passed to env - experiment = experiment.name - else: - self._experiment = aiplatform.Experiment.get(experiment_name=experiment) - if not self._experiment: - raise ValueError( - f"Experiment '{experiment}' doesn't exist. " - "Please call aiplatform.init(experiment='my-exp') to create an experiment." - ) - elif ( - not self._experiment.backing_tensorboard_resource_name - and self._enable_autolog - ): - raise ValueError( - f"Experiment '{experiment}' doesn't have a backing tensorboard resource, " - "which is required by the experiment autologging feature. " - "Please call Experiment.assign_backing_tensorboard('my-tb-resource-name')." - ) - - # if run name is not specified, auto-generate one - if not experiment_run: - experiment_run = ( - # TODO(b/223262536)Once display_name is optional this run name - # might be invalid as well. - f"{self._gca_resource.display_name}-{uuid.uuid4().hex[0:5]}" - ) - - # get or create the experiment run for the job - if isinstance(experiment_run, aiplatform.ExperimentRun): - self._experiment_run = experiment_run - # convert the ExperimentRun instance to string to be passed to env - experiment_run = experiment_run.name - else: - self._experiment_run = aiplatform.ExperimentRun.get( - run_name=experiment_run, - experiment=self._experiment, - ) - if not self._experiment_run: - self._experiment_run = aiplatform.ExperimentRun.create( - run_name=experiment_run, - experiment=self._experiment, - ) - self._experiment_run.update_state(execution_v1.Execution.State.RUNNING) - - worker_pool_specs = self._gca_resource.job_spec.worker_pool_specs - for spec in worker_pool_specs: - if not spec: - continue - - if "python_package_spec" in spec: - container_spec = spec.python_package_spec - else: - container_spec = spec.container_spec - - experiment_env = [ - { - "name": metadata_constants.ENV_EXPERIMENT_KEY, - "value": experiment, - }, - { - "name": metadata_constants.ENV_EXPERIMENT_RUN_KEY, - "value": experiment_run, - }, - ] - if "env" in container_spec: - container_spec.env.extend(experiment_env) - else: - container_spec.env = experiment_env + ( + self._gca_resource.job_spec.experiment, + self._gca_resource.job_spec.experiment_run, + ) = self._get_experiment_and_run_resource_name(experiment, experiment_run) _LOGGER.log_create_with_lro(self.__class__) @@ -2606,26 +2542,38 @@ def submit( ) ) - if experiment: - custom_job = { - metadata_constants._CUSTOM_JOB_RESOURCE_NAME: self.resource_name, - metadata_constants._CUSTOM_JOB_CONSOLE_URI: self._dashboard_uri(), - } - - run_context = self._experiment_run._metadata_node - custom_jobs = run_context._gca_resource.metadata.get( - metadata_constants._CUSTOM_JOB_KEY - ) - if custom_jobs: - custom_jobs.append(custom_job) - else: - custom_jobs = [custom_job] - run_context.update({metadata_constants._CUSTOM_JOB_KEY: custom_jobs}) - @property def job_spec(self): return self._gca_resource.job_spec + @staticmethod + def _get_experiment_and_run_resource_name( + experiment: 
Optional[Union["aiplatform.Experiment", str]] = None, + experiment_run: Optional[Union["aiplatform.ExperimentRun", str]] = None, + ) -> Tuple[str, str]: + """Helper method to get the experiment and run resource name for the custom job.""" + if not experiment: + return None, None + + experiment_resource = ( + aiplatform.Experiment(experiment) + if isinstance(experiment, str) + else experiment + ) + + if not experiment_run: + return experiment_resource.resource_name, None + + experiment_run_resource = ( + aiplatform.ExperimentRun(experiment_run, experiment_resource) + if isinstance(experiment_run, str) + else experiment_run + ) + return ( + experiment_resource.resource_name, + experiment_run_resource.resource_name, + ) + class HyperparameterTuningJob(_RunnableJob, base.PreviewMixin): """Vertex AI Hyperparameter Tuning Job.""" diff --git a/google/cloud/aiplatform/metadata/context.py b/google/cloud/aiplatform/metadata/context.py index 3707bc1f3e..28c118292a 100644 --- a/google/cloud/aiplatform/metadata/context.py +++ b/google/cloud/aiplatform/metadata/context.py @@ -290,6 +290,7 @@ def update( metadata: Optional[Dict] = None, description: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + location: Optional[str] = None, ): """Updates an existing Metadata Context with new metadata. @@ -307,7 +308,10 @@ def update( for _ in range(_ETAG_ERROR_MAX_RETRY_COUNT - 1): try: super().update( - metadata=metadata, description=description, credentials=credentials + metadata=metadata, + description=description, + credentials=credentials, + location=location, ) return except Aborted as aborted_exception: @@ -322,7 +326,10 @@ def update( # Expose result/exception directly in the last retry. super().update( - metadata=metadata, description=description, credentials=credentials + metadata=metadata, + description=description, + credentials=credentials, + location=location, ) @classmethod diff --git a/google/cloud/aiplatform/metadata/experiment_resources.py b/google/cloud/aiplatform/metadata/experiment_resources.py index 5f2b2c1a66..0d1561eac3 100644 --- a/google/cloud/aiplatform/metadata/experiment_resources.py +++ b/google/cloud/aiplatform/metadata/experiment_resources.py @@ -482,7 +482,8 @@ def get_data_frame( metadata_context.schema_title ]._query_experiment_row, metadata_context, - include_time_series, + experiment=self, + include_time_series=include_time_series, ) for metadata_context in contexts ] @@ -494,7 +495,8 @@ def get_data_frame( metadata_execution.schema_title ]._query_experiment_row, metadata_execution, - include_time_series, + experiment=self, + include_time_series=include_time_series, ) for metadata_execution in executions ) @@ -634,7 +636,8 @@ def assign_backing_tensorboard( self._metadata_context.update( metadata={ constants._BACKING_TENSORBOARD_RESOURCE_KEY: tensorboard.resource_name - } + }, + location=self._metadata_context.location, ) def _log_experiment_loggable(self, experiment_loggable: "_ExperimentLoggable"): diff --git a/google/cloud/aiplatform/metadata/experiment_run_resource.py b/google/cloud/aiplatform/metadata/experiment_run_resource.py index 00eeac3817..f4d2a85b0d 100644 --- a/google/cloud/aiplatform/metadata/experiment_run_resource.py +++ b/google/cloud/aiplatform/metadata/experiment_run_resource.py @@ -454,6 +454,8 @@ def _initialize_experiment_run( self._metadata_metric_artifact = self._v1_get_metric_artifact() if not self._is_legacy_experiment_run() and lookup_tensorboard_run: self._backing_tensorboard_run = 
self._lookup_tensorboard_run_artifact() + if not self._backing_tensorboard_run: + self._assign_to_experiment_backing_tensorboard() @classmethod def list( @@ -553,13 +555,16 @@ def _create_v1_experiment_run( def _query_experiment_row( cls, node: Union[context.Context, execution.Execution], - include_time_series: Optional[bool] = True, + experiment: Optional[experiment_resources.Experiment] = None, + include_time_series: bool = True, ) -> experiment_resources._ExperimentRow: """Retrieves the runs metric and parameters into an experiment run row. Args: node (Union[context._Context, execution.Execution]): Required. Metadata node instance that represents this run. + experiment: + Optional. Experiment associated with this run. include_time_series (bool): Optional. Whether or not to include time series metrics in df. Default is True. @@ -568,7 +573,7 @@ def _query_experiment_row( """ this_experiment_run = cls.__new__(cls) this_experiment_run._initialize_experiment_run( - node, lookup_tensorboard_run=include_time_series + node, experiment=experiment, lookup_tensorboard_run=include_time_series ) row = experiment_resources._ExperimentRow( @@ -620,8 +625,11 @@ def _get_latest_time_series_metric_columns(self) -> Dict[str, Union[float, int]] return { display_name: data.values[-1].scalar.value for display_name, data in time_series_metrics.items() - if data.value_type - == gca_tensorboard_time_series.TensorboardTimeSeries.ValueType.SCALAR + if ( + data.values + and data.value_type + == gca_tensorboard_time_series.TensorboardTimeSeries.ValueType.SCALAR + ) } return {} @@ -716,7 +724,9 @@ def create( The newly created experiment run. """ - experiment = cls._get_experiment(experiment) + experiment = cls._get_experiment( + experiment, project=project, location=location, credentials=credentials + ) run_id = _format_experiment_run_resource_id( experiment_name=experiment.name, run_name=run_name @@ -760,7 +770,10 @@ def _create_context(): try: if tensorboard: cls._assign_backing_tensorboard( - self=experiment_run, tensorboard=tensorboard + self=experiment_run, + tensorboard=tensorboard, + project=project, + location=location, ) else: cls._assign_to_experiment_backing_tensorboard(self=experiment_run) @@ -792,7 +805,10 @@ def _format_tensorboard_experiment_display_name(experiment_name: str) -> str: return f"{experiment_name} Backing Tensorboard Experiment" def _assign_backing_tensorboard( - self, tensorboard: Union[tensorboard_resource.Tensorboard, str] + self, + tensorboard: Union[tensorboard_resource.Tensorboard, str], + project: Optional[str] = None, + location: Optional[str] = None, ): """Assign tensorboard as the backing tensorboard to this run. 
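A short sketch of the consumer that the _query_experiment_row and time-series changes above feed, assuming an existing experiment with runs and pandas installed (names are placeholders):

from google.cloud import aiplatform

aiplatform.init(project="my-project", location="us-central1")

# get_experiment_df() builds one row per run via _query_experiment_row();
# the parent Experiment is now passed through explicitly, and runs whose
# backing time series contain no values contribute no scalar columns
# instead of raising.
df = aiplatform.get_experiment_df("my-experiment")
print(df.head())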
@@ -802,7 +818,10 @@ def _assign_backing_tensorboard( """ if isinstance(tensorboard, str): tensorboard = tensorboard_resource.Tensorboard( - tensorboard, credentials=self._metadata_node.credentials + tensorboard, + project=project, + location=location, + credentials=self._metadata_node.credentials, ) tensorboard_resource_name_parts = tensorboard._parse_resource_name( @@ -827,6 +846,8 @@ def _assign_backing_tensorboard( self._experiment.name ), tensorboard_name=tensorboard.resource_name, + project=project, + location=location, credentials=tensorboard.credentials, labels=constants._VERTEX_EXPERIMENT_TB_EXPERIMENT_LABEL, ) @@ -849,6 +870,8 @@ def _assign_backing_tensorboard( tensorboard_run = tensorboard_resource.TensorboardRun.create( tensorboard_run_id=self._run_name, tensorboard_experiment_name=tensorboard_experiment.resource_name, + project=project, + location=location, credentials=tensorboard.credentials, ) @@ -865,6 +888,8 @@ def _assign_backing_tensorboard( schema_title=constants._TENSORBOARD_RUN_REFERENCE_ARTIFACT.schema_title, schema_version=constants._TENSORBOARD_RUN_REFERENCE_ARTIFACT.schema_version, state=gca_artifact.Artifact.State.LIVE, + project=project, + location=location, ) self._metadata_node.add_artifacts_and_executions( diff --git a/google/cloud/aiplatform/metadata/metadata.py b/google/cloud/aiplatform/metadata/metadata.py index 02adb9b5b8..08f1235820 100644 --- a/google/cloud/aiplatform/metadata/metadata.py +++ b/google/cloud/aiplatform/metadata/metadata.py @@ -276,12 +276,21 @@ def experiment_run(self) -> Optional[experiment_run_resource.ExperimentRun]: """Returns the currently set experiment run or experiment run set via env variable AIP_EXPERIMENT_RUN_NAME.""" if self._experiment_run: return self._experiment_run - if os.getenv(constants.ENV_EXPERIMENT_RUN_KEY): - self._experiment_run = experiment_run_resource.ExperimentRun.get( - os.getenv(constants.ENV_EXPERIMENT_RUN_KEY), + + env_experiment_run = os.getenv(constants.ENV_EXPERIMENT_RUN_KEY) + if env_experiment_run and self.experiment: + # The run could be run name or full resource name, + # so we remove the experiment resource prefix if necessary. + env_experiment_run = env_experiment_run.replace( + f"{self.experiment.resource_name}-", + "", + ) + self._experiment_run = experiment_run_resource.ExperimentRun( + env_experiment_run, experiment=self.experiment, ) return self._experiment_run + return None def set_experiment( @@ -292,6 +301,8 @@ def set_experiment( backing_tensorboard: Optional[ Union[str, tensorboard_resource.Tensorboard, bool] ] = None, + project: Optional[str] = None, + location: Optional[str] = None, ): """Set the experiment. Will retrieve the Experiment if it exists or create one with the provided name. @@ -309,11 +320,20 @@ def set_experiment( To disable using a backing tensorboard, set `backing_tensorboard` to `False`. To maintain this behavior, set `experiment_tensorboard` to `False` in subsequent calls to aiplatform.init(). + project (str): + Optional. Project where this experiment will be retrieved from or created. Overrides project set in + aiplatform.init. + location (str): + Optional. Location where this experiment will be retrieved from or created. Overrides location set in + aiplatform.init. 
""" self.reset() experiment = experiment_resources.Experiment.get_or_create( - experiment_name=experiment, description=description + experiment_name=experiment, + description=description, + project=project, + location=location, ) if backing_tensorboard and not isinstance(backing_tensorboard, bool): diff --git a/google/cloud/aiplatform/metadata/resource.py b/google/cloud/aiplatform/metadata/resource.py index 580bd6c420..f1ab91a7bc 100644 --- a/google/cloud/aiplatform/metadata/resource.py +++ b/google/cloud/aiplatform/metadata/resource.py @@ -285,6 +285,7 @@ def update( metadata: Optional[Dict] = None, description: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + location: Optional[str] = None, ): """Updates an existing Metadata resource with new metadata. @@ -309,7 +310,9 @@ def update( if description: gca_resource.description = description - api_client = self._instantiate_client(credentials=credentials) + api_client = self._instantiate_client( + credentials=credentials, location=location + ) # TODO: if etag is not valid sync and retry update_gca_resource = self._update_resource( client=api_client, diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index 0b935fb166..86f60db044 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -62,6 +62,7 @@ model as gca_model_compat, model_service as gca_model_service_compat, env_var as gca_env_var_compat, + service_networking as gca_service_networking, ) from google.cloud.aiplatform.constants import ( @@ -313,6 +314,14 @@ def network(self) -> Optional[str]: self._assert_gca_resource_is_available() return getattr(self._gca_resource, "network", None) + @property + def private_service_connect_config( + self, + ) -> Optional[gca_service_networking.PrivateServiceConnectConfig]: + """The Private Service Connect configuration for this Endpoint.""" + self._assert_gca_resource_is_available() + return self._gca_resource.private_service_connect_config + @classmethod def create( cls, @@ -463,6 +472,9 @@ def _create( predict_request_response_logging_config: Optional[ gca_endpoint_compat.PredictRequestResponseLoggingConfig ] = None, + private_service_connect_config: Optional[ + gca_service_networking.PrivateServiceConnectConfig + ] = None, ) -> "Endpoint": """Creates a new endpoint by calling the API client. @@ -506,8 +518,9 @@ def _create( Optional. The full name of the Compute Engine network to which this Endpoint will be peered. E.g. "projects/12345/global/networks/myVPC". Private services access must already be configured for the network. + Cannot be specified when private_service_connect is enabled. Read more about PrivateEndpoints - [in the documentation](https://cloud.google.com/vertex-ai/docs/predictions/using-private-endpoints). + [in the documentation](https://cloud.google.com/vertex-ai/docs/predictions/using-private-endpoints) sync (bool): Whether to create this endpoint synchronously. create_request_timeout (float): @@ -525,6 +538,9 @@ def _create( that are not included in either the URI or the body. predict_request_response_logging_config (aiplatform.endpoint.PredictRequestResponseLoggingConfig): Optional. The request response logging configuration for online prediction. + private_service_connect_config (aiplatform.service_network.PrivateServiceConnectConfig): + If enabled, the endpoint can be accessible via [Private Service Connect](https://cloud.google.com/vpc/docs/private-service-connect). + Cannot be enabled when network is specified. 
Returns: endpoint (aiplatform.Endpoint): @@ -542,6 +558,7 @@ def _create( encryption_spec=encryption_spec, network=network, predict_request_response_logging_config=predict_request_response_logging_config, + private_service_connect_config=private_service_connect_config, ) operation_future = api_client.create_endpoint( @@ -1879,9 +1896,9 @@ def list( """ return cls._list_with_local_order( - cls_filter=lambda ep: not bool( - ep.network - ), # `network` is empty for public Endpoints + cls_filter=lambda ep: not bool(ep.network) + and not bool(ep.private_service_connect_config), + # `network` is empty and private_service_connect is not enabled for public Endpoints filter=filter, order_by=order_by, project=project, @@ -2012,12 +2029,12 @@ def __init__( credentials=credentials, ) - if not self.network: + if not self.network and not self.private_service_connect_config: raise ValueError( "Please ensure the Endpoint being retrieved is a PrivateEndpoint." ) - self._http_client = urllib3.PoolManager() + self._http_client = urllib3.PoolManager(cert_reqs="CERT_NONE") @property def predict_http_uri(self) -> Optional[str]: @@ -2040,6 +2057,30 @@ def health_http_uri(self) -> Optional[str]: return None return self._gca_resource.deployed_models[0].private_endpoints.health_http_uri + class PrivateServiceConnectConfig: + """Represents a Vertex AI PrivateServiceConnectConfig resource.""" + + _gapic_private_service_connect_config: gca_service_networking.PrivateServiceConnectConfig + + def __init__( + self, + project_allowlist: Optional[Sequence[str]] = None, + ): + """PrivateServiceConnectConfig for a PrivateEndpoint. + + Args: + project_allowlist (Sequence[str]): + Optional. List of projects from which traffic can be accepted + by the endpoint via [ServiceAttachment](https://cloud.google.com/vpc/docs/private-service-connect#service-attachments). + If not set, the endpoint's project will be used. + """ + self._gapic_private_service_connect_config = ( + gca_service_networking.PrivateServiceConnectConfig( + enable_private_service_connect=True, + project_allowlist=project_allowlist, + ) + ) + @classmethod def create( cls, @@ -2052,10 +2093,12 @@ def create( credentials: Optional[auth_credentials.Credentials] = None, encryption_spec_key_name: Optional[str] = None, sync=True, + private_service_connect_config: Optional[PrivateServiceConnectConfig] = None, ) -> "PrivateEndpoint": """Creates a new PrivateEndpoint. Example usage: + For PSA based private endpoint: my_private_endpoint = aiplatform.PrivateEndpoint.create( display_name="my_endpoint_name", project="my_project_id", @@ -2070,6 +2113,22 @@ def create( network="projects/123456789123/global/networks/my_vpc" ) + For PSC based private endpoint: + my_private_endpoint = aiplatform.PrivateEndpoint.create( + display_name="my_endpoint_name", + project="my_project_id", + location="us-central1", + private_service_connect=aiplatform.PrivateEndpoint.PrivateServiceConnectConfig( + project_allowlist=["test-project"]), + ) + + or (when project and location are initialized) + + my_private_endpoint = aiplatform.PrivateEndpoint.create( + display_name="my_endpoint_name", + private_service_connect=aiplatform.PrivateEndpoint.PrivateServiceConnectConfig( + project_allowlist=["test-project"]), + ) Args: display_name (str): Required. The user-defined name of the Endpoint. @@ -2086,6 +2145,7 @@ def create( this Endpoint will be peered. E.g. "projects/123456789123/global/networks/my_vpc". Private services access must already be configured for the network. 
If left unspecified, the network set with aiplatform.init will be used. + Cannot be set together with private_service_connect_config. description (str): Optional. The description of the Endpoint. labels (Dict[str, str]): @@ -2116,6 +2176,9 @@ def create( Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. + private_service_connect_config (aiplatform.PrivateEndpoint.PrivateServiceConnectConfig): + [Private Service Connect](https://cloud.google.com/vpc/docs/private-service-connect) configuration for the endpoint. + Cannot be set when network is specified. Returns: endpoint (aiplatform.PrivateEndpoint): @@ -2134,11 +2197,23 @@ def create( location = location or initializer.global_config.location network = network or initializer.global_config.network - if not network: + if not network and not private_service_connect_config: raise ValueError( - "Please provide required argument `network` or set network" + "Please provide required argument `network` or" + "`private_service_connect_config`. You can also set network" "using aiplatform.init(network=...)" ) + if network and private_service_connect_config: + raise ValueError( + "Argument `network` and `private_service_connect_config` enabled" + " mutually exclusive. You can only set one of them." + ) + + config = None + if private_service_connect_config: + config = ( + private_service_connect_config._gapic_private_service_connect_config + ) return cls._create( api_client=api_client, @@ -2153,6 +2228,7 @@ def create( ), network=network, sync=sync, + private_service_connect_config=config, ) @classmethod @@ -2200,7 +2276,7 @@ def _construct_sdk_resource_from_gapic( credentials=credentials, ) - endpoint._http_client = urllib3.PoolManager() + endpoint._http_client = urllib3.PoolManager(cert_reqs="CERT_NONE") return endpoint @@ -2262,15 +2338,44 @@ def _http_request( f"and that {url} is a valid URL." ) from exc - def predict(self, instances: List, parameters: Optional[Dict] = None) -> Prediction: + def _validate_endpoint_override(self, endpoint_override: str) -> bool: + regex = re.compile("^[a-zA-Z0-9-.]+$") + return regex.match(endpoint_override) is not None + + def predict( + self, + instances: List, + parameters: Optional[Dict] = None, + endpoint_override: Optional[str] = None, + ) -> Prediction: """Make a prediction against this PrivateEndpoint using a HTTP request. - This method must be called within the network the PrivateEndpoint is peered to. - Otherwise, the predict() call will fail with error code 404. To check, use `PrivateEndpoint.network`. + For PSA based private endpoint, this method must be called within the + network the PrivateEndpoint is peered to. Otherwise, the predict() call + will fail with error code 404. To check, use `PrivateEndpoint.network`. + + For PSC based priviate endpoint, the project where caller credential are + from must be allowlisted. Example usage: + PSA based private endpoint: + response = my_private_endpoint.predict(instances=[...]) my_predictions = response.predictions + PSC based private endpoint: + + After creating PSC Endpoint pointing to the endpoint's + ServiceAttachment, use the PSC Endpoint IP Address or DNS as + endpoint_override. 
+ + psc_endpoint_address = "10.0.1.23" + or + psc_endpoint_address = "test.my.prediction" + + response = my_private_endpoint.predict(instances=[...], + endpoint_override=psc_endpoint_address) + my_predictions = response.predictions + Args: instances (List): Required. The instances that are the input to the @@ -2292,40 +2397,90 @@ def predict(self, instances: List, parameters: Optional[Dict] = None) -> Predict ][google.cloud.aiplatform.v1beta1.DeployedModel.model] [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata] ``parameters_schema_uri``. + endpoint_override (Optional[str]): + The Private Service Connect endpoint's IP address or DNS that + points to the endpoint's service attachment. Returns: prediction (aiplatform.Prediction): Prediction object with returned predictions and Model ID. Raises: - RuntimeError: If a model has not been deployed a request cannot be made. + RuntimeError: If a model has not been deployed a request cannot be + made for PSA based endpoint. + ValueError: If a endpoint override is not provided for PSC based + endpoint. + ValueError: If a endpoint override is invalid for PSC based endpoint. """ self.wait() self._sync_gca_resource_if_skipped() - if not self._gca_resource.deployed_models: - raise RuntimeError( - "Cannot make a predict request because a model has not been deployed on this Private" - "Endpoint. Please ensure a model has been deployed." + if self.network: + if not self._gca_resource.deployed_models: + raise RuntimeError( + "Cannot make a predict request because a model has not been" + "deployed on this Private Endpoint. Please ensure a model" + "has been deployed." + ) + response = self._http_request( + method="POST", + url=self.predict_http_uri, + body=json.dumps({"instances": instances}), + headers={"Content-Type": "application/json"}, ) + prediction_response = json.loads(response.data) - response = self._http_request( - method="POST", - url=self.predict_http_uri, - body=json.dumps({"instances": instances}), - headers={"Content-Type": "application/json"}, - ) + return Prediction( + predictions=prediction_response.get("predictions"), + metadata=prediction_response.get("metadata"), + deployed_model_id=self._gca_resource.deployed_models[0].id, + ) - prediction_response = json.loads(response.data) + if self.private_service_connect_config: + if not endpoint_override: + raise ValueError( + "Cannot make a predict request because endpoint override is" + "not provided. Please ensure an endpoint override is" + "provided." + ) + if not self._validate_endpoint_override(endpoint_override): + raise ValueError( + "Invalid endpoint override provided. Please only use IP" + "address or DNS." 
+ ) - return Prediction( - predictions=prediction_response.get("predictions"), - metadata=prediction_response.get("metadata"), - deployed_model_id=self._gca_resource.deployed_models[0].id, - ) + if not self.credentials.valid: + self.credentials.refresh(google_auth_requests.Request()) + + token = self.credentials.token + headers = { + "Authorization": f"Bearer {token}", + "Content-Type": "application/json", + } + + url = f"https://{endpoint_override}/v1/projects/%7Bself.project%7D/locations/%7Bself.location%7D/endpoints/%7Bself.name%7D:predict" + response = self._http_request( + method="POST", + url=url, + body=json.dumps({"instances": instances}), + headers=headers, + ) + + prediction_response = json.loads(response.data) + + return Prediction( + predictions=prediction_response.get("predictions"), + metadata=prediction_response.get("metadata"), + deployed_model_id=prediction_response.get("deployedModelId"), + model_resource_name=prediction_response.get("model"), + model_version_id=prediction_response.get("modelVersionId"), + ) def raw_predict( - self, body: bytes, headers: Dict[str, str] + self, + body: bytes, + headers: Dict[str, str], + endpoint_override: Optional[str] = None, ) -> requests.models.Response: """Make a prediction request using arbitrary headers. This method must be called within the network the PrivateEndpoint is peered to. @@ -2333,29 +2488,78 @@ def raw_predict( Example usage: my_endpoint = aiplatform.PrivateEndpoint(ENDPOINT_ID) + + # PSA based private endpint response = my_endpoint.raw_predict( - body = b'{"instances":[{"feat_1":val_1, "feat_2":val_2}]}' + body = b'{"instances":[{"feat_1":val_1, "feat_2":val_2}]}', headers = {'Content-Type':'application/json'} ) + # PSC based private endpoint + + response = my_endpoint.raw_predict( + body = b'{"instances":[{"feat_1":val_1, "feat_2":val_2}]}', + headers = {'Content-Type':'application/json'}, + endpoint_override = "10.1.0.23" + ) + status_code = response.status_code results = json.dumps(response.text) Args: body (bytes): - The body of the prediction request in bytes. This must not exceed 1.5 mb per request. + The body of the prediction request in bytes. This must not + exceed 1.5 mb per request. headers (Dict[str, str]): - The header of the request as a dictionary. There are no restrictions on the header. + The header of the request as a dictionary. There are no + restrictions on the header. + endpoint_override (Optional[str]): + The Private Service Connect endpoint's IP address or DNS that + points to the endpoint's service attachment. Returns: - A requests.models.Response object containing the status code and prediction results. + A requests.models.Response object containing the status code and + prediction results. + + Raises: + ValueError: If a endpoint override is not provided for PSC based + endpoint. + ValueError: If a endpoint override is invalid for PSC based endpoint. """ self.wait() - return self._http_request( - method="POST", - url=self.predict_http_uri, - body=body, - headers=headers, - ) + if self.network: + return self._http_request( + method="POST", + url=self.predict_http_uri, + body=body, + headers=headers, + ) + + if self.private_service_connect_config: + if not endpoint_override: + raise ValueError( + "Cannot make a predict request because endpoint override is" + "not provided. Please ensure an endpoint override is" + "provided." + ) + if not self._validate_endpoint_override(endpoint_override): + raise ValueError( + "Invalid endpoint override provided. Please only use IP" + "address or DNS." 
+ ) + if not self.credentials.valid: + self.credentials.refresh(google_auth_requests.Request()) + + token = self.credentials.token + headers_with_token = dict(headers) + headers_with_token["Authorization"] = f"Bearer {token}" + + url = f"https://{endpoint_override}/v1/projects/%7Bself.project%7D/locations/%7Bself.location%7D/endpoints/%7Bself.name%7D:rawPredict" + return self._http_request( + method="POST", + url=url, + body=body, + headers=headers_with_token, + ) def explain(self): raise NotImplementedError( @@ -2366,6 +2570,7 @@ def health_check(self) -> bool: """ Makes a request to this PrivateEndpoint's health check URI. Must be within network that this PrivateEndpoint is in. + This is only supported by PSA based private endpoint. Example Usage: if my_private_endpoint.health_check(): @@ -2377,10 +2582,16 @@ def health_check(self) -> bool: Raises: RuntimeError: If a model has not been deployed a request cannot be made. + RuntimeError: If the endpoint is PSC based private endpoint. """ self.wait() self._sync_gca_resource_if_skipped() + if self.private_service_connect_config: + raise RuntimeError( + "Health check request is not supported on PSC based Private Endpoint." + ) + if not self._gca_resource.deployed_models: raise RuntimeError( "Cannot make a health check request because a model has not been deployed on this Private" @@ -2438,9 +2649,9 @@ def list( """ return cls._list_with_local_order( - cls_filter=lambda ep: bool( - ep.network - ), # Only PrivateEndpoints have a network set + cls_filter=lambda ep: bool(ep.network) + or bool(ep.private_service_connect_config), + # Only PrivateEndpoints have a network or private_service_connect_config filter=filter, order_by=order_by, project=project, @@ -2466,14 +2677,35 @@ def deploy( metadata: Optional[Sequence[Tuple[str, str]]] = (), sync=True, disable_container_logging: bool = False, + traffic_percentage: Optional[int] = 0, + traffic_split: Optional[Dict[str, int]] = None, ) -> None: """Deploys a Model to the PrivateEndpoint. Example Usage: + PSA based private endpoint my_private_endpoint.deploy( model=my_model ) + PSC based private endpoint + + psc_endpoint.deploy( + model=first_model, + ) + psc_endpoint.deploy( + model=second_model, + traffic_percentage=50, + ) + psc_endpoint.deploy( + model=third_model, + traffic_percentage={ + 'first_model_id': 40, + 'second_model_id': 30, + 'third_model_id': 30 + }, + ) + Args: model (aiplatform.Model): Required. Model to be deployed. @@ -2530,14 +2762,40 @@ def deploy( Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. + traffic_percentage (int): + Optional. Desired traffic to newly deployed model. + Defaults to 0 if there are pre-existing deployed models. + Defaults to 100 if there are no pre-existing deployed models. + Defaults to 100 for PSA based private endpoint. + Negative values should not be provided. Traffic of previously + deployed models at the endpoint will be scaled down to + accommodate new deployed model's traffic. + Should not be provided if traffic_split is provided. + traffic_split (Dict[str, int]): + Optional. Only supported by PSC base private endpoint. + A map from a DeployedModel's ID to the percentage of + this Endpoint's traffic that should be forwarded to that DeployedModel. + If a DeployedModel's ID is not listed in this map, then it receives + no traffic. 
The traffic percentage values must add up to 100, or + map must be empty if the Endpoint is to not accept any traffic at + the moment. Key for model being deployed is "0". Should not be + provided if traffic_percentage is provided. """ + + if self.network: + if traffic_split is not None: + raise ValueError( + "Traffic split is not supported for PSA based PrivateEndpoint." + ) + traffic_percentage = 100 + self._validate_deploy_args( min_replica_count=min_replica_count, max_replica_count=max_replica_count, accelerator_type=accelerator_type, deployed_model_display_name=deployed_model_display_name, - traffic_split=None, - traffic_percentage=100, + traffic_split=traffic_split, + traffic_percentage=traffic_percentage, ) explanation_spec = _explanation_utils.create_and_validate_explanation_spec( @@ -2548,8 +2806,8 @@ def deploy( self._deploy( model=model, deployed_model_display_name=deployed_model_display_name, - traffic_percentage=100, - traffic_split=None, + traffic_percentage=traffic_percentage, + traffic_split=traffic_split, machine_type=machine_type, min_replica_count=min_replica_count, max_replica_count=max_replica_count, @@ -2563,14 +2821,90 @@ def deploy( disable_container_logging=disable_container_logging, ) + def update( + self, + display_name: Optional[str] = None, + description: Optional[str] = None, + labels: Optional[Dict[str, str]] = None, + traffic_split: Optional[Dict[str, int]] = None, + request_metadata: Optional[Sequence[Tuple[str, str]]] = (), + update_request_timeout: Optional[float] = None, + ) -> "PrivateEndpoint": + """Updates a PrivateEndpoint. + + Example usage: + PSC based private endpoint + + my_endpoint = my_endpoint.update( + display_name='my-updated-endpoint', + description='my updated description', + labels={'key': 'value'}, + traffic_split={ + '123456': 20, + '234567': 80, + }, + ) + + Args: + display_name (str): + Optional. The display name of the Endpoint. + The name can be up to 128 characters long and can be consist of any UTF-8 + characters. + description (str): + Optional. The description of the Endpoint. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to organize your Endpoints. + Label keys and values can be no longer than 64 characters + (Unicode codepoints), can only contain lowercase letters, numeric + characters, underscores and dashes. International characters are allowed. + See https://goo.gl/xmQnxf for more information and examples of labels. + traffic_split (Dict[str, int]): + Optional. Only supported by PSC based private endpoint + A map from a DeployedModel's ID to the percentage of this Endpoint's + traffic that should be forwarded to that DeployedModel. + If a DeployedModel's ID is not listed in this map, then it receives no traffic. + The traffic percentage values must add up to 100, or map must be empty if + the Endpoint is to not accept any traffic at a moment. + request_metadata (Sequence[Tuple[str, str]]): + Optional. Strings which should be sent along with the request as metadata. + update_request_timeout (float): + Optional. The timeout for the update request in seconds. + + Returns: + Endpoint (aiplatform.Prediction): + Updated endpoint resource. + + Raises: + ValueError: If `traffic_split` is set for PSA based private endpoint. + """ + + if self.network: + if traffic_split is not None: + raise ValueError( + "Traffic split is not supported for PSA based Private Endpoint." 
+ ) + + super().update( + display_name=display_name, + description=description, + labels=labels, + traffic_split=traffic_split, + request_metadata=request_metadata, + update_request_timeout=update_request_timeout, + ) + + return self + def undeploy( self, deployed_model_id: str, sync=True, + traffic_split: Optional[Dict[str, int]] = None, ) -> None: """Undeploys a deployed model from the PrivateEndpoint. Example Usage: + PSA based private endpoint: my_private_endpoint.undeploy( deployed_model_id="1234567891232567891" ) @@ -2591,15 +2925,61 @@ def undeploy( Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. + traffic_split (Dict[str, int]): + Optional. Only supported by PSC based private endpoint. + A map of DeployedModel IDs to the percentage of this Endpoint's + traffic that should be forwarded to that DeployedModel. + Required if undeploying a model with non-zero traffic from an Endpoint + with multiple deployed models. The traffic percentage values must + add up to 100, or map must be empty if the Endpoint is to not + accept any traffic at the moment. If a DeployedModel's ID is not + listed in this map, then it receives no traffic. """ self._sync_gca_resource_if_skipped() - # TODO(b/211351292): Add traffic splitting for PrivateEndpoint - self._undeploy( - deployed_model_id=deployed_model_id, - traffic_split=None, - sync=sync, - ) + if self.network: + if traffic_split is not None: + raise ValueError( + "Traffic split is not supported for PSA based PrivateEndpoint." + ) + # PSA based private endpoint + self._undeploy( + deployed_model_id=deployed_model_id, + traffic_split=None, + sync=sync, + ) + + # PSC based private endpoint + if self.private_service_connect_config: + super().undeploy( + deployed_model_id=deployed_model_id, + traffic_split=traffic_split, + sync=sync, + ) + + def undeploy_all(self, sync: bool = True) -> "PrivateEndpoint": + """Undeploys every model deployed to this PrivateEndpoint. + + Args: + sync (bool): + Whether to execute this method synchronously. If False, this method + will be executed in concurrent Future and any downstream object will + be immediately returned and synced when the Future has completed. + """ + if self.network: + self._sync_gca_resource() + # PSA based private endpoint + self._undeploy( + deployed_model_id=self._gca_resource.deployed_models[0].id, + traffic_split=None, + sync=sync, + ) + + if self.private_service_connect_config: + # PSC based private endpoint + super().undeploy_all(sync=sync) + + return self def delete(self, force: bool = False, sync: bool = True) -> None: """Deletes this Vertex AI PrivateEndpoint resource. If force is set to True, @@ -2618,10 +2998,7 @@ def delete(self, force: bool = False, sync: bool = True) -> None: FailedPrecondition: If models are deployed on this Endpoint and force = False. """ if force and self._gca_resource.deployed_models: - self.undeploy( - deployed_model_id=self._gca_resource.deployed_models[0].id, - sync=sync, - ) + self.undeploy_all(sync=sync) super().delete(force=False, sync=sync) @@ -3480,6 +3857,9 @@ def deploy( autoscaling_target_accelerator_duty_cycle: Optional[int] = None, enable_access_logging=False, disable_container_logging: bool = False, + private_service_connect_config: Optional[ + PrivateEndpoint.PrivateServiceConnectConfig + ] = None, ) -> Union[Endpoint, PrivateEndpoint]: """Deploys model to endpoint. 
Endpoint will be created if unspecified. @@ -3563,11 +3943,12 @@ def deploy( Overrides encryption_spec_key_name set in aiplatform.init. network (str): Optional. The full name of the Compute Engine network to which - the Endpoint, if created, will be peered to. E.g. "projects/12345/global/networks/myVPC". + the Endpoint, if created, will be peered to. E.g. "projects/12345/global/networks/myVPC" Private services access must already be configured for the network. If set or aiplatform.init(network=...) has been set, a PrivateEndpoint will be created. If left unspecified, an Endpoint will be created. Read more about PrivateEndpoints [in the documentation](https://cloud.google.com/vertex-ai/docs/predictions/using-private-endpoints). + Cannot be set together with private_service_connect_config. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will @@ -3586,6 +3967,9 @@ def deploy( disable_container_logging (bool): If True, container logs from the deployed model will not be written to Cloud Logging. Defaults to False. + private_service_connect_config (PrivateEndpoint.PrivateServiceConnectConfig): + If true, the endpoint can be accessible via [Private Service Connect](https://cloud.google.com/vpc/docs/private-service-connect). + Cannot be set together with network. Returns: endpoint (Union[Endpoint, PrivateEndpoint]): @@ -3606,9 +3990,9 @@ def deploy( ) if isinstance(endpoint, PrivateEndpoint): - if traffic_split: + if endpoint.network and traffic_split: raise ValueError( - "Traffic splitting is not yet supported for PrivateEndpoint. " + "Traffic splitting is not yet supported for PSA based PrivateEndpoint. " "Try calling deploy() without providing `traffic_split`. " "A maximum of one model can be deployed to each private Endpoint." ) @@ -3641,6 +4025,7 @@ def deploy( autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle, enable_access_logging=enable_access_logging, disable_container_logging=disable_container_logging, + private_service_connect_config=private_service_connect_config, ) @base.optional_sync(return_input_arg="endpoint", bind_future_to_self=False) @@ -3667,6 +4052,9 @@ def _deploy( autoscaling_target_accelerator_duty_cycle: Optional[int] = None, enable_access_logging=False, disable_container_logging: bool = False, + private_service_connect_config: Optional[ + PrivateEndpoint.PrivateServiceConnectConfig + ] = None, ) -> Union[Endpoint, PrivateEndpoint]: """Deploys model to endpoint. Endpoint will be created if unspecified. @@ -3748,6 +4136,7 @@ def _deploy( Private services access must already be configured for the network. Read more about PrivateEndpoints [in the documentation](https://cloud.google.com/vertex-ai/docs/predictions/using-private-endpoints). + Cannot be set together with private_service_connect_config. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will @@ -3766,6 +4155,9 @@ def _deploy( disable_container_logging (bool): If True, container logs from the deployed model will not be written to Cloud Logging. Defaults to False. + private_service_connect_config (PrivateEndpoint.PrivateServiceConnectConfig): + If true, the endpoint can be accessible via [Private Service Connect](https://cloud.google.com/vpc/docs/private-service-connect). + Cannot be set together with network. 
Returns: endpoint (Union[Endpoint, PrivateEndpoint]): @@ -3775,7 +4167,7 @@ def _deploy( if endpoint is None: display_name = self.display_name[:118] + "_endpoint" - if not network: + if not network and not private_service_connect_config: endpoint = Endpoint.create( display_name=display_name, project=self.project, @@ -3791,6 +4183,7 @@ def _deploy( location=self.location, credentials=self.credentials, encryption_spec_key_name=encryption_spec_key_name, + private_service_connect_config=private_service_connect_config, ) _LOGGER.log_action_start_against_resource("Deploying model to", "", endpoint) diff --git a/google/cloud/aiplatform/pipeline_jobs.py b/google/cloud/aiplatform/pipeline_jobs.py index f1d4997dc3..2b672d2663 100644 --- a/google/cloud/aiplatform/pipeline_jobs.py +++ b/google/cloud/aiplatform/pipeline_jobs.py @@ -887,17 +887,24 @@ def _get_context(self) -> context.Context: @classmethod def _query_experiment_row( - cls, node: context.Context, include_time_series: Optional[bool] = True + cls, + node: context.Context, + experiment: Optional[experiment_resources.Experiment] = None, + include_time_series: bool = True, ) -> experiment_resources._ExperimentRow: """Queries the PipelineJob metadata as an experiment run parameter and metric row. - Parameters are retrieved from the system.Run Execution.metadata of the PipelineJob. + Parameters are retrieved from the system.Run Execution.metadata of the + PipelineJob. - Metrics are retrieved from the system.Metric Artifacts.metadata produced by this PipelineJob. + Metrics are retrieved from the system.Metric Artifacts.metadata produced + by this PipelineJob. Args: node (context._Context): Required. System.PipelineRun context that represents a PipelineJob Run. + experiment: + Optional. Experiment associated with this run. include_time_series (bool): Optional. Whether or not to include time series metrics in df. Default is True. diff --git a/google/cloud/aiplatform/tensorboard/logdir_loader.py b/google/cloud/aiplatform/tensorboard/logdir_loader.py index 0af322ff53..ea9b6200f7 100644 --- a/google/cloud/aiplatform/tensorboard/logdir_loader.py +++ b/google/cloud/aiplatform/tensorboard/logdir_loader.py @@ -63,11 +63,14 @@ def synchronize_runs(self): In addition, any existing `DirectoryLoader` whose run directory no longer exists will be deleted. + + Modify run name to work with Experiments restrictions. 
""" logger.info("Starting logdir traversal of %s", self._logdir) runs_seen = set() for subdir in io_wrapper.GetLogdirSubdirectories(self._logdir): run = os.path.relpath(subdir, self._logdir) + run = run.replace("/", "-").replace("_", "-") runs_seen.add(run) if run not in self._directory_loaders: logger.info("- Adding run for relative directory %s", run) diff --git a/google/cloud/aiplatform/tensorboard/uploader.py b/google/cloud/aiplatform/tensorboard/uploader.py index ec101785a4..510e4bed2b 100644 --- a/google/cloud/aiplatform/tensorboard/uploader.py +++ b/google/cloud/aiplatform/tensorboard/uploader.py @@ -20,22 +20,21 @@ from collections import defaultdict import functools import logging -import os import re import time from typing import ContextManager, Dict, FrozenSet, Generator, Iterable, Optional, Tuple import uuid -from google.api_core import exceptions from google.cloud import storage from google.cloud.aiplatform import base from google.cloud.aiplatform.compat.services import ( tensorboard_service_client, ) from google.cloud.aiplatform.compat.types import tensorboard_data -from google.cloud.aiplatform.compat.types import tensorboard_experiment from google.cloud.aiplatform.compat.types import tensorboard_service from google.cloud.aiplatform.compat.types import tensorboard_time_series +from google.cloud.aiplatform.metadata import experiment_resources +from google.cloud.aiplatform.metadata import metadata from google.cloud.aiplatform.tensorboard import logdir_loader from google.cloud.aiplatform.tensorboard import upload_tracker from google.cloud.aiplatform.tensorboard import uploader_constants @@ -215,47 +214,45 @@ def active_filter(secs): self._create_additional_senders() - def _create_or_get_experiment(self) -> tensorboard_experiment.TensorboardExperiment: - """Create an experiment or get an experiment. - - Attempts to create an experiment. If the experiment already exists and - creation fails then the experiment will be retrieved. + def create_experiment(self): + """Creates an Experiment for this upload session. - Returns: - The created or retrieved experiment. + Sets the tensorboard resource and experiment, which will get or create a + Vertex Experiment and associate it with a Tensorboard Experiment. """ - logger.info("Creating experiment") + m = self._api.parse_tensorboard_path(self._tensorboard_resource_name) - tb_experiment = tensorboard_experiment.TensorboardExperiment( - description=self._description, display_name=self._experiment_display_name + existing_experiment = experiment_resources.Experiment.get( + experiment_name=self._experiment_name, + project=m["project"], + location=m["location"], ) - - try: - experiment = self._api.create_tensorboard_experiment( - parent=self._tensorboard_resource_name, - tensorboard_experiment=tb_experiment, - tensorboard_experiment_id=self._experiment_name, - ) + if not existing_experiment: self._is_brand_new_experiment = True - except exceptions.AlreadyExists: - logger.info("Creating experiment failed. 
Retrieving experiment.") - experiment_name = os.path.join( - self._tensorboard_resource_name, "experiments", self._experiment_name - ) - experiment = self._api.get_tensorboard_experiment(name=experiment_name) - return experiment - def create_experiment(self): - """Creates an Experiment for this upload session and returns the ID.""" + metadata._experiment_tracker.reset() + metadata._experiment_tracker.set_tensorboard( + tensorboard=self._tensorboard_resource_name, + project=m["project"], + location=m["location"], + ) + metadata._experiment_tracker.set_experiment( + project=m["project"], + location=m["location"], + experiment=self._experiment_name, + description=self._description, + backing_tensorboard=self._tensorboard_resource_name, + ) - experiment = self._create_or_get_experiment() - self._experiment = experiment + self._tensorboard_experiment_resource_name = ( + f"{self._tensorboard_resource_name}/experiments/{self._experiment_name}" + ) self._one_platform_resource_manager = uploader_utils.OnePlatformResourceManager( - self._experiment.name, self._api + self._tensorboard_experiment_resource_name, self._api ) self._request_sender = _BatchedRequestSender( - self._experiment.name, + self._tensorboard_experiment_resource_name, self._api, allowed_plugins=self._allowed_plugins, upload_limits=self._upload_limits, @@ -271,7 +268,7 @@ def create_experiment(self): # Update partials with experiment name for sender in self._additional_senders.keys(): self._additional_senders[sender] = self._additional_senders[sender]( - experiment_resource_name=self._experiment.name, + experiment_resource_name=self._tensorboard_experiment_resource_name, ) self._dispatcher = _Dispatcher( @@ -310,7 +307,7 @@ def _create_additional_senders(self) -> Dict[str, uploader_utils.RequestSender]: ) def get_experiment_resource_name(self): - return self._experiment.name + return self._tensorboard_experiment_resource_name def start_uploading(self): """Blocks forever to continuously upload data from the logdir. diff --git a/google/cloud/aiplatform/tensorboard/uploader_utils.py b/google/cloud/aiplatform/tensorboard/uploader_utils.py index 8b72c48dc4..bb71c80ceb 100644 --- a/google/cloud/aiplatform/tensorboard/uploader_utils.py +++ b/google/cloud/aiplatform/tensorboard/uploader_utils.py @@ -23,7 +23,6 @@ import re import time from typing import Callable, Dict, Generator, List, Optional, Tuple -import uuid from absl import app from google.api_core import exceptions @@ -31,9 +30,12 @@ from google.cloud.aiplatform.compat.services import ( tensorboard_service_client, ) +from google.cloud.aiplatform.compat.types import execution as gca_execution from google.cloud.aiplatform.compat.types import tensorboard_run from google.cloud.aiplatform.compat.types import tensorboard_service from google.cloud.aiplatform.compat.types import tensorboard_time_series +from google.cloud.aiplatform.metadata import experiment_run_resource +from google.cloud.aiplatform.tensorboard import tensorboard_resource import grpc from tensorboard.util import tb_logging @@ -102,7 +104,7 @@ def __init__(self, experiment_resource_name: str, api: TensorboardServiceClient) def batch_create_runs( self, run_names: List[str] - ) -> List[tensorboard_run.TensorboardRun]: + ) -> List[tensorboard_resource.TensorboardRun]: """Batch creates TensorboardRuns. 
Args: @@ -110,31 +112,12 @@ def batch_create_runs( Returns: the created TensorboardRuns """ - batch_size = OnePlatformResourceManager.CREATE_RUN_BATCH_SIZE created_runs = [] - for i in range(0, len(run_names), batch_size): - one_batch_run_names = run_names[i : i + batch_size] - tb_run_requests = [ - tensorboard_service.CreateTensorboardRunRequest( - parent=self._experiment_resource_name, - tensorboard_run=tensorboard_run.TensorboardRun( - display_name=run_name - ), - tensorboard_run_id=str(uuid.uuid4()), - ) - for run_name in one_batch_run_names - ] - - tb_runs = self._api.batch_create_tensorboard_runs( - parent=self._experiment_resource_name, - requests=tb_run_requests, - ).tensorboard_runs - - self._run_name_to_run_resource_name.update( - {run.display_name: run.name for run in tb_runs} - ) - - created_runs.extend(tb_runs) + for run_name in run_names: + tb_run = self._create_or_get_run_resource(run_name) + created_runs.append(tb_run) + if run_name not in self._run_name_to_run_resource_name: + self._run_name_to_run_resource_name[run_name] = tb_run.resource_name return created_runs @@ -207,13 +190,16 @@ def get_run_resource_name(self, run_name: str) -> str: """ if run_name not in self._run_name_to_run_resource_name: tb_run = self._create_or_get_run_resource(run_name) - self._run_name_to_run_resource_name[run_name] = tb_run.name + self._run_name_to_run_resource_name[run_name] = tb_run.resource_name return self._run_name_to_run_resource_name[run_name] def _create_or_get_run_resource( self, run_name: str ) -> tensorboard_run.TensorboardRun: - """Creates a new run resource in current tensorboard experiment resource. + """Creates new experiment run and tensorboard run resources. + + The experiment run will be associated with the tensorboard run resource. + This will link all tensorboard run data to the associated experiment. Args: run_name (str): @@ -224,36 +210,32 @@ def _create_or_get_run_resource( The TensorboardRun given the run_name. Raises: - ExistingResourceNotFoundError: - Run name could not be found in resource list. - exceptions.InvalidArgument: + ValueError: run_name argument is invalid. """ - tb_run = tensorboard_run.TensorboardRun() - tb_run.display_name = run_name - try: - tb_run = self._api.create_tensorboard_run( - parent=self._experiment_resource_name, - tensorboard_run=tb_run, - tensorboard_run_id=str(uuid.uuid4()), + m = re.match( + "projects/(.*)/locations/(.*)/tensorboards/(.*)/experiments/(.*)", + self._experiment_resource_name, + ) + project = m[1] + location = m[2] + tensorboard = m[3] + experiment = m[4] + experiment_run = experiment_run_resource.ExperimentRun.get( + project=project, location=location, run_name=run_name + ) + if not experiment_run: + experiment_run = experiment_run_resource.ExperimentRun.create( + project=project, + location=location, + run_name=run_name, + experiment=experiment, + tensorboard=tensorboard, + state=gca_execution.Execution.State.COMPLETE, ) - except exceptions.InvalidArgument as e: - # If the run name already exists then retrieve it - if "already exist" in e.message: - runs_pages = self._api.list_tensorboard_runs( - parent=self._experiment_resource_name - ) - for tb_run in runs_pages: - if tb_run.display_name == run_name: - break + tb_run_artifact = experiment_run._backing_tensorboard_run + tb_run = tb_run_artifact.resource - if tb_run.display_name != run_name: - raise ExistingResourceNotFoundError( - "Run with name %s already exists but is not resource list." 
-                    % run_name
-                )
-        else:
-            raise
        return tb_run

    def get_time_series_resource_name(
diff --git a/google/cloud/aiplatform/utils/__init__.py b/google/cloud/aiplatform/utils/__init__.py
index ea798e1101..63639aed9d 100644
--- a/google/cloud/aiplatform/utils/__init__.py
+++ b/google/cloud/aiplatform/utils/__init__.py
@@ -640,16 +640,72 @@ class FeatureOnlineStoreClientWithOverride(ClientWithOverride):
class FeatureRegistryClientWithOverride(ClientWithOverride):
+    """Adds function override for client classes to support new Feature Store.
+
+    `feature_path()` and `parse_feature_path()` are overridden here to compensate
+    for the auto-generated GAPIC class which only supports Feature Store
+    Legacy's feature paths.
+    """
+
+    @staticmethod
+    def feature_path(
+        project: str,
+        location: str,
+        feature_group: str,
+        feature: str,
+    ) -> str:
+        return "projects/{project}/locations/{location}/featureGroups/{feature_group}/features/{feature}".format(
+            project=project,
+            location=location,
+            feature_group=feature_group,
+            feature=feature,
+        )
+
+    @staticmethod
+    def parse_feature_path(path: str) -> Dict[str, str]:
+        """Parses a feature path into its component segments."""
+        m = re.match(
+            r"^projects/(?P<project>.+?)/locations/(?P<location>.+?)/featureGroups/(?P<feature_group>.+?)/features/(?P<feature>.+?)$",
+            path,
+        )
+        return m.groupdict() if m else {}
+
+    class FeatureRegistryServiceClientV1(
+        feature_registry_service_client_v1.FeatureRegistryServiceClient
+    ):
+        @staticmethod
+        def feature_path(project: str, location: str, feature_group: str, feature: str):
+            return FeatureRegistryClientWithOverride.feature_path(
+                project, location, feature_group, feature
+            )
+
+        @staticmethod
+        def parse_feature_path(path: str) -> Dict[str, str]:
+            return FeatureRegistryClientWithOverride.parse_feature_path(path)
+
+    class FeatureRegistryServiceClientV1Beta1(
+        feature_registry_service_client_v1beta1.FeatureRegistryServiceClient
+    ):
+        @staticmethod
+        def feature_path(project: str, location: str, feature_group: str, feature: str):
+            return FeatureRegistryClientWithOverride.feature_path(
+                project, location, feature_group, feature
+            )
+
+        @staticmethod
+        def parse_feature_path(path: str) -> Dict[str, str]:
+            return FeatureRegistryClientWithOverride.parse_feature_path(path)
+
    _is_temporary = True
    _default_version = compat.DEFAULT_VERSION
    _version_map = (
        (
            compat.V1,
-            feature_registry_service_client_v1.FeatureRegistryServiceClient,
+            FeatureRegistryServiceClientV1,
        ),
        (
            compat.V1BETA1,
-            feature_registry_service_client_v1beta1.FeatureRegistryServiceClient,
+            FeatureRegistryServiceClientV1Beta1,
        ),
    )
diff --git a/google/cloud/aiplatform/v1/schema/predict/instance/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/instance/gapic_version.py
index 05d77f299d..33b7cbe7a9 100644
--- a/google/cloud/aiplatform/v1/schema/predict/instance/gapic_version.py
+++ b/google/cloud/aiplatform/v1/schema/predict/instance/gapic_version.py
@@ -13,4 +13,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
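[Editor's note — illustrative sketch, not part of this change] The FeatureRegistryClientWithOverride hunk above adds path helpers for the new Feature Store resource model rather than the legacy feature path format produced by the generated GAPIC client. A minimal round-trip, assuming the class is imported from google.cloud.aiplatform.utils and using made-up resource values:

    from google.cloud.aiplatform.utils import FeatureRegistryClientWithOverride

    # Build a featureGroups-style path (new Feature Store).
    path = FeatureRegistryClientWithOverride.feature_path(
        project="my-project",
        location="us-central1",
        feature_group="my_feature_group",
        feature="my_feature",
    )
    # path == "projects/my-project/locations/us-central1/featureGroups/my_feature_group/features/my_feature"

    # parse_feature_path() is the inverse; it returns the named path segments.
    assert FeatureRegistryClientWithOverride.parse_feature_path(path) == {
        "project": "my-project",
        "location": "us-central1",
        "feature_group": "my_feature_group",
        "feature": "my_feature",
    }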
# -__version__ = "1.51.0" # {x-release-please-version} +__version__ = "1.52.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1/schema/predict/instance_v1/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/instance_v1/gapic_version.py index 05d77f299d..33b7cbe7a9 100644 --- a/google/cloud/aiplatform/v1/schema/predict/instance_v1/gapic_version.py +++ b/google/cloud/aiplatform/v1/schema/predict/instance_v1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.51.0" # {x-release-please-version} +__version__ = "1.52.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1/schema/predict/params/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/params/gapic_version.py index 05d77f299d..33b7cbe7a9 100644 --- a/google/cloud/aiplatform/v1/schema/predict/params/gapic_version.py +++ b/google/cloud/aiplatform/v1/schema/predict/params/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.51.0" # {x-release-please-version} +__version__ = "1.52.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1/schema/predict/params_v1/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/params_v1/gapic_version.py index 05d77f299d..33b7cbe7a9 100644 --- a/google/cloud/aiplatform/v1/schema/predict/params_v1/gapic_version.py +++ b/google/cloud/aiplatform/v1/schema/predict/params_v1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.51.0" # {x-release-please-version} +__version__ = "1.52.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1/schema/predict/prediction/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/prediction/gapic_version.py index 05d77f299d..33b7cbe7a9 100644 --- a/google/cloud/aiplatform/v1/schema/predict/prediction/gapic_version.py +++ b/google/cloud/aiplatform/v1/schema/predict/prediction/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.51.0" # {x-release-please-version} +__version__ = "1.52.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1/schema/predict/prediction_v1/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/prediction_v1/gapic_version.py index 05d77f299d..33b7cbe7a9 100644 --- a/google/cloud/aiplatform/v1/schema/predict/prediction_v1/gapic_version.py +++ b/google/cloud/aiplatform/v1/schema/predict/prediction_v1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.51.0" # {x-release-please-version} +__version__ = "1.52.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1/schema/trainingjob/definition/gapic_version.py b/google/cloud/aiplatform/v1/schema/trainingjob/definition/gapic_version.py index 05d77f299d..33b7cbe7a9 100644 --- a/google/cloud/aiplatform/v1/schema/trainingjob/definition/gapic_version.py +++ b/google/cloud/aiplatform/v1/schema/trainingjob/definition/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -__version__ = "1.51.0" # {x-release-please-version} +__version__ = "1.52.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1/schema/trainingjob/definition_v1/gapic_version.py b/google/cloud/aiplatform/v1/schema/trainingjob/definition_v1/gapic_version.py index 05d77f299d..33b7cbe7a9 100644 --- a/google/cloud/aiplatform/v1/schema/trainingjob/definition_v1/gapic_version.py +++ b/google/cloud/aiplatform/v1/schema/trainingjob/definition_v1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.51.0" # {x-release-please-version} +__version__ = "1.52.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/instance/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/instance/gapic_version.py index 05d77f299d..33b7cbe7a9 100644 --- a/google/cloud/aiplatform/v1beta1/schema/predict/instance/gapic_version.py +++ b/google/cloud/aiplatform/v1beta1/schema/predict/instance/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.51.0" # {x-release-please-version} +__version__ = "1.52.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/instance_v1beta1/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/instance_v1beta1/gapic_version.py index 05d77f299d..33b7cbe7a9 100644 --- a/google/cloud/aiplatform/v1beta1/schema/predict/instance_v1beta1/gapic_version.py +++ b/google/cloud/aiplatform/v1beta1/schema/predict/instance_v1beta1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.51.0" # {x-release-please-version} +__version__ = "1.52.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/params/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/params/gapic_version.py index 05d77f299d..33b7cbe7a9 100644 --- a/google/cloud/aiplatform/v1beta1/schema/predict/params/gapic_version.py +++ b/google/cloud/aiplatform/v1beta1/schema/predict/params/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.51.0" # {x-release-please-version} +__version__ = "1.52.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/params_v1beta1/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/params_v1beta1/gapic_version.py index 05d77f299d..33b7cbe7a9 100644 --- a/google/cloud/aiplatform/v1beta1/schema/predict/params_v1beta1/gapic_version.py +++ b/google/cloud/aiplatform/v1beta1/schema/predict/params_v1beta1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -__version__ = "1.51.0" # {x-release-please-version} +__version__ = "1.52.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/prediction/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/prediction/gapic_version.py index 05d77f299d..33b7cbe7a9 100644 --- a/google/cloud/aiplatform/v1beta1/schema/predict/prediction/gapic_version.py +++ b/google/cloud/aiplatform/v1beta1/schema/predict/prediction/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.51.0" # {x-release-please-version} +__version__ = "1.52.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/prediction_v1beta1/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/prediction_v1beta1/gapic_version.py index 05d77f299d..33b7cbe7a9 100644 --- a/google/cloud/aiplatform/v1beta1/schema/predict/prediction_v1beta1/gapic_version.py +++ b/google/cloud/aiplatform/v1beta1/schema/predict/prediction_v1beta1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.51.0" # {x-release-please-version} +__version__ = "1.52.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition/gapic_version.py index 05d77f299d..33b7cbe7a9 100644 --- a/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition/gapic_version.py +++ b/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.51.0" # {x-release-please-version} +__version__ = "1.52.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition_v1beta1/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition_v1beta1/gapic_version.py index 05d77f299d..33b7cbe7a9 100644 --- a/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition_v1beta1/gapic_version.py +++ b/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition_v1beta1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.51.0" # {x-release-please-version} +__version__ = "1.52.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform/version.py b/google/cloud/aiplatform/version.py index 8a9b6647c9..9879eee46f 100644 --- a/google/cloud/aiplatform/version.py +++ b/google/cloud/aiplatform/version.py @@ -15,4 +15,4 @@ # limitations under the License. # -__version__ = "1.51.0" +__version__ = "1.52.0" diff --git a/google/cloud/aiplatform_v1/gapic_version.py b/google/cloud/aiplatform_v1/gapic_version.py index 05d77f299d..33b7cbe7a9 100644 --- a/google/cloud/aiplatform_v1/gapic_version.py +++ b/google/cloud/aiplatform_v1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
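[Editor's note — context only, not part of this change] The run of near-identical gapic_version.py edits in this range simply bumps the package version; each of these files keeps its version on a line tagged with the `# {x-release-please-version}` marker so the release automation can locate and rewrite it mechanically. The annotated line has this shape (a generic illustration, not an exhaustive list of files):

    __version__ = "1.52.0"  # {x-release-please-version}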
# -__version__ = "1.51.0" # {x-release-please-version} +__version__ = "1.52.0" # {x-release-please-version} diff --git a/google/cloud/aiplatform_v1beta1/gapic_version.py b/google/cloud/aiplatform_v1beta1/gapic_version.py index 05d77f299d..33b7cbe7a9 100644 --- a/google/cloud/aiplatform_v1beta1/gapic_version.py +++ b/google/cloud/aiplatform_v1beta1/gapic_version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "1.51.0" # {x-release-please-version} +__version__ = "1.52.0" # {x-release-please-version} diff --git a/pypi/_vertex_ai_placeholder/version.py b/pypi/_vertex_ai_placeholder/version.py index 193f605b90..ffe1268267 100644 --- a/pypi/_vertex_ai_placeholder/version.py +++ b/pypi/_vertex_ai_placeholder/version.py @@ -15,4 +15,4 @@ # limitations under the License. # -__version__ = "1.51.0" +__version__ = "1.52.0" diff --git a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json index 6ed86ac0e9..a8e19bfad2 100644 --- a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json +++ b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json @@ -8,7 +8,7 @@ ], "language": "PYTHON", "name": "google-cloud-aiplatform", - "version": "1.51.0" + "version": "1.52.0" }, "snippets": [ { diff --git a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json index 111f1b5c1e..e181aba9f3 100644 --- a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json +++ b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json @@ -8,7 +8,7 @@ ], "language": "PYTHON", "name": "google-cloud-aiplatform", - "version": "1.51.0" + "version": "1.52.0" }, "snippets": [ { diff --git a/setup.py b/setup.py index 677fb16fa8..e11afd0cde 100644 --- a/setup.py +++ b/setup.py @@ -139,7 +139,7 @@ ] reasoning_engine_extra_require = [ - "cloudpickle >= 2.2.1, < 3.0", + "cloudpickle >= 2.2.1, < 4.0", "pydantic >= 2.6.3, < 3", ] diff --git a/tests/system/aiplatform/test_language_models.py b/tests/system/aiplatform/test_language_models.py index e8a7ac46dd..b2ca72abb7 100644 --- a/tests/system/aiplatform/test_language_models.py +++ b/tests/system/aiplatform/test_language_models.py @@ -363,6 +363,8 @@ async def test_text_embedding_async(self, api_transport): assert embeddings[1].statistics.token_count > 1000 assert embeddings[1].statistics.truncated + # TODO(b/339907038): Re-enable test after timeout issue is fixed. 
+ @pytest.mark.skip(reason="Causes system tests timeout") @pytest.mark.parametrize("api_transport", ["grpc", "rest"]) def test_tuning(self, shared_state, api_transport): """Test tuning, listing and loading models.""" diff --git a/tests/system/vertexai/test_reasoning_engines.py b/tests/system/vertexai/test_reasoning_engines.py index 0dc2e143a8..a80253d2f3 100644 --- a/tests/system/vertexai/test_reasoning_engines.py +++ b/tests/system/vertexai/test_reasoning_engines.py @@ -18,20 +18,14 @@ import pytest from google import auth from google.api_core import exceptions +from google.cloud import storage import vertexai from tests.system.aiplatform import e2e_base from vertexai.preview import reasoning_engines +from vertexai.preview.generative_models import ToolConfig -class CapitalizeEngine: - """A sample Reasoning Engine.""" - - def set_up(self): - pass - - def query(self, input: str) -> str: - """Capitalizes the input.""" - return input.upper() +_BLOB_FILENAME = vertexai.reasoning_engines._reasoning_engines._BLOB_FILENAME @pytest.mark.usefixtures( @@ -53,81 +47,50 @@ def test_langchain_template(self, shared_state): staging_bucket=f"gs://{shared_state['staging_bucket_name']}", credentials=credentials, ) + # Test prebuilt langchain_template created_app = reasoning_engines.ReasoningEngine.create( - reasoning_engines.LangchainAgent(model="gemini-1.0-pro"), + reasoning_engines.LangchainAgent( + model="gemini-1.5-pro-preview-0409", + model_tool_kwargs={ + "tool_config": { + "function_calling_config": { + "mode": ToolConfig.FunctionCallingConfig.Mode.AUTO, + }, + }, + }, + ), requirements=["google-cloud-aiplatform[reasoningengine,langchain]"], + display_name="test-display-name", + description="test-description", + gcs_dir_name="test-gcs-dir-name", ) shared_state.setdefault("resources", []) shared_state["resources"].append(created_app) # Deletion at teardown. + got_app = reasoning_engines.ReasoningEngine(created_app.resource_name) + + # Test resource attributes + assert isinstance(created_app.resource_name, str) + assert got_app.resource_name == created_app.resource_name + assert got_app.gca_resource.name == got_app.resource_name + assert got_app.gca_resource.display_name == "test-display-name" + assert got_app.gca_resource.description == "test-description" + + # Test operation schemas + assert got_app.operation_schemas() == created_app.operation_schemas() + + # Test query response + # (Wrap in a try-except block because of non-determinism from Gemini.) 
try: response = created_app.query(input="hello") assert response.get("input") == "hello" - assert isinstance(created_app.resource_name, str) - got_app = reasoning_engines.ReasoningEngine(created_app.resource_name) - assert got_app.resource_name == created_app.resource_name - assert got_app.operation_schemas() == created_app.operation_schemas() response = got_app.query(input="hello") assert response.get("input") == "hello" except exceptions.FailedPrecondition as e: print(e) - def test_create_reasoning_engine_gcs_dir_name(self, shared_state): - # https://github.com/googleapis/python-aiplatform/issues/3650 - super().setup_method() - credentials, _ = auth.default( - scopes=["https://www.googleapis.com/auth/cloud-platform"] - ) - vertexai.init( - project=e2e_base._PROJECT, - location=e2e_base._LOCATION, - staging_bucket=f"gs://{shared_state['staging_bucket_name']}", - credentials=credentials, - ) - created_app = reasoning_engines.ReasoningEngine.create( - reasoning_engine=CapitalizeEngine(), - gcs_dir_name="test-gcs-dir-name", - ) - shared_state.setdefault("resources", []) - shared_state["resources"].append(created_app) # Deletion at teardown. - assert created_app.query(input="hello") == "HELLO" - - def test_create_reasoning_engine_resource_attributes(self, shared_state): - super().setup_method() - credentials, _ = auth.default( - scopes=["https://www.googleapis.com/auth/cloud-platform"] - ) - vertexai.init( - project=e2e_base._PROJECT, - location=e2e_base._LOCATION, - staging_bucket=f"gs://{shared_state['staging_bucket_name']}", - credentials=credentials, - ) - created_app = reasoning_engines.ReasoningEngine.create( - reasoning_engine=CapitalizeEngine(), - reasoning_engine_name="test-reasoning-engine-name", - display_name="test-display-name", - description="test-description", - ) - shared_state.setdefault("resources", []) - shared_state["resources"].append(created_app) # Deletion at teardown. - assert created_app.gca_resource.name == "test-reasoning-engine-name" - assert created_app.gca_resource.display_name == "test-display-name" - assert created_app.gca_resource.description == "test-description" - - def test_create_reasoning_engine_operation_schemas(self, shared_state): - super().setup_method() - credentials, _ = auth.default( - scopes=["https://www.googleapis.com/auth/cloud-platform"] - ) - vertexai.init( - project=e2e_base._PROJECT, - location=e2e_base._LOCATION, - staging_bucket=f"gs://{shared_state['staging_bucket_name']}", - credentials=credentials, - ) - created_app = reasoning_engines.ReasoningEngine.create( - reasoning_engine=CapitalizeEngine(), - ) - shared_state.setdefault("resources", []) - shared_state["resources"].append(created_app) # Deletion at teardown. 
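[Editor's note — illustrative sketch, not part of this change] The rewritten reasoning-engine system test above folds the old per-attribute tests into one LangChain flow with a ToolConfig. A condensed version of that flow, limited to calls that appear in the hunk and using placeholder project/bucket values:

    import vertexai
    from vertexai.preview import reasoning_engines
    from vertexai.preview.generative_models import ToolConfig

    vertexai.init(
        project="my-project",
        location="us-central1",
        staging_bucket="gs://my-staging-bucket",
    )

    agent = reasoning_engines.LangchainAgent(
        model="gemini-1.5-pro-preview-0409",
        # The tool config is passed through to the model via model_tool_kwargs.
        model_tool_kwargs={
            "tool_config": {
                "function_calling_config": {
                    "mode": ToolConfig.FunctionCallingConfig.Mode.AUTO,
                },
            },
        },
    )
    app = reasoning_engines.ReasoningEngine.create(
        agent,
        requirements=["google-cloud-aiplatform[reasoningengine,langchain]"],
        display_name="test-display-name",
        description="test-description",
        gcs_dir_name="test-gcs-dir-name",  # staged under this bucket subdirectory
    )
    response = app.query(input="hello")  # the response dict echoes back the "input" key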
- assert created_app.operation_schemas() == [] + # Test GCS Bucket subdirectory creation + # Original: https://github.com/googleapis/python-aiplatform/issues/3650 + client = storage.Client(project=e2e_base._PROJECT) + bucket = client.bucket(shared_state["staging_bucket_name"]) + assert bucket.exists() + assert bucket.get_blob(f"test-gcs-dir-name/{_BLOB_FILENAME}").exists() diff --git a/tests/unit/aiplatform/constants.py b/tests/unit/aiplatform/constants.py index 14b6d0ea3c..9c9758c2df 100644 --- a/tests/unit/aiplatform/constants.py +++ b/tests/unit/aiplatform/constants.py @@ -294,6 +294,7 @@ class EndpointConstants: _TEST_ID_2 = "4366591682456584192" _TEST_ID_3 = "5820582938582924817" _TEST_ENDPOINT_NAME = f"projects/{ProjectConstants._TEST_PROJECT}/locations/{ProjectConstants._TEST_LOCATION}/endpoints/{_TEST_ID}" + _TEST_ENDPOINT_NAME_2 = f"projects/{ProjectConstants._TEST_PROJECT}/locations/{ProjectConstants._TEST_LOCATION}/endpoints/{_TEST_ID_2}" _TEST_DISPLAY_NAME = "test-display-name" _TEST_DEPLOYED_MODELS = [ endpoint.DeployedModel(id=_TEST_ID, display_name=_TEST_DISPLAY_NAME), diff --git a/tests/unit/aiplatform/test_custom_job.py b/tests/unit/aiplatform/test_custom_job.py index 8618c80789..46b9ca3fa0 100644 --- a/tests/unit/aiplatform/test_custom_job.py +++ b/tests/unit/aiplatform/test_custom_job.py @@ -83,10 +83,6 @@ "image_uri": _TEST_TRAINING_CONTAINER_IMAGE, "command": [], "args": _TEST_RUN_ARGS, - "env": [ - {"name": "AIP_EXPERIMENT_NAME", "value": _TEST_EXPERIMENT}, - {"name": "AIP_EXPERIMENT_RUN_NAME", "value": _TEST_EXPERIMENT_RUN}, - ], }, } ] @@ -160,6 +156,7 @@ _TEST_EXPERIMENT_DESCRIPTION = "test-experiment-description" _TEST_RUN = "run-1" _TEST_EXECUTION_ID = f"{_TEST_EXPERIMENT}-{_TEST_RUN}" +_TEST_EXPERIMENT_CONTEXT_NAME = f"{_TEST_PARENT_METADATA}/contexts/{_TEST_EXPERIMENT}" _TEST_EXPERIMENT_RUN_CONTEXT_NAME = ( f"{_TEST_PARENT_METADATA}/contexts/{_TEST_EXECUTION_ID}" ) @@ -203,6 +200,8 @@ def _get_custom_job_proto_with_experiments(state=None, name=None, error=None): custom_job_proto.name = name custom_job_proto.state = state custom_job_proto.error = error + custom_job_proto.job_spec.experiment = _TEST_EXPERIMENT_CONTEXT_NAME + custom_job_proto.job_spec.experiment_run = _TEST_EXPERIMENT_RUN_CONTEXT_NAME return custom_job_proto @@ -255,6 +254,28 @@ def get_custom_job_mock(): yield get_custom_job_mock +@pytest.fixture +def get_custom_job_with_experiments_mock(): + with patch.object( + job_service_client.JobServiceClient, "get_custom_job" + ) as get_custom_job_mock: + get_custom_job_mock.side_effect = [ + _get_custom_job_proto( + name=_TEST_CUSTOM_JOB_NAME, + state=gca_job_state_compat.JobState.JOB_STATE_PENDING, + ), + _get_custom_job_proto( + name=_TEST_CUSTOM_JOB_NAME, + state=gca_job_state_compat.JobState.JOB_STATE_RUNNING, + ), + _get_custom_job_proto_with_experiments( + name=_TEST_CUSTOM_JOB_NAME, + state=gca_job_state_compat.JobState.JOB_STATE_SUCCEEDED, + ), + ] + yield get_custom_job_mock + + @pytest.fixture def get_custom_tpu_v5e_job_mock(): with patch.object( @@ -455,6 +476,19 @@ def get_experiment_run_run_mock(): yield get_context_mock +@pytest.fixture +def get_experiment_run_not_found_mock(): + with patch.object(MetadataServiceClient, "get_context") as get_context_mock: + get_context_mock.side_effect = [ + _EXPERIMENT_MOCK, + _EXPERIMENT_RUN_MOCK, + _EXPERIMENT_MOCK, + exceptions.NotFound(""), + ] + + yield get_context_mock + + @pytest.fixture def update_context_mock(): with patch.object(MetadataServiceClient, "update_context") as update_context_mock: @@ 
-598,7 +632,7 @@ def test_submit_custom_job_with_experiments( restart_job_on_worker_restart=_TEST_RESTART_JOB_ON_WORKER_RESTART, create_request_timeout=None, experiment=_TEST_EXPERIMENT, - experiment_run=_TEST_EXPERIMENT_RUN, + experiment_run=_TEST_RUN, disable_retries=_TEST_DISABLE_RETRIES, ) @@ -616,17 +650,6 @@ def test_submit_custom_job_with_experiments( timeout=None, ) - expected_run_context = copy.deepcopy(_EXPERIMENT_RUN_MOCK) - expected_run_context.metadata[constants._CUSTOM_JOB_KEY] = [ - { - constants._CUSTOM_JOB_RESOURCE_NAME: _TEST_CUSTOM_JOB_NAME, - constants._CUSTOM_JOB_CONSOLE_URI: job._dashboard_uri(), - } - ] - update_context_mock.assert_called_with( - context=expected_run_context, - ) - @pytest.mark.parametrize("sync", [True, False]) @mock.patch.object(jobs, "_JOB_WAIT_TIME", 1) @mock.patch.object(jobs, "_LOG_WAIT_TIME", 1) @@ -714,6 +737,44 @@ def test_create_custom_job_with_timeout_not_explicitly_set( timeout=None, ) + @pytest.mark.usefixtures( + "create_custom_job_mock", + "get_custom_job_with_experiments_mock", + "get_experiment_run_not_found_mock", + "get_tensorboard_run_artifact_not_found_mock", + ) + def test_run_custom_job_with_experiment_run_warning(self, caplog): + + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + staging_bucket=_TEST_STAGING_BUCKET, + encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, + ) + + job = aiplatform.CustomJob( + display_name=_TEST_DISPLAY_NAME, + worker_pool_specs=_TEST_WORKER_POOL_SPEC, + base_output_dir=_TEST_BASE_OUTPUT_DIR, + labels=_TEST_LABELS, + ) + + job.run( + service_account=_TEST_SERVICE_ACCOUNT, + network=_TEST_NETWORK, + timeout=_TEST_TIMEOUT, + restart_job_on_worker_restart=_TEST_RESTART_JOB_ON_WORKER_RESTART, + create_request_timeout=None, + experiment=_TEST_EXPERIMENT, + experiment_run=_TEST_RUN, + disable_retries=_TEST_DISABLE_RETRIES, + ) + + assert ( + f"Failed to end experiment run {_TEST_EXPERIMENT_RUN_CONTEXT_NAME} due to:" + in caplog.text + ) + @pytest.mark.parametrize("sync", [True, False]) def test_run_custom_job_with_fail_raises( self, create_custom_job_mock, get_custom_job_mock_with_fail, sync diff --git a/tests/unit/aiplatform/test_endpoints.py b/tests/unit/aiplatform/test_endpoints.py index 7a0ec9fc5e..5b725d8728 100644 --- a/tests/unit/aiplatform/test_endpoints.py +++ b/tests/unit/aiplatform/test_endpoints.py @@ -16,55 +16,50 @@ # import copy -import pytest -import urllib3 +from datetime import datetime, timedelta +from importlib import reload import json - from unittest import mock -from importlib import reload -from datetime import datetime, timedelta from google.api_core import operation as ga_operation from google.auth import credentials as auth_credentials - -from google.protobuf import field_mask_pb2 - from google.cloud import aiplatform from google.cloud.aiplatform import base -from google.cloud.aiplatform import initializer from google.cloud.aiplatform import explain +from google.cloud.aiplatform import initializer from google.cloud.aiplatform import models from google.cloud.aiplatform import utils - -from google.cloud.aiplatform.preview import models as preview_models - from google.cloud.aiplatform.compat.services import ( - model_service_client, + deployment_resource_pool_service_client_v1beta1, endpoint_service_client, endpoint_service_client_v1beta1, - prediction_service_client, - prediction_service_client_v1beta1, + model_service_client, prediction_service_async_client, prediction_service_async_client_v1beta1, - deployment_resource_pool_service_client_v1beta1, 
+ prediction_service_client, + prediction_service_client_v1beta1, ) - from google.cloud.aiplatform.compat.types import ( deployment_resource_pool_v1beta1 as gca_deployment_resource_pool_v1beta1, - endpoint as gca_endpoint, + encryption_spec as gca_encryption_spec, + endpoint_service_v1beta1 as gca_endpoint_service_v1beta1, + endpoint_service as gca_endpoint_service, endpoint_v1beta1 as gca_endpoint_v1beta1, - model as gca_model, - machine_resources as gca_machine_resources, + endpoint as gca_endpoint, + io as gca_io, machine_resources_v1beta1 as gca_machine_resources_v1beta1, - prediction_service as gca_prediction_service, + machine_resources as gca_machine_resources, + model as gca_model, prediction_service_v1beta1 as gca_prediction_service_v1beta1, - endpoint_service as gca_endpoint_service, - endpoint_service_v1beta1 as gca_endpoint_service_v1beta1, - encryption_spec as gca_encryption_spec, - io as gca_io, + prediction_service as gca_prediction_service, + service_networking as gca_service_networking, ) - +from google.cloud.aiplatform.preview import models as preview_models import constants as test_constants +import pytest +import urllib3 + +from google.protobuf import field_mask_pb2 _TEST_PROJECT = test_constants.ProjectConstants._TEST_PROJECT @@ -83,6 +78,7 @@ _TEST_TIMEOUT = None _TEST_ENDPOINT_NAME = test_constants.EndpointConstants._TEST_ENDPOINT_NAME +_TEST_ENDPOINT_NAME_2 = test_constants.EndpointConstants._TEST_ENDPOINT_NAME_2 _TEST_ENDPOINT_NAME_ALT_LOCATION = ( f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION_2}/endpoints/{_TEST_ID}" ) @@ -92,6 +88,8 @@ _TEST_VERSION_ID = test_constants.EndpointConstants._TEST_VERSION_ID _TEST_NETWORK = f"projects/{_TEST_PROJECT}/global/networks/{_TEST_ID}" +_TEST_PROJECT_ALLOWLIST = [_TEST_PROJECT] +_TEST_ENDPOINT_OVERRIDE = "endpoint-override.aiplatform.vertex.goog" _TEST_MODEL_ID = test_constants.EndpointConstants._TEST_MODEL_ID _TEST_METADATA = {"foo": "bar"} @@ -226,6 +224,15 @@ create_time=datetime.now() - timedelta(minutes=15), network=_TEST_NETWORK, ), + gca_endpoint.Endpoint( + name=_TEST_ENDPOINT_NAME_2, + display_name="psc", + create_time=datetime.now() - timedelta(minutes=15), + private_service_connect_config=gca_service_networking.PrivateServiceConnectConfig( + enable_private_service_connect=True, + project_allowlist=_TEST_PROJECT_ALLOWLIST, + ), + ), ] _TEST_LIST_FILTER = 'display_name="abc"' @@ -630,7 +637,7 @@ def get_drp_mock(): @pytest.fixture -def create_private_endpoint_mock(): +def create_psa_private_endpoint_mock(): with mock.patch.object( endpoint_service_client.EndpointServiceClient, "create_endpoint" ) as create_private_endpoint_mock: @@ -645,7 +652,7 @@ def create_private_endpoint_mock(): @pytest.fixture -def get_private_endpoint_mock(): +def get_psa_private_endpoint_mock(): with mock.patch.object( endpoint_service_client.EndpointServiceClient, "get_endpoint" ) as get_endpoint_mock: @@ -658,7 +665,7 @@ def get_private_endpoint_mock(): @pytest.fixture -def get_private_endpoint_with_model_mock(): +def get_psa_private_endpoint_with_model_mock(): with mock.patch.object( endpoint_service_client.EndpointServiceClient, "get_endpoint" ) as get_endpoint_mock: @@ -671,6 +678,58 @@ def get_private_endpoint_with_model_mock(): yield get_endpoint_mock +@pytest.fixture +def create_psc_private_endpoint_mock(): + with mock.patch.object( + endpoint_service_client.EndpointServiceClient, "create_endpoint" + ) as create_private_endpoint_mock: + create_private_endpoint_lro_mock = mock.Mock(ga_operation.Operation) + 
create_private_endpoint_lro_mock.result.return_value = gca_endpoint.Endpoint( + name=_TEST_ENDPOINT_NAME, + display_name=_TEST_DISPLAY_NAME, + private_service_connect_config=gca_service_networking.PrivateServiceConnectConfig( + enable_private_service_connect=True, + project_allowlist=_TEST_PROJECT_ALLOWLIST, + ), + ) + create_private_endpoint_mock.return_value = create_private_endpoint_lro_mock + yield create_private_endpoint_mock + + +@pytest.fixture +def get_psc_private_endpoint_mock(): + with mock.patch.object( + endpoint_service_client.EndpointServiceClient, "get_endpoint" + ) as get_endpoint_mock: + get_endpoint_mock.return_value = gca_endpoint.Endpoint( + display_name=_TEST_DISPLAY_NAME, + name=_TEST_ENDPOINT_NAME, + private_service_connect_config=gca_service_networking.PrivateServiceConnectConfig( + enable_private_service_connect=True, + project_allowlist=_TEST_PROJECT_ALLOWLIST, + ), + ) + yield get_endpoint_mock + + +@pytest.fixture +def get_psc_private_endpoint_with_many_model_mock(): + with mock.patch.object( + endpoint_service_client.EndpointServiceClient, "get_endpoint" + ) as get_endpoint_mock: + get_endpoint_mock.return_value = gca_endpoint.Endpoint( + display_name=_TEST_DISPLAY_NAME, + name=_TEST_ENDPOINT_NAME, + private_service_connect_config=gca_service_networking.PrivateServiceConnectConfig( + enable_private_service_connect=True, + project_allowlist=_TEST_PROJECT_ALLOWLIST, + ), + deployed_models=_TEST_LONG_DEPLOYED_MODELS, + traffic_split=_TEST_LONG_TRAFFIC_SPLIT, + ) + yield get_endpoint_mock + + @pytest.fixture def predict_private_endpoint_mock(): with mock.patch.object(urllib3.PoolManager, "request") as predict_mock: @@ -680,6 +739,9 @@ def predict_private_endpoint_mock(): { "predictions": _TEST_PREDICTION, "metadata": _TEST_METADATA, + "deployedModelId": _TEST_DEPLOYED_MODELS[0].id, + "model": _TEST_MODEL_NAME, + "modelVersionId": "1", } ), ) @@ -2321,7 +2383,7 @@ def teardown_method(self): initializer.global_pool.shutdown(wait=True) @pytest.mark.parametrize("sync", [True, False]) - def test_create(self, create_private_endpoint_mock, sync): + def test_create_psa(self, create_psa_private_endpoint_mock, sync): test_endpoint = models.PrivateEndpoint.create( display_name=_TEST_DISPLAY_NAME, project=_TEST_PROJECT, @@ -2337,7 +2399,7 @@ def test_create(self, create_private_endpoint_mock, sync): display_name=_TEST_DISPLAY_NAME, network=_TEST_NETWORK ) - create_private_endpoint_mock.assert_called_once_with( + create_psa_private_endpoint_mock.assert_called_once_with( parent=_TEST_PARENT, endpoint=expected_endpoint, metadata=(), @@ -2345,8 +2407,39 @@ def test_create(self, create_private_endpoint_mock, sync): endpoint_id=None, ) - @pytest.mark.usefixtures("get_private_endpoint_with_model_mock") - def test_predict(self, predict_private_endpoint_mock): + @pytest.mark.parametrize("sync", [True, False]) + def test_create_psc(self, create_psc_private_endpoint_mock, sync): + test_endpoint = models.PrivateEndpoint.create( + display_name=_TEST_DISPLAY_NAME, + project=_TEST_PROJECT, + location=_TEST_LOCATION, + private_service_connect_config=models.PrivateEndpoint.PrivateServiceConnectConfig( + project_allowlist=_TEST_PROJECT_ALLOWLIST + ), + sync=sync, + ) + + if not sync: + test_endpoint.wait() + + expected_endpoint = gca_endpoint.Endpoint( + display_name=_TEST_DISPLAY_NAME, + private_service_connect_config=gca_service_networking.PrivateServiceConnectConfig( + enable_private_service_connect=True, + project_allowlist=_TEST_PROJECT_ALLOWLIST, + ), + ) + + 
create_psc_private_endpoint_mock.assert_called_once_with(
+            parent=_TEST_PARENT,
+            endpoint=expected_endpoint,
+            metadata=(),
+            timeout=None,
+            endpoint_id=None,
+        )
+
+    @pytest.mark.usefixtures("get_psa_private_endpoint_with_model_mock")
+    def test_psa_predict(self, predict_private_endpoint_mock):
        test_endpoint = models.PrivateEndpoint(_TEST_ID)
        test_prediction = test_endpoint.predict(
            instances=_TEST_INSTANCES, parameters={"param": 3.0}
@@ -2366,8 +2459,72 @@
            headers={"Content-Type": "application/json"},
        )

-    @pytest.mark.usefixtures("get_private_endpoint_with_model_mock")
-    def test_health_check(self, health_check_private_endpoint_mock):
+    @pytest.mark.usefixtures("get_psc_private_endpoint_mock")
+    def test_psc_predict(self, predict_private_endpoint_mock):
+        test_endpoint = models.PrivateEndpoint(
+            project=_TEST_PROJECT, location=_TEST_LOCATION, endpoint_name=_TEST_ID
+        )
+        test_prediction = test_endpoint.predict(
+            instances=_TEST_INSTANCES,
+            parameters={"param": 3.0},
+            endpoint_override=_TEST_ENDPOINT_OVERRIDE,
+        )
+
+        true_prediction = models.Prediction(
+            predictions=_TEST_PREDICTION,
+            deployed_model_id=_TEST_DEPLOYED_MODELS[0].id,
+            metadata=_TEST_METADATA,
+            model_version_id="1",
+            model_resource_name=_TEST_MODEL_NAME,
+        )
+
+        assert true_prediction == test_prediction
+        predict_private_endpoint_mock.assert_called_once_with(
+            method="POST",
+            url=f"https://{_test_endpoint_override}/v1/projects/%7B_TEST_PROJECT%7D/locations/%7B_TEST_LOCATION%7D/endpoints/%7B_TEST_ID%7D:predict",
+            body='{"instances": [[1.0, 2.0, 3.0], [1.0, 3.0, 4.0]]}',
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": "Bearer None",
+            },
+        )
+
+    @pytest.mark.usefixtures("get_psc_private_endpoint_mock")
+    def test_psc_predict_without_endpoint_override(self):
+        test_endpoint = models.PrivateEndpoint(
+            project=_TEST_PROJECT, location=_TEST_LOCATION, endpoint_name=_TEST_ID
+        )
+
+        with pytest.raises(ValueError) as err:
+            test_endpoint.predict(
+                instances=_TEST_INSTANCES,
+                parameters={"param": 3.0},
+            )
+        assert err.match(
+            regexp=r"Cannot make a predict request because endpoint override is"
+            "not provided. Please ensure an endpoint override is"
+            "provided."
+        )
+
+    @pytest.mark.usefixtures("get_psc_private_endpoint_mock")
+    def test_psc_predict_with_invalid_endpoint_override(self):
+        test_endpoint = models.PrivateEndpoint(
+            project=_TEST_PROJECT, location=_TEST_LOCATION, endpoint_name=_TEST_ID
+        )
+
+        with pytest.raises(ValueError) as err:
+            test_endpoint.predict(
+                instances=_TEST_INSTANCES,
+                parameters={"param": 3.0},
+                endpoint_override="invalid@endpoint.override",
+            )
+        assert err.match(
+            regexp=r"Invalid endpoint override provided. Please only use IP"
+            "address or DNS."
+ ) + + @pytest.mark.usefixtures("get_psa_private_endpoint_with_model_mock") + def test_psa_health_check(self, health_check_private_endpoint_mock): test_endpoint = models.PrivateEndpoint(_TEST_ID) test_health_check = test_endpoint.health_check() @@ -2378,9 +2535,21 @@ def test_health_check(self, health_check_private_endpoint_mock): method="GET", url="", body=None, headers=None ) - @pytest.mark.usefixtures("get_private_endpoint_mock", "get_model_mock") + @pytest.mark.usefixtures("get_psc_private_endpoint_mock") + def test_psc_health_check(self): + test_endpoint = models.PrivateEndpoint( + project=_TEST_PROJECT, location=_TEST_LOCATION, endpoint_name=_TEST_ID + ) + + with pytest.raises(RuntimeError) as err: + test_endpoint.health_check() + assert err.match( + regexp=r"Health check request is not supported on PSC based Private Endpoint." + ) + + @pytest.mark.usefixtures("get_psa_private_endpoint_mock", "get_model_mock") @pytest.mark.parametrize("sync", [True, False]) - def test_deploy(self, deploy_model_mock, sync): + def test_psa_deploy(self, deploy_model_mock, sync): test_endpoint = models.PrivateEndpoint(_TEST_ENDPOINT_NAME) test_model = models.Model(_TEST_ID) test_model._gca_resource.supported_deployment_resources_types.append( @@ -2413,9 +2582,111 @@ def test_deploy(self, deploy_model_mock, sync): traffic_split=None, ) - @pytest.mark.usefixtures("get_private_endpoint_with_model_mock") + @pytest.mark.usefixtures("get_psa_private_endpoint_mock", "get_model_mock") @pytest.mark.parametrize("sync", [True, False]) - def test_undeploy(self, undeploy_model_mock, sync): + def test_psa_deploy_traffic_split_not_supported(self, deploy_model_mock, sync): + test_endpoint = models.PrivateEndpoint(_TEST_ENDPOINT_NAME) + test_model = models.Model(_TEST_ID) + test_model._gca_resource.supported_deployment_resources_types.append( + aiplatform.gapic.Model.DeploymentResourcesType.AUTOMATIC_RESOURCES + ) + + with pytest.raises(ValueError) as err: + test_endpoint.deploy( + test_model, sync=sync, traffic_split=_TEST_TRAFFIC_SPLIT + ) + assert err.match( + regexp=r"Traffic split is not supported for PSA based PrivateEndpoint." 
+ ) + + @pytest.mark.usefixtures("get_psc_private_endpoint_mock", "get_model_mock") + @pytest.mark.parametrize("sync", [True, False]) + def test_psc_deploy_traffic_split(self, deploy_model_mock, sync): + test_endpoint = models.PrivateEndpoint(_TEST_ENDPOINT_NAME) + test_model = models.Model(_TEST_ID) + test_model._gca_resource.supported_deployment_resources_types.append( + aiplatform.gapic.Model.DeploymentResourcesType.AUTOMATIC_RESOURCES + ) + test_endpoint.deploy( + model=test_model, sync=sync, traffic_split=_TEST_TRAFFIC_SPLIT + ) + + if not sync: + test_endpoint.wait() + + automatic_resources = gca_machine_resources.AutomaticResources( + min_replica_count=1, + max_replica_count=1, + ) + + deployed_model = gca_endpoint.DeployedModel( + automatic_resources=automatic_resources, + model=test_model.resource_name, + display_name=None, + ) + + deploy_model_mock.assert_called_once_with( + endpoint=test_endpoint.resource_name, + deployed_model=deployed_model, + metadata=(), + timeout=None, + traffic_split=_TEST_TRAFFIC_SPLIT, + ) + + @pytest.mark.usefixtures("get_psc_private_endpoint_mock", "get_model_mock") + @pytest.mark.parametrize("sync", [True, False]) + def test_psc_deploy_with_traffic_percentage(self, deploy_model_mock, sync): + test_endpoint = models.PrivateEndpoint(_TEST_ENDPOINT_NAME) + test_model = models.Model(_TEST_ID) + test_endpoint._gca_resource.traffic_split = {"model1": 100} + test_model._gca_resource.supported_deployment_resources_types.append( + aiplatform.gapic.Model.DeploymentResourcesType.AUTOMATIC_RESOURCES + ) + + test_endpoint.deploy( + model=test_model, + traffic_percentage=70, + sync=sync, + ) + if not sync: + test_endpoint.wait() + + automatic_resources = gca_machine_resources.AutomaticResources( + min_replica_count=1, + max_replica_count=1, + ) + deployed_model = gca_endpoint.DeployedModel( + automatic_resources=automatic_resources, + model=test_model.resource_name, + display_name=None, + ) + deploy_model_mock.assert_called_once_with( + endpoint=test_endpoint.resource_name, + deployed_model=deployed_model, + traffic_split={"model1": 30, "0": 70}, + metadata=(), + timeout=None, + ) + + @pytest.mark.usefixtures("get_psa_private_endpoint_with_model_mock") + @pytest.mark.parametrize("sync", [True, False]) + def test_psa_undeploy(self, undeploy_model_mock, sync): + test_endpoint = models.PrivateEndpoint(_TEST_ENDPOINT_NAME) + test_endpoint.undeploy("model1", sync=sync) + + if not sync: + test_endpoint.wait() + + undeploy_model_mock.assert_called_once_with( + endpoint=test_endpoint.resource_name, + deployed_model_id="model1", + metadata=(), + traffic_split={}, + ) + + @pytest.mark.usefixtures("get_psc_private_endpoint_mock") + @pytest.mark.parametrize("sync", [True, False]) + def test_psc_undeploy(self, undeploy_model_mock, sync): test_endpoint = models.PrivateEndpoint(_TEST_ENDPOINT_NAME) test_endpoint.undeploy("model1", sync=sync) @@ -2429,9 +2700,85 @@ def test_undeploy(self, undeploy_model_mock, sync): traffic_split={}, ) - @pytest.mark.usefixtures("get_private_endpoint_with_model_mock") + @pytest.mark.usefixtures("get_psc_private_endpoint_mock") @pytest.mark.parametrize("sync", [True, False]) - def test_delete_without_force(self, sdk_undeploy_mock, delete_endpoint_mock, sync): + def test_psc_undeploy_with_traffic_split(self, undeploy_model_mock, sync): + test_endpoint = models.PrivateEndpoint(_TEST_ENDPOINT_NAME) + test_endpoint._gca_resource.traffic_split = {"model1": 40, "model2": 60} + test_endpoint.undeploy( + deployed_model_id="model1", + 
traffic_split={"model1": 0, "model2": 100}, + sync=sync, + ) + + if not sync: + test_endpoint.wait() + + undeploy_model_mock.assert_called_once_with( + endpoint=test_endpoint.resource_name, + deployed_model_id="model1", + traffic_split={"model2": 100}, + metadata=(), + ) + + @pytest.mark.usefixtures("get_psc_private_endpoint_mock") + def test_psc_update_traffic_split(self, update_endpoint_mock): + endpoint = models.PrivateEndpoint(_TEST_ENDPOINT_NAME) + + endpoint.update(traffic_split={_TEST_ID: 10, _TEST_ID_2: 80, _TEST_ID_3: 10}) + + expected_endpoint = gca_endpoint.Endpoint( + name=_TEST_ENDPOINT_NAME, + display_name=_TEST_DISPLAY_NAME, + private_service_connect_config=gca_service_networking.PrivateServiceConnectConfig( + enable_private_service_connect=True, + project_allowlist=_TEST_PROJECT_ALLOWLIST, + ), + traffic_split={_TEST_ID: 10, _TEST_ID_2: 80, _TEST_ID_3: 10}, + ) + expected_update_mask = field_mask_pb2.FieldMask(paths=["traffic_split"]) + + update_endpoint_mock.assert_called_once_with( + endpoint=expected_endpoint, + update_mask=expected_update_mask, + metadata=_TEST_REQUEST_METADATA, + timeout=_TEST_TIMEOUT, + ) + + update_endpoint_mock.return_value = gca_endpoint.Endpoint( + display_name=_TEST_DISPLAY_NAME, + name=_TEST_ENDPOINT_NAME, + private_service_connect_config=gca_service_networking.PrivateServiceConnectConfig( + enable_private_service_connect=True, + project_allowlist=_TEST_PROJECT_ALLOWLIST, + ), + traffic_split={_TEST_ID: 10, _TEST_ID_2: 80, _TEST_ID_3: 10}, + ) + + @pytest.mark.usefixtures("get_psc_private_endpoint_with_many_model_mock") + @pytest.mark.parametrize("sync", [True, False]) + def test_undeploy_all(self, sdk_private_undeploy_mock, sync): + test_endpoint = aiplatform.Endpoint(_TEST_ID) + test_endpoint.undeploy_all(sync=sync) + + if not sync: + test_endpoint.wait() + + # undeploy_all() results in an undeploy() call for each deployed_model + # Models are undeployed in ascending order of traffic percentage + expected_models_to_undeploy = ["m6", "m7"] + _TEST_LONG_TRAFFIC_SPLIT_SORTED_IDS + sdk_private_undeploy_mock.assert_has_calls( + [ + mock.call(deployed_model_id=deployed_model_id, sync=sync) + for deployed_model_id in expected_models_to_undeploy + ], + ) + + @pytest.mark.usefixtures("get_psa_private_endpoint_with_model_mock") + @pytest.mark.parametrize("sync", [True, False]) + def test_psa_delete_without_force( + self, sdk_undeploy_mock, delete_endpoint_mock, sync + ): test_endpoint = models.PrivateEndpoint(_TEST_ENDPOINT_NAME) test_endpoint.delete(sync=sync) @@ -2443,9 +2790,9 @@ def test_delete_without_force(self, sdk_undeploy_mock, delete_endpoint_mock, syn delete_endpoint_mock.assert_called_once_with(name=_TEST_ENDPOINT_NAME) - @pytest.mark.usefixtures("get_private_endpoint_with_model_mock") + @pytest.mark.usefixtures("get_psa_private_endpoint_with_model_mock") @pytest.mark.parametrize("sync", [True, False]) - def test_delete_with_force(self, sdk_undeploy_mock, delete_endpoint_mock, sync): + def test_psa_delete_with_force(self, sdk_undeploy_mock, delete_endpoint_mock, sync): test_endpoint = models.PrivateEndpoint(_TEST_ENDPOINT_NAME) test_endpoint._gca_resource.deployed_models = [_TEST_DEPLOYED_MODELS[0]] test_endpoint.delete(sync=sync) @@ -2458,10 +2805,42 @@ def test_delete_with_force(self, sdk_undeploy_mock, delete_endpoint_mock, sync): delete_endpoint_mock.assert_called_once_with(name=_TEST_ENDPOINT_NAME) + @pytest.mark.usefixtures("get_psc_private_endpoint_mock") + @pytest.mark.parametrize("sync", [True, False]) + def 
test_psc_delete_with_force( + self, sdk_undeploy_all_mock, delete_endpoint_mock, sync + ): + test_endpoint = aiplatform.Endpoint(_TEST_ID) + test_endpoint.delete(force=True, sync=sync) + + if not sync: + test_endpoint.wait() + + # undeploy_all() should be called if force is set to True + sdk_undeploy_all_mock.assert_called_once() + + delete_endpoint_mock.assert_called_once_with(name=_TEST_ENDPOINT_NAME) + + @pytest.mark.usefixtures("get_psc_private_endpoint_mock") + @pytest.mark.parametrize("sync", [True, False]) + def test_psc_delete_without_force( + self, sdk_undeploy_all_mock, delete_endpoint_mock, sync + ): + test_endpoint = aiplatform.Endpoint(_TEST_ID) + test_endpoint.delete(sync=sync) + + if not sync: + test_endpoint.wait() + + # undeploy_all() should not be called unless force is set to True + sdk_undeploy_all_mock.assert_not_called() + + delete_endpoint_mock.assert_called_once_with(name=_TEST_ENDPOINT_NAME) + @pytest.mark.usefixtures("list_private_endpoints_mock") def test_list(self): ep_list = aiplatform.PrivateEndpoint.list() - assert ep_list # Ensure list is not empty + assert len(ep_list) == 2 # Ensure list include both PSA and PSC endpoints def test_construct_sdk_resource_from_gapic_uses_resource_project(self): PROJECT = "my-project" diff --git a/tests/unit/aiplatform/test_logdir_loader.py b/tests/unit/aiplatform/test_logdir_loader.py index 84395c5d9b..90c87464a2 100644 --- a/tests/unit/aiplatform/test_logdir_loader.py +++ b/tests/unit/aiplatform/test_logdir_loader.py @@ -189,6 +189,8 @@ def test_multiple_writes_to_logdir(self): writer.add_test_summary("tag_b") with FileWriter(os.path.join(logdir, "b", "x")) as writer: writer.add_test_summary("tag_b_x") + with FileWriter(os.path.join(logdir, "b_z")) as writer: + writer.add_test_summary("tag_b_z") writer_c = FileWriter(os.path.join(logdir, "c")) writer_c.add_test_summary("tag_c") writer_c.flush() @@ -199,14 +201,15 @@ def test_multiple_writes_to_logdir(self): { "a": ["tag_a"], "b": ["tag_b"], - "b/x": ["tag_b_x"], + "b-x": ["tag_b_x"], + "b-z": ["tag_b_z"], "c": ["tag_c"], }, ) # A second load should indicate no new data. self.assertEqual( self._extract_run_to_tags(loader.get_run_events()), - {"a": [], "b": [], "b/x": [], "c": []}, + {"a": [], "b": [], "b-x": [], "b-z": [], "c": []}, ) # Write some new data to both new and pre-existing event files. 
with FileWriter(os.path.join(logdir, "a"), filename_suffix=".other") as writer: @@ -225,7 +228,8 @@ def test_multiple_writes_to_logdir(self): { "a": ["tag_a_2", "tag_a_3", "tag_a_4"], "b": [], - "b/x": ["tag_b_x_2"], + "b-x": ["tag_b_x_2"], + "b-z": [], "c": ["tag_c_2"], }, ) diff --git a/tests/unit/aiplatform/test_metadata.py b/tests/unit/aiplatform/test_metadata.py index 5decf7361d..775a4430a1 100644 --- a/tests/unit/aiplatform/test_metadata.py +++ b/tests/unit/aiplatform/test_metadata.py @@ -18,7 +18,7 @@ import os import copy from importlib import reload -from unittest import mock +from unittest import TestCase, mock from unittest.mock import patch, call import numpy as np @@ -1487,7 +1487,7 @@ def test_start_run_from_env_experiment( "get_tensorboard_run_artifact_not_found_mock", "get_or_create_default_tb_none_mock", ) - def test_init_experiment_run_from_env(self): + def test_init_experiment_run_from_env_run_name(self): os.environ["AIP_EXPERIMENT_RUN_NAME"] = _TEST_RUN aiplatform.init( @@ -1501,6 +1501,26 @@ def test_init_experiment_run_from_env(self): del os.environ["AIP_EXPERIMENT_RUN_NAME"] + @pytest.mark.usefixtures( + "get_metadata_store_mock", + "get_experiment_run_mock", + "get_tensorboard_run_artifact_not_found_mock", + "get_or_create_default_tb_none_mock", + ) + def test_init_experiment_run_from_env_run_resource_name(self): + os.environ["AIP_EXPERIMENT_RUN_NAME"] = _TEST_EXPERIMENT_RUN_CONTEXT_NAME + + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + experiment=_TEST_EXPERIMENT, + ) + + run = metadata._experiment_tracker.experiment_run + assert run.name == _TEST_RUN + + del os.environ["AIP_EXPERIMENT_RUN_NAME"] + def test_get_experiment(self, get_experiment_mock): aiplatform.init( project=_TEST_PROJECT, @@ -2028,6 +2048,32 @@ def test_log_pipeline_job( ], ) + @pytest.mark.usefixtures( + "get_experiment_mock", + ) + def test_get_experiment_df_passes_experiment_variable( + self, + list_context_mock_for_experiment_dataframe_mock, + list_artifact_mock_for_experiment_dataframe, + list_executions_mock_for_experiment_dataframe, + get_tensorboard_run_artifact_mock, + get_tensorboard_run_mock, + ): + aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION) + + with patch.object( + experiment_run_resource.ExperimentRun, "_query_experiment_row" + ) as query_experiment_row_mock: + row = experiment_resources._ExperimentRow( + experiment_run_type=constants.SYSTEM_EXPERIMENT_RUN, + name=_TEST_EXPERIMENT, + ) + query_experiment_row_mock.return_value = row + + aiplatform.get_experiment_df(_TEST_EXPERIMENT) + _, kwargs = query_experiment_row_mock.call_args_list[0] + TestCase.assertTrue(self, kwargs["experiment"].name == _TEST_EXPERIMENT) + @pytest.mark.usefixtures( "get_experiment_mock", "list_tensorboard_time_series_mock", diff --git a/tests/unit/aiplatform/test_uploader.py b/tests/unit/aiplatform/test_uploader.py index 0e6dae2f73..f0e409fe63 100644 --- a/tests/unit/aiplatform/test_uploader.py +++ b/tests/unit/aiplatform/test_uploader.py @@ -25,6 +25,9 @@ import threading import time from unittest import mock +from unittest.mock import patch + +from absl.testing import parameterized from google.api_core import datetime_helpers from google.cloud import storage @@ -38,7 +41,10 @@ from google.cloud.aiplatform.compat.types import ( tensorboard_time_series as tensorboard_time_series_type, ) +from google.cloud.aiplatform.metadata import experiment_resources +from google.cloud.aiplatform.metadata import metadata from google.cloud.aiplatform.tensorboard import 
logdir_loader +from google.cloud.aiplatform.tensorboard import tensorboard_resource from google.cloud.aiplatform.tensorboard import upload_tracker from google.cloud.aiplatform.tensorboard import uploader as uploader_lib from google.cloud.aiplatform.tensorboard import uploader_constants @@ -49,6 +55,7 @@ ) import grpc import grpc_testing +import pytest import tensorflow as tf from google.protobuf import timestamp_pb2 @@ -88,8 +95,12 @@ _USE_DEFAULT = object() _TEST_EXPERIMENT_NAME = "test-experiment" +_TEST_PROJECT_NAME = "test_project" +_TEST_LOCATION_NAME = "us-east1" _TEST_TENSORBOARD_RESOURCE_NAME = ( - "projects/test_project/locations/us-central1/tensorboards/test_tensorboard" + "projects/{}/locations/{}/tensorboards/test_tensorboard".format( + _TEST_PROJECT_NAME, _TEST_LOCATION_NAME + ) ) _TEST_LOG_DIR_NAME = "/logs/foo" _TEST_RUN_NAME = "test-run" @@ -165,6 +176,14 @@ def create_tensorboard_time_series( display_name=tensorboard_time_series.display_name, ) + def parse_tensorboard_path_response(path): + """Parses a tensorboard path into its component segments.""" + m = re.match( + r"^projects/(?P.+?)/locations/(?P.+?)/tensorboards/(?P.+?)$", + path, + ) + return m.groupdict() if m else {} + test_channel = grpc_testing.channel( service_descriptors=[], time=grpc_testing.strict_real_time() ) @@ -180,6 +199,7 @@ def create_tensorboard_time_series( mock_client.create_tensorboard_time_series.side_effect = ( create_tensorboard_time_series ) + mock_client.parse_tensorboard_path.side_effect = parse_tensorboard_path_response return mock_client @@ -258,6 +278,7 @@ def _create_dispatcher( api=None, allowed_plugins=_USE_DEFAULT, logdir=None, + run_name=_TEST_RUN_NAME, ): if api is _USE_DEFAULT: api = _create_mock_client() @@ -278,6 +299,10 @@ def _create_dispatcher( one_platform_resource_manager = uploader_utils.OnePlatformResourceManager( experiment_resource_name, api ) + one_platform_resource_manager.get_run_resource_name = mock.Mock() + one_platform_resource_manager.get_run_resource_name.return_value = ( + "{}/runs/{}".format(experiment_resource_name, run_name) + ) request_sender = uploader_lib._BatchedRequestSender( experiment_resource_name=experiment_resource_name, @@ -474,29 +499,52 @@ def add_meta_graph(self, meta_graph_def, global_step=None): ) -class TensorboardUploaderTest(tf.test.TestCase): - def test_create_experiment(self): +@pytest.mark.usefixtures("google_auth_mock") +class TensorboardUploaderTest(tf.test.TestCase, parameterized.TestCase): + @patch.object(metadata, "_experiment_tracker", autospec=True) + @patch.object(experiment_resources, "Experiment", autospec=True) + def test_create_experiment( + self, experiment_resources_mock, experiment_tracker_mock + ): + experiment_resources_mock.get.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_experiment.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_tensorboard.return_value = ( + _TEST_TENSORBOARD_RESOURCE_NAME + ) logdir = _TEST_LOG_DIR_NAME uploader = _create_uploader(_create_mock_client(), logdir) uploader.create_experiment() - self.assertEqual(uploader._experiment.name, _TEST_ONE_PLATFORM_EXPERIMENT_NAME) + self.assertEqual( + uploader._tensorboard_experiment_resource_name, + _TEST_ONE_PLATFORM_EXPERIMENT_NAME, + ) - def test_create_experiment_with_name(self): + @patch.object(metadata, "_experiment_tracker", autospec=True) + @patch.object(experiment_resources, "Experiment", autospec=True) + def test_create_experiment_with_name( + self, experiment_resources_mock, experiment_tracker_mock + ): + 
experiment_resources_mock.get.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_experiment.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_tensorboard.return_value = ( + _TEST_TENSORBOARD_RESOURCE_NAME + ) logdir = _TEST_LOG_DIR_NAME mock_client = _create_mock_client() new_name = "This is the new name" uploader = _create_uploader(mock_client, logdir, experiment_name=new_name) uploader.create_experiment() - mock_client.create_tensorboard_experiment.assert_called_once() - call_args = mock_client.create_tensorboard_experiment.call_args - self.assertEqual( - call_args[1]["tensorboard_experiment"], - tensorboard_experiment_type.TensorboardExperiment(), - ) - self.assertEqual(call_args[1]["parent"], _TEST_TENSORBOARD_RESOURCE_NAME) - self.assertEqual(call_args[1]["tensorboard_experiment_id"], new_name) - def test_create_experiment_with_description(self): + @patch.object(metadata, "_experiment_tracker", autospec=True) + @patch.object(experiment_resources, "Experiment", autospec=True) + def test_create_experiment_with_description( + self, experiment_resources_mock, experiment_tracker_mock + ): + experiment_resources_mock.get.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_experiment.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_tensorboard.return_value = ( + _TEST_TENSORBOARD_RESOURCE_NAME + ) logdir = _TEST_LOG_DIR_NAME mock_client = _create_mock_client() new_description = """ @@ -506,22 +554,17 @@ def test_create_experiment_with_description(self): uploader = _create_uploader(mock_client, logdir, description=new_description) uploader.create_experiment() self.assertEqual(uploader._experiment_name, _TEST_EXPERIMENT_NAME) - mock_client.create_tensorboard_experiment.assert_called_once() - call_args = mock_client.create_tensorboard_experiment.call_args - tb_experiment = tensorboard_experiment_type.TensorboardExperiment( - description=new_description - ) - - expected_call_args = mock.call( - parent=_TEST_TENSORBOARD_RESOURCE_NAME, - tensorboard_experiment_id=_TEST_EXPERIMENT_NAME, - tensorboard_experiment=tb_experiment, + @patch.object(metadata, "_experiment_tracker", autospec=True) + @patch.object(experiment_resources, "Experiment", autospec=True) + def test_create_experiment_with_all_metadata( + self, experiment_resources_mock, experiment_tracker_mock + ): + experiment_resources_mock.get.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_experiment.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_tensorboard.return_value = ( + _TEST_TENSORBOARD_RESOURCE_NAME ) - - self.assertEqual(expected_call_args, call_args) - - def test_create_experiment_with_all_metadata(self): logdir = _TEST_LOG_DIR_NAME mock_client = _create_mock_client() new_description = """ @@ -534,18 +577,6 @@ def test_create_experiment_with_all_metadata(self): ) uploader.create_experiment() self.assertEqual(uploader._experiment_name, new_name) - mock_client.create_tensorboard_experiment.assert_called_once() - call_args = mock_client.create_tensorboard_experiment.call_args - - tb_experiment = tensorboard_experiment_type.TensorboardExperiment( - description=new_description - ) - expected_call_args = mock.call( - parent=_TEST_TENSORBOARD_RESOURCE_NAME, - tensorboard_experiment_id=new_name, - tensorboard_experiment=tb_experiment, - ) - self.assertEqual(call_args, expected_call_args) def test_start_uploading_without_create_experiment_fails(self): mock_client = _create_mock_client() @@ -553,12 +584,27 @@ def 
test_start_uploading_without_create_experiment_fails(self): with self.assertRaisesRegex(RuntimeError, "call create_experiment()"): uploader.start_uploading() - def test_start_uploading_scalars(self): + @patch.object( + uploader_utils.OnePlatformResourceManager, + "get_run_resource_name", + autospec=True, + ) + @patch.object(metadata, "_experiment_tracker", autospec=True) + @patch.object(experiment_resources, "Experiment", autospec=True) + def test_start_uploading_scalars( + self, experiment_resources_mock, experiment_tracker_mock, run_resource_mock + ): + experiment_resources_mock.get.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_experiment.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_tensorboard.return_value = ( + _TEST_TENSORBOARD_RESOURCE_NAME + ) mock_client = _create_mock_client() mock_rate_limiter = mock.create_autospec(uploader_utils.RateLimiter) mock_tensor_rate_limiter = mock.create_autospec(uploader_utils.RateLimiter) mock_blob_rate_limiter = mock.create_autospec(uploader_utils.RateLimiter) mock_tracker = mock.MagicMock() + run_resource_mock.return_value = _TEST_ONE_PLATFORM_RUN_NAME with mock.patch.object( upload_tracker, "UploadTracker", return_value=mock_tracker ): @@ -566,7 +612,7 @@ def test_start_uploading_scalars(self): writer_client=mock_client, logdir=_TEST_LOG_DIR_NAME, # Send each Event below in a separate WriteScalarRequest - max_scalar_request_size=200, + max_scalar_request_size=180, rpc_rate_limiter=mock_rate_limiter, verbosity=1, # In order to test the upload tracker. ) @@ -612,7 +658,29 @@ def test_start_uploading_scalars(self): self.assertEqual(mock_tracker.tensors_tracker.call_count, 0) self.assertEqual(mock_tracker.blob_tracker.call_count, 0) - def test_start_uploading_scalars_one_shot(self): + @parameterized.parameters( + {"existing_experiment": None}, {"existing_experiment": _TEST_EXPERIMENT_NAME} + ) + @patch.object( + uploader_utils.OnePlatformResourceManager, + "get_run_resource_name", + autospec=True, + ) + @patch.object(metadata, "_experiment_tracker", autospec=True) + @patch.object( + uploader_utils.OnePlatformResourceManager, + "_create_or_get_run_resource", + autospec=True, + ) + @patch.object(experiment_resources, "Experiment", autospec=True) + def test_start_uploading_scalars_one_shot( + self, + experiment_resources_mock, + experiment_run_resource_mock, + experiment_tracker_mock, + run_resource_mock, + existing_experiment, + ): """Check that one-shot uploading stops without AbortUploadError.""" def batch_create_runs(parent, requests): @@ -643,6 +711,15 @@ def batch_create_time_series(parent, requests): tensorboard_time_series=tb_time_series ) + tensorboard_run_mock = mock.create_autospec(tensorboard_resource.TensorboardRun) + experiment_resources_mock.get.return_value = existing_experiment + tensorboard_run_mock.resource_name = _TEST_TENSORBOARD_RESOURCE_NAME + tensorboard_run_mock.display_name = _TEST_RUN_NAME + experiment_run_resource_mock.return_value = tensorboard_run_mock + experiment_tracker_mock.set_experiment.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_tensorboard.return_value = ( + _TEST_TENSORBOARD_RESOURCE_NAME + ) mock_client = _create_mock_client() mock_client.batch_create_tensorboard_runs.side_effect = batch_create_runs mock_client.batch_create_tensorboard_time_series.side_effect = ( @@ -651,6 +728,7 @@ def batch_create_time_series(parent, requests): mock_rate_limiter = mock.create_autospec(uploader_utils.RateLimiter) mock_tracker = mock.MagicMock() + 
run_resource_mock.return_value = _TEST_ONE_PLATFORM_RUN_NAME with mock.patch.object( upload_tracker, "UploadTracker", return_value=mock_tracker ): @@ -697,6 +775,7 @@ def batch_create_time_series(parent, requests): ): uploader.start_uploading() + self.assertEqual(existing_experiment is None, uploader._is_brand_new_experiment) self.assertEqual(2, mock_client.write_tensorboard_experiment_data.call_count) self.assertEqual(2, mock_rate_limiter.tick.call_count) @@ -707,7 +786,16 @@ def batch_create_time_series(parent, requests): self.assertEqual(mock_tracker.tensors_tracker.call_count, 0) self.assertEqual(mock_tracker.blob_tracker.call_count, 0) - def test_upload_empty_logdir(self): + @patch.object(metadata, "_experiment_tracker", autospec=True) + @patch.object(experiment_resources, "Experiment", autospec=True) + def test_upload_empty_logdir( + self, experiment_resources_mock, experiment_tracker_mock + ): + experiment_resources_mock.get.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_experiment.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_tensorboard.return_value = ( + _TEST_TENSORBOARD_RESOURCE_NAME + ) logdir = self.get_temp_dir() mock_client = _create_mock_client() uploader = _create_uploader(mock_client, logdir) @@ -715,10 +803,19 @@ def test_upload_empty_logdir(self): uploader._upload_once() mock_client.write_tensorboard_experiment_data.assert_not_called() - def test_upload_polls_slowly_once_done(self): + @patch.object(metadata, "_experiment_tracker", autospec=True) + @patch.object(experiment_resources, "Experiment", autospec=True) + def test_upload_polls_slowly_once_done( + self, experiment_resources_mock, experiment_tracker_mock + ): class SuccessError(Exception): pass + experiment_resources_mock.get.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_experiment.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_tensorboard.return_value = ( + _TEST_TENSORBOARD_RESOURCE_NAME + ) mock_rate_limiter = mock.create_autospec(uploader_utils.RateLimiter) upload_call_count_box = [0] @@ -739,23 +836,53 @@ def mock_upload_once(): with self.assertRaises(SuccessError): uploader.start_uploading() - def test_upload_swallows_rpc_failure(self): + @patch.object( + uploader_utils.OnePlatformResourceManager, + "get_run_resource_name", + autospec=True, + ) + @patch.object(metadata, "_experiment_tracker", autospec=True) + @patch.object(experiment_resources, "Experiment", autospec=True) + def test_upload_swallows_rpc_failure( + self, experiment_resources_mock, experiment_tracker_mock, run_resource_mock + ): + experiment_resources_mock.get.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_experiment.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_tensorboard.return_value = ( + _TEST_TENSORBOARD_RESOURCE_NAME + ) logdir = self.get_temp_dir() with FileWriter(logdir) as writer: writer.add_test_summary("foo") mock_client = _create_mock_client() uploader = _create_uploader(mock_client, logdir) uploader.create_experiment() + run_resource_mock.return_value = _TEST_ONE_PLATFORM_RUN_NAME error = _grpc_error(grpc.StatusCode.INTERNAL, "Failure") mock_client.write_tensorboard_experiment_data.side_effect = error uploader._upload_once() mock_client.write_tensorboard_experiment_data.assert_called_once() - def test_upload_full_logdir(self): + @patch.object( + uploader_utils.OnePlatformResourceManager, + "get_run_resource_name", + autospec=True, + ) + @patch.object(metadata, "_experiment_tracker", autospec=True) + 
@patch.object(experiment_resources, "Experiment", autospec=True) + def test_upload_full_logdir( + self, experiment_resources_mock, experiment_tracker_mock, run_resource_mock + ): + experiment_resources_mock.get.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_experiment.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_tensorboard.return_value = ( + _TEST_TENSORBOARD_RESOURCE_NAME + ) logdir = self.get_temp_dir() mock_client = _create_mock_client() uploader = _create_uploader(mock_client, logdir) uploader.create_experiment() + run_resource_mock.return_value = _TEST_ONE_PLATFORM_RUN_NAME # Convenience helpers for constructing expected requests. data = tensorboard_data.TimeSeriesData @@ -872,8 +999,23 @@ def test_upload_full_logdir(self): uploader._upload_once() mock_client.write_tensorboard_experiment_data.assert_not_called() - def test_verbosity_zero_creates_upload_tracker_with_verbosity_zero(self): + @patch.object( + uploader_utils.OnePlatformResourceManager, + "get_run_resource_name", + autospec=True, + ) + @patch.object(metadata, "_experiment_tracker", autospec=True) + @patch.object(experiment_resources, "Experiment", autospec=True) + def test_verbosity_zero_creates_upload_tracker_with_verbosity_zero( + self, experiment_resources_mock, experiment_tracker_mock, run_resource_mock + ): + experiment_resources_mock.get.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_experiment.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_tensorboard.return_value = ( + _TEST_TENSORBOARD_RESOURCE_NAME + ) mock_client = _create_mock_client() + run_resource_mock.return_value = _TEST_ONE_PLATFORM_RUN_NAME mock_tracker = mock.MagicMock() with mock.patch.object( upload_tracker, "UploadTracker", return_value=mock_tracker @@ -904,13 +1046,28 @@ def test_verbosity_zero_creates_upload_tracker_with_verbosity_zero(self): self.assertEqual(mock_constructor.call_args[1], {"verbosity": 0}) self.assertEqual(mock_tracker.scalars_tracker.call_count, 1) - def test_start_uploading_graphs(self): + @patch.object( + uploader_utils.OnePlatformResourceManager, + "get_run_resource_name", + autospec=True, + ) + @patch.object(metadata, "_experiment_tracker", autospec=True) + @patch.object(experiment_resources, "Experiment", autospec=True) + def test_start_uploading_graphs( + self, experiment_resources_mock, experiment_tracker_mock, run_resource_mock + ): + experiment_resources_mock.get.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_experiment.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_tensorboard.return_value = ( + _TEST_TENSORBOARD_RESOURCE_NAME + ) mock_client = _create_mock_client() mock_rate_limiter = mock.create_autospec(uploader_utils.RateLimiter) mock_bucket = mock.create_autospec(storage.Bucket) mock_blob = mock.create_autospec(storage.Blob) mock_bucket.blob.return_value = mock_blob mock_tracker = mock.MagicMock() + run_resource_mock.return_value = _TEST_ONE_PLATFORM_RUN_NAME def create_time_series(tensorboard_time_series, parent=None): return tensorboard_time_series_type.TensorboardTimeSeries( @@ -955,7 +1112,6 @@ def create_time_series(tensorboard_time_series, parent=None): ), self.assertRaises(AbortUploadError): uploader.start_uploading() - self.assertEqual(1, mock_client.create_tensorboard_experiment.call_count) self.assertEqual(10, mock_bucket.blob.call_count) blob_ids = set() @@ -993,7 +1149,21 @@ def create_time_series(tensorboard_time_series, parent=None): 
self.assertEqual(mock_tracker.tensors_tracker.call_count, 0) self.assertEqual(mock_tracker.blob_tracker.call_count, 12) - def test_filter_graphs(self): + @patch.object( + uploader_utils.OnePlatformResourceManager, + "get_run_resource_name", + autospec=True, + ) + @patch.object(metadata, "_experiment_tracker", autospec=True) + @patch.object(experiment_resources, "Experiment", autospec=True) + def test_filter_graphs( + self, experiment_resources_mock, experiment_tracker_mock, run_resource_mock + ): + experiment_resources_mock.get.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_experiment.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_tensorboard.return_value = ( + _TEST_TENSORBOARD_RESOURCE_NAME + ) # Three graphs: one short, one long, one corrupt. bytes_0 = _create_example_graph_bytes(123) bytes_1 = _create_example_graph_bytes(9999) @@ -1015,6 +1185,7 @@ def test_filter_graphs(self): mock_blob = mock.create_autospec(storage.Blob) mock_bucket.blob.return_value = mock_blob mock_client = _create_mock_client() + run_resource_mock.return_value = _TEST_ONE_PLATFORM_RUN_NAME def create_time_series(tensorboard_time_series, parent=None): return tensorboard_time_series_type.TensorboardTimeSeries( @@ -1061,8 +1232,23 @@ def create_time_series(tensorboard_time_series, parent=None): with self.subTest("corrupt graphs should be skipped"): self.assertLen(actual_blobs, 2) - def test_add_profile_plugin(self): + @patch.object( + uploader_utils.OnePlatformResourceManager, + "get_run_resource_name", + autospec=True, + ) + @patch.object(metadata, "_experiment_tracker", autospec=True) + @patch.object(experiment_resources, "Experiment", autospec=True) + def test_add_profile_plugin( + self, experiment_resources_mock, experiment_tracker_mock, run_resource_mock + ): + experiment_resources_mock.get.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_experiment.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_tensorboard.return_value = ( + _TEST_TENSORBOARD_RESOURCE_NAME + ) run_name = "profile_test_run" + run_resource_mock.return_value = _TEST_ONE_PLATFORM_RUN_NAME with tempfile.TemporaryDirectory() as logdir: prof_path = os.path.join( logdir, run_name, profile_uploader.ProfileRequestSender.PROFILE_PATH @@ -1090,12 +1276,29 @@ def test_add_profile_plugin(self): # TODO(b/276368161) +@pytest.mark.usefixtures("google_auth_mock") class _TensorBoardTrackerTest(tf.test.TestCase): - def test_thread_continuously_uploads(self): + @patch.object( + uploader_utils.OnePlatformResourceManager, + "get_run_resource_name", + autospec=True, + ) + @patch.object(metadata, "_experiment_tracker", autospec=True) + @patch.object(experiment_resources, "Experiment", autospec=True) + def test_thread_continuously_uploads( + self, experiment_resources_mock, experiment_tracker_mock, run_resource_mock + ): """Test Tensorboard Tracker by mimicking its implementation: Call start_upload through a thread and subsequently end the thread by calling _end_uploading().""" + experiment_resources_mock.get.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_experiment.return_value = _TEST_EXPERIMENT_NAME + experiment_tracker_mock.set_tensorboard.return_value = ( + _TEST_TENSORBOARD_RESOURCE_NAME + ) logdir = self.get_temp_dir() mock_client = _create_mock_client() + run_resource_mock.return_value = _TEST_ONE_PLATFORM_RUN_NAME + builder = _create_dispatcher( experiment_resource_name=_TEST_ONE_PLATFORM_EXPERIMENT_NAME, api=mock_client, @@ -1224,6 +1427,7 @@ def 
test_thread_continuously_uploads(self): self.assertFalse(uploader_thread.is_alive()) +@pytest.mark.usefixtures("google_auth_mock") class BatchedRequestSenderTest(tf.test.TestCase): def _populate_run_from_events( self, n_scalar_events, events, allowed_plugins=_USE_DEFAULT @@ -1335,6 +1539,7 @@ def test_expands_multiple_values_in_event(self): ) +@pytest.mark.usefixtures("google_auth_mock") class ProfileRequestSenderTest(tf.test.TestCase): def _create_builder(self, mock_client, logdir): return _create_dispatcher( @@ -1371,11 +1576,13 @@ def test_profile_event_missing_prof_run_dirs(self): self.assertProtoEquals(call_args_list, []) - def test_profile_event_bad_prof_path(self): + @patch.object(uploader_utils.OnePlatformResourceManager, "get_run_resource_name") + def test_profile_event_bad_prof_path(self, run_resource_mock): events = [ event_pb2.Event(file_version="brain.Event:2"), ] prof_run_name = "bad_run_name" + run_resource_mock.return_value = _TEST_ONE_PLATFORM_RUN_NAME with tempfile.TemporaryDirectory() as logdir: prof_path = os.path.join( @@ -1390,11 +1597,13 @@ def test_profile_event_bad_prof_path(self): self.assertProtoEquals(call_args_list, []) - def test_profile_event_single_prof_run(self): + @patch.object(uploader_utils.OnePlatformResourceManager, "get_run_resource_name") + def test_profile_event_single_prof_run(self, run_resource_mock): events = [ event_pb2.Event(file_version="brain.Event:2"), ] prof_run_name = "2021_01_01_01_10_10" + run_resource_mock.return_value = _TEST_ONE_PLATFORM_RUN_NAME with tempfile.TemporaryDirectory() as logdir: prof_path = os.path.join( @@ -1411,13 +1620,15 @@ def test_profile_event_single_prof_run(self): profile_tag_counts = _extract_tag_counts_time_series(call_args_list) self.assertEqual(profile_tag_counts, {prof_run_name: 1}) - def test_profile_event_single_prof_run_new_files(self): + @patch.object(uploader_utils.OnePlatformResourceManager, "get_run_resource_name") + def test_profile_event_single_prof_run_new_files(self, run_resource_mock): # Check that files are not uploaded twice for the same profiling run events = [ event_pb2.Event(file_version="brain.Event:2"), ] prof_run_name = "2021_01_01_01_10_10" mock_client = _create_mock_client() + run_resource_mock.return_value = _TEST_ONE_PLATFORM_RUN_NAME with tempfile.TemporaryDirectory() as logdir: builder = self._create_builder(mock_client=mock_client, logdir=logdir) @@ -1445,7 +1656,8 @@ def test_profile_event_single_prof_run_new_files(self): profile_tag_counts = _extract_tag_counts_time_series(call_args_list) self.assertEqual(profile_tag_counts, {prof_run_name: 1}) - def test_profile_event_multi_prof_run(self): + @patch.object(uploader_utils.OnePlatformResourceManager, "get_run_resource_name") + def test_profile_event_multi_prof_run(self, run_resource_mock): events = [ event_pb2.Event(file_version="brain.Event:2"), ] @@ -1453,6 +1665,7 @@ def test_profile_event_multi_prof_run(self): "2021_01_01_01_10_10", "2021_02_02_02_20_20", ] + run_resource_mock.return_value = _TEST_ONE_PLATFORM_RUN_NAME with tempfile.TemporaryDirectory() as logdir: prof_path = os.path.join( @@ -1477,7 +1690,8 @@ def test_profile_event_multi_prof_run(self): profile_tag_counts = _extract_tag_counts_time_series(call_args_list) self.assertEqual(profile_tag_counts, dict.fromkeys(prof_run_names, 1)) - def test_profile_event_add_consecutive_prof_runs(self): + @patch.object(uploader_utils.OnePlatformResourceManager, "get_run_resource_name") + def test_profile_event_add_consecutive_prof_runs(self, run_resource_mock): # Multiple profiling 
events happen one after another, should only update # new profiling runs events = [ @@ -1485,6 +1699,7 @@ def test_profile_event_add_consecutive_prof_runs(self): ] prof_run_name = "2021_01_01_01_10_10" + run_resource_mock.return_value = _TEST_ONE_PLATFORM_RUN_NAME mock_client = _create_mock_client() @@ -1538,6 +1753,7 @@ def test_profile_event_add_consecutive_prof_runs(self): ) +@pytest.mark.usefixtures("google_auth_mock") class ScalarBatchedRequestSenderTest(tf.test.TestCase): def _add_events(self, sender, events): for event in events: @@ -1562,7 +1778,10 @@ def _add_events_and_flush(self, events, expected_n_time_series): ) return call_args - def test_aggregation_by_tag(self): + @patch.object(uploader_utils.OnePlatformResourceManager, "get_run_resource_name") + def test_aggregation_by_tag(self, run_resource_mock): + run_resource_mock.return_value = _TEST_ONE_PLATFORM_RUN_NAME + def make_event(step, wall_time, tag, value): return event_pb2.Event( step=step, @@ -1600,7 +1819,9 @@ def make_event(step, wall_time, tag, value): }, ) - def test_v1_summary(self): + @patch.object(uploader_utils.OnePlatformResourceManager, "get_run_resource_name") + def test_v1_summary(self, run_resource_mock): + run_resource_mock.return_value = _TEST_ONE_PLATFORM_RUN_NAME event = event_pb2.Event(step=1, wall_time=123.456) event.summary.value.add(tag="foo", simple_value=5.0) call_args = self._add_events_and_flush(_apply_compat([event]), 1) @@ -1623,7 +1844,9 @@ def test_v1_summary(self): call_args[1]["write_run_data_requests"][0].time_series_data, ) - def test_v1_summary_tb_summary(self): + @patch.object(uploader_utils.OnePlatformResourceManager, "get_run_resource_name") + def test_v1_summary_tb_summary(self, run_resource_mock): + run_resource_mock.return_value = _TEST_ONE_PLATFORM_RUN_NAME tf_summary = summary_v1.scalar_pb("foo", 5.0) tb_summary = summary_pb2.Summary.FromString(tf_summary.SerializeToString()) event = event_pb2.Event(step=1, wall_time=123.456, summary=tb_summary) @@ -1647,7 +1870,9 @@ def test_v1_summary_tb_summary(self): call_args[1]["write_run_data_requests"][0].time_series_data, ) - def test_v2_summary(self): + @patch.object(uploader_utils.OnePlatformResourceManager, "get_run_resource_name") + def test_v2_summary(self, run_resource_mock): + run_resource_mock.return_value = _TEST_ONE_PLATFORM_RUN_NAME event = event_pb2.Event( step=1, wall_time=123.456, summary=scalar_v2_pb("foo", 5.0) ) @@ -1671,7 +1896,9 @@ def test_v2_summary(self): call_args[1]["write_run_data_requests"][0].time_series_data, ) - def test_propagates_experiment_deletion(self): + @patch.object(uploader_utils.OnePlatformResourceManager, "get_run_resource_name") + def test_propagates_experiment_deletion(self, run_resource_mock): + run_resource_mock.return_value = _TEST_ONE_PLATFORM_RUN_NAME event = event_pb2.Event(step=1) event.summary.value.add(tag="foo", simple_value=1.0) @@ -1695,8 +1922,10 @@ def test_no_budget_for_base_request(self): ) self.assertEqual(str(cm.exception), "Byte budget too small for base request") - def test_no_room_for_single_point(self): + @patch.object(uploader_utils.OnePlatformResourceManager, "get_run_resource_name") + def test_no_room_for_single_point(self, run_resource_mock): mock_client = _create_mock_client() + run_resource_mock.return_value = _TEST_ONE_PLATFORM_RUN_NAME event = event_pb2.Event(step=1, wall_time=123.456) event.summary.value.add(tag="foo", simple_value=1.0) sender = _create_scalar_request_sender("123", mock_client, max_request_size=12) @@ -1704,8 +1933,10 @@ def 
test_no_room_for_single_point(self): self._add_events(sender, [event]) self.assertEqual(str(cm.exception), "add_event failed despite flush") - def test_break_at_run_boundary(self): + @patch.object(uploader_utils.OnePlatformResourceManager, "get_run_resource_name") + def test_break_at_run_boundary(self, run_resource_mock): mock_client = _create_mock_client() + run_resource_mock.return_value = _TEST_ONE_PLATFORM_RUN_NAME # Choose run name sizes such that one run fits in a 1024 byte request, # but not two. long_run_1 = "A" * 768 @@ -1775,8 +2006,10 @@ def test_break_at_run_boundary(self): call_args_list[1][1]["write_run_data_requests"][0].time_series_data, ) - def test_break_at_tag_boundary(self): + @patch.object(uploader_utils.OnePlatformResourceManager, "get_run_resource_name") + def test_break_at_tag_boundary(self, run_resource_mock): mock_client = _create_mock_client() + run_resource_mock.return_value = _TEST_ONE_PLATFORM_RUN_NAME # Choose tag name sizes such that one tag fits in a 1024 byte request, # but not two. Note that tag names appear in both `Tag.name` and the # summary metadata. @@ -1819,8 +2052,10 @@ def test_break_at_tag_boundary(self): self.assertProtoEquals(expected_request1[0], request1[0]) self.assertProtoEquals(expected_request1[1], request1[1]) - def test_break_at_scalar_point_boundary(self): + @patch.object(uploader_utils.OnePlatformResourceManager, "get_run_resource_name") + def test_break_at_scalar_point_boundary(self, run_resource_mock): mock_client = _create_mock_client() + run_resource_mock.return_value = _TEST_ONE_PLATFORM_RUN_NAME point_count = 2000 # comfortably saturates a single 1024-byte request events = [] for step in range(point_count): @@ -1867,8 +2102,10 @@ def test_break_at_scalar_point_boundary(self): total_points_in_result += 1 self.assertEqual(total_points_in_result, point_count) - def test_prunes_tags_and_runs(self): + @patch.object(uploader_utils.OnePlatformResourceManager, "get_run_resource_name") + def test_prunes_tags_and_runs(self, run_resource_mock): mock_client = _create_mock_client() + run_resource_mock.return_value = _TEST_ONE_PLATFORM_RUN_NAME event_1 = event_pb2.Event(step=1) event_1.summary.value.add(tag="foo", simple_value=1.0) event_2 = event_pb2.Event(step=2) @@ -1924,7 +2161,9 @@ def mock_add_point(byte_budget_manager_self, point): self.assertProtoEquals(expected_request1[0], request1[0]) self.assertProtoEquals(expected_request2[0], request2[0]) - def test_wall_time_precision(self): + @patch.object(uploader_utils.OnePlatformResourceManager, "get_run_resource_name") + def test_wall_time_precision(self, run_resource_mock): + run_resource_mock.return_value = _TEST_ONE_PLATFORM_RUN_NAME # Test a wall time that is exactly representable in float64 but has enough # digits to incur error if converted to nanoseconds the naive way (* 1e9). 
event1 = event_pb2.Event(step=1, wall_time=1567808404.765432119) @@ -1954,6 +2193,7 @@ def test_wall_time_precision(self): ) +@pytest.mark.usefixtures("google_auth_mock") class FileRequestSenderTest(tf.test.TestCase): def test_empty_files_no_messages(self): mock_client = _create_mock_client() @@ -2161,7 +2401,3 @@ def _extract_tag_counts_time_series(call_args_list): for call_args in call_args_list for ts_data in call_args[1]["time_series_data"] } - - -if __name__ == "__main__": - tf.test.main() diff --git a/tests/unit/vertex_langchain/test_reasoning_engines.py b/tests/unit/vertex_langchain/test_reasoning_engines.py index cec7f4bf83..f3300c8148 100644 --- a/tests/unit/vertex_langchain/test_reasoning_engines.py +++ b/tests/unit/vertex_langchain/test_reasoning_engines.py @@ -92,6 +92,20 @@ def query(self, unused_arbitrary_string_name: str) -> str: schema_name="CapitalizeEngine_query", ) ) +_TEST_INPUT_REASONING_ENGINE_OBJ = types.ReasoningEngine( + display_name=_TEST_REASONING_ENGINE_DISPLAY_NAME, + spec=types.ReasoningEngineSpec( + package_spec=types.ReasoningEngineSpec.PackageSpec( + python_version=f"{sys.version_info.major}.{sys.version_info.minor}", + pickle_object_gcs_uri=_TEST_REASONING_ENGINE_GCS_URI, + dependency_files_gcs_uri=_TEST_REASONING_ENGINE_DEPENDENCY_FILES_GCS_URI, + requirements_gcs_uri=_TEST_REASONING_ENGINE_REQUIREMENTS_GCS_URI, + ), + ), +) +_TEST_INPUT_REASONING_ENGINE_OBJ.spec.class_methods.append( + _TEST_REASONING_ENGINE_QUERY_SCHEMA +) _TEST_REASONING_ENGINE_OBJ = types.ReasoningEngine( name=_TEST_REASONING_ENGINE_RESOURCE_NAME, display_name=_TEST_REASONING_ENGINE_DISPLAY_NAME, @@ -300,7 +314,6 @@ def test_create_reasoning_engine( ): test_reasoning_engine = reasoning_engines.ReasoningEngine.create( self.test_app, - reasoning_engine_name=_TEST_REASONING_ENGINE_RESOURCE_NAME, display_name=_TEST_REASONING_ENGINE_DISPLAY_NAME, requirements=_TEST_REASONING_ENGINE_REQUIREMENTS, ) @@ -308,13 +321,50 @@ def test_create_reasoning_engine( test_reasoning_engine._gca_resource = _TEST_REASONING_ENGINE_OBJ create_reasoning_engine_mock.assert_called_with( parent=_TEST_PARENT, - reasoning_engine=test_reasoning_engine.gca_resource, + reasoning_engine=_TEST_INPUT_REASONING_ENGINE_OBJ, ) get_reasoning_engine_mock.assert_called_with( name=_TEST_REASONING_ENGINE_RESOURCE_NAME, retry=_TEST_RETRY, ) + @pytest.mark.usefixtures("caplog") + def test_create_reasoning_engine_warn_resource_name( + self, + caplog, + create_reasoning_engine_mock, + cloud_storage_create_bucket_mock, + tarfile_open_mock, + cloudpickle_dump_mock, + get_reasoning_engine_mock, + ): + reasoning_engines.ReasoningEngine.create( + self.test_app, + reasoning_engine_name=_TEST_REASONING_ENGINE_RESOURCE_NAME, + display_name=_TEST_REASONING_ENGINE_DISPLAY_NAME, + requirements=_TEST_REASONING_ENGINE_REQUIREMENTS, + ) + assert "does not support user-defined resource IDs" in caplog.text + + @pytest.mark.usefixtures("caplog") + def test_create_reasoning_engine_warn_sys_version( + self, + caplog, + create_reasoning_engine_mock, + cloud_storage_create_bucket_mock, + tarfile_open_mock, + cloudpickle_dump_mock, + get_reasoning_engine_mock, + ): + sys_version = f"{sys.version_info.major}.{sys.version_info.minor}" + reasoning_engines.ReasoningEngine.create( + self.test_app, + sys_version="3.10" if sys_version != "3.10" else "3.11", + display_name=_TEST_REASONING_ENGINE_DISPLAY_NAME, + requirements=_TEST_REASONING_ENGINE_REQUIREMENTS, + ) + assert f"is inconsistent with {sys.version_info=}" in caplog.text + def 
test_create_reasoning_engine_requirements_from_file( self, create_reasoning_engine_mock, @@ -329,7 +379,6 @@ def test_create_reasoning_engine_requirements_from_file( ) as mock_file: test_reasoning_engine = reasoning_engines.ReasoningEngine.create( self.test_app, - reasoning_engine_name=_TEST_REASONING_ENGINE_RESOURCE_NAME, display_name=_TEST_REASONING_ENGINE_DISPLAY_NAME, requirements="requirements.txt", ) @@ -338,7 +387,7 @@ def test_create_reasoning_engine_requirements_from_file( test_reasoning_engine._gca_resource = _TEST_REASONING_ENGINE_OBJ create_reasoning_engine_mock.assert_called_with( parent=_TEST_PARENT, - reasoning_engine=test_reasoning_engine.gca_resource, + reasoning_engine=_TEST_INPUT_REASONING_ENGINE_OBJ, ) get_reasoning_engine_mock.assert_called_with( name=_TEST_REASONING_ENGINE_RESOURCE_NAME, @@ -356,7 +405,6 @@ def test_delete_after_create_reasoning_engine( ): test_reasoning_engine = reasoning_engines.ReasoningEngine.create( self.test_app, - reasoning_engine_name=_TEST_REASONING_ENGINE_RESOURCE_NAME, display_name=_TEST_REASONING_ENGINE_DISPLAY_NAME, requirements=_TEST_REASONING_ENGINE_REQUIREMENTS, ) @@ -364,7 +412,7 @@ def test_delete_after_create_reasoning_engine( test_reasoning_engine._gca_resource = _TEST_REASONING_ENGINE_OBJ create_reasoning_engine_mock.assert_called_with( parent=_TEST_PARENT, - reasoning_engine=test_reasoning_engine.gca_resource, + reasoning_engine=_TEST_INPUT_REASONING_ENGINE_OBJ, ) get_reasoning_engine_mock.assert_called_with( name=_TEST_REASONING_ENGINE_RESOURCE_NAME, @@ -461,7 +509,6 @@ def test_create_reasoning_engine_unspecified_staging_bucket( ) reasoning_engines.ReasoningEngine.create( self.test_app, - reasoning_engine_name=_TEST_REASONING_ENGINE_RESOURCE_NAME, display_name=_TEST_REASONING_ENGINE_DISPLAY_NAME, requirements=_TEST_REASONING_ENGINE_REQUIREMENTS, ) @@ -486,7 +533,6 @@ def test_create_reasoning_engine_no_query_method( ): reasoning_engines.ReasoningEngine.create( InvalidCapitalizeEngineWithoutQueryMethod(), - reasoning_engine_name=_TEST_REASONING_ENGINE_RESOURCE_NAME, display_name=_TEST_REASONING_ENGINE_DISPLAY_NAME, requirements=_TEST_REASONING_ENGINE_REQUIREMENTS, ) @@ -505,7 +551,6 @@ def test_create_reasoning_engine_noncallable_query_attribute( ): reasoning_engines.ReasoningEngine.create( InvalidCapitalizeEngineWithNoncallableQuery(), - reasoning_engine_name=_TEST_REASONING_ENGINE_RESOURCE_NAME, display_name=_TEST_REASONING_ENGINE_DISPLAY_NAME, requirements=_TEST_REASONING_ENGINE_REQUIREMENTS, ) @@ -521,7 +566,6 @@ def test_create_reasoning_engine_unsupported_sys_version( with pytest.raises(ValueError, match="Unsupported python version"): reasoning_engines.ReasoningEngine.create( self.test_app, - reasoning_engine_name=_TEST_REASONING_ENGINE_RESOURCE_NAME, display_name=_TEST_REASONING_ENGINE_DISPLAY_NAME, requirements=_TEST_REASONING_ENGINE_REQUIREMENTS, sys_version="2.6", @@ -538,7 +582,6 @@ def test_create_reasoning_engine_requirements_ioerror( with pytest.raises(IOError, match="Failed to read requirements"): reasoning_engines.ReasoningEngine.create( self.test_app, - reasoning_engine_name=_TEST_REASONING_ENGINE_RESOURCE_NAME, display_name=_TEST_REASONING_ENGINE_DISPLAY_NAME, requirements="nonexistent_requirements.txt", ) @@ -554,7 +597,6 @@ def test_create_reasoning_engine_nonexistent_extra_packages( with pytest.raises(FileNotFoundError, match="not found"): reasoning_engines.ReasoningEngine.create( self.test_app, - reasoning_engine_name=_TEST_REASONING_ENGINE_RESOURCE_NAME, display_name=_TEST_REASONING_ENGINE_DISPLAY_NAME, 
requirements=_TEST_REASONING_ENGINE_REQUIREMENTS, extra_packages=_TEST_REASONING_ENGINE_INVALID_EXTRA_PACKAGES, @@ -571,7 +613,6 @@ def test_create_reasoning_engine_with_invalid_query_method( with pytest.raises(ValueError, match="Invalid query signature"): reasoning_engines.ReasoningEngine.create( InvalidCapitalizeEngineWithoutQuerySelf(), - reasoning_engine_name=_TEST_REASONING_ENGINE_RESOURCE_NAME, display_name=_TEST_REASONING_ENGINE_DISPLAY_NAME, requirements=_TEST_REASONING_ENGINE_REQUIREMENTS, ) diff --git a/tests/unit/vertexai/conftest.py b/tests/unit/vertexai/conftest.py index 3baa482abb..7cdc89b7eb 100644 --- a/tests/unit/vertexai/conftest.py +++ b/tests/unit/vertexai/conftest.py @@ -51,6 +51,9 @@ from google.cloud.aiplatform.compat.services import ( feature_online_store_admin_service_client, ) +from google.cloud.aiplatform.compat.services import ( + feature_registry_service_client, +) from feature_store_constants import ( _TEST_BIGTABLE_FOS1, _TEST_EMBEDDING_FV1, @@ -61,6 +64,8 @@ _TEST_OPTIMIZED_FV2, _TEST_PSC_OPTIMIZED_FOS, _TEST_OPTIMIZED_EMBEDDING_FV, + _TEST_FG1_F1, + _TEST_FG1_F2, ) _TEST_PROJECT = "test-project" @@ -496,3 +501,23 @@ def get_optimized_fv_no_endpointmock(): ) as get_optimized_fv_no_endpointmock: get_optimized_fv_no_endpointmock.return_value = _TEST_OPTIMIZED_FV2 yield get_optimized_fv_no_endpointmock + + +@pytest.fixture +def get_feature_mock(): + with patch.object( + feature_registry_service_client.FeatureRegistryServiceClient, + "get_feature", + ) as get_fg_mock: + get_fg_mock.return_value = _TEST_FG1_F1 + yield get_fg_mock + + +@pytest.fixture +def get_feature_with_version_column_mock(): + with patch.object( + feature_registry_service_client.FeatureRegistryServiceClient, + "get_feature", + ) as get_fg_mock: + get_fg_mock.return_value = _TEST_FG1_F2 + yield get_fg_mock diff --git a/tests/unit/vertexai/feature_store_constants.py b/tests/unit/vertexai/feature_store_constants.py index b6fd378bb6..a2eab2b36a 100644 --- a/tests/unit/vertexai/feature_store_constants.py +++ b/tests/unit/vertexai/feature_store_constants.py @@ -314,3 +314,34 @@ ) _TEST_FG_LIST = [_TEST_FG1, _TEST_FG2, _TEST_FG3] + +_TEST_FG1_F1_ID = "my_fg1_f1" +_TEST_FG1_F1_PATH = ( + f"{_TEST_PARENT}/featureGroups/{_TEST_FG1_ID}/features/{_TEST_FG1_F1_ID}" +) +_TEST_FG1_F1_DESCRIPTION = "My feature 1 in feature group 1" +_TEST_FG1_F1_LABELS = {"my_fg1_feature": "f1"} +_TEST_FG1_F1_POINT_OF_CONTACT = "fg1-f1-announce-list" +_TEST_FG1_F1 = types.feature.Feature( + name=_TEST_FG1_F1_PATH, + description=_TEST_FG1_F1_DESCRIPTION, + labels=_TEST_FG1_F1_LABELS, + point_of_contact=_TEST_FG1_F1_POINT_OF_CONTACT, +) + + +_TEST_FG1_F2_ID = "my_fg1_f2" +_TEST_FG1_F2_PATH = ( + f"{_TEST_PARENT}/featureGroups/{_TEST_FG1_ID}/features/{_TEST_FG1_F2_ID}" +) +_TEST_FG1_F2_DESCRIPTION = "My feature 2 in feature group 1" +_TEST_FG1_F2_LABELS = {"my_fg1_feature": "f2"} +_TEST_FG1_F2_POINT_OF_CONTACT = "fg1-f2-announce-list" +_TEST_FG1_F2_VERSION_COLUMN_NAME = "specific_column_for_feature_2" +_TEST_FG1_F2 = types.feature.Feature( + name=_TEST_FG1_F2_PATH, + version_column_name=_TEST_FG1_F2_VERSION_COLUMN_NAME, + description=_TEST_FG1_F2_DESCRIPTION, + labels=_TEST_FG1_F2_LABELS, + point_of_contact=_TEST_FG1_F2_POINT_OF_CONTACT, +) diff --git a/tests/unit/vertexai/test_batch_prediction.py b/tests/unit/vertexai/test_batch_prediction.py new file mode 100644 index 0000000000..59fb9ddaa7 --- /dev/null +++ b/tests/unit/vertexai/test_batch_prediction.py @@ -0,0 +1,463 @@ +# -*- coding: utf-8 -*- + +# Copyright 2024 Google LLC 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Unit tests for generative model batch prediction.""" +# pylint: disable=protected-access + +import importlib +import pytest +from unittest import mock + +from google.cloud import aiplatform +import vertexai +from google.cloud.aiplatform import base as aiplatform_base +from google.cloud.aiplatform import initializer as aiplatform_initializer +from google.cloud.aiplatform.compat.services import job_service_client +from google.cloud.aiplatform.compat.types import ( + batch_prediction_job as gca_batch_prediction_job_compat, + io as gca_io_compat, + job_state as gca_job_state_compat, +) +from vertexai.preview import batch_prediction +from vertexai.generative_models import GenerativeModel + + +_TEST_PROJECT = "test-project" +_TEST_LOCATION = "us-central1" +_TEST_BUCKET = "gs://test-bucket" +_TEST_PARENT = f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}" +_TEST_DISPLAY_NAME = "test-display-name" + +_TEST_GEMINI_MODEL_NAME = "gemini-1.0-pro" +_TEST_GEMINI_MODEL_RESOURCE_NAME = f"publishers/google/models/{_TEST_GEMINI_MODEL_NAME}" +_TEST_PALM_MODEL_NAME = "text-bison" +_TEST_PALM_MODEL_RESOURCE_NAME = f"publishers/google/models/{_TEST_PALM_MODEL_NAME}" + +_TEST_GCS_INPUT_URI = "gs://test-bucket/test-input.jsonl" +_TEST_GCS_INPUT_URI_2 = "gs://test-bucket/test-input-2.jsonl" +_TEST_GCS_OUTPUT_PREFIX = "gs://test-bucket/test-output" +_TEST_BQ_INPUT_URI = "bq://test-project.test-dataset.test-input" +_TEST_BQ_OUTPUT_PREFIX = "bq://test-project.test-dataset.test-output" +_TEST_INVALID_URI = "invalid-uri" + + +_TEST_BATCH_PREDICTION_JOB_ID = "123456789" +_TEST_BATCH_PREDICTION_JOB_NAME = ( + f"{_TEST_PARENT}/batchPredictionJobs/{_TEST_BATCH_PREDICTION_JOB_ID}" +) +_TEST_JOB_STATE_RUNNING = gca_job_state_compat.JobState(3) +_TEST_JOB_STATE_SUCCESS = gca_job_state_compat.JobState(4) + +_TEST_GAPIC_BATCH_PREDICTION_JOB = gca_batch_prediction_job_compat.BatchPredictionJob( + name=_TEST_BATCH_PREDICTION_JOB_NAME, + display_name=_TEST_DISPLAY_NAME, + model=_TEST_GEMINI_MODEL_RESOURCE_NAME, + state=_TEST_JOB_STATE_RUNNING, +) + + +# TODO(b/339230025) Mock the whole service instead of methods. 
+@pytest.fixture +def generate_display_name_mock(): + with mock.patch.object( + aiplatform_base.VertexAiResourceNoun, "_generate_display_name" + ) as generate_display_name_mock: + generate_display_name_mock.return_value = _TEST_DISPLAY_NAME + yield generate_display_name_mock + + +@pytest.fixture +def complete_bq_uri_mock(): + with mock.patch.object( + batch_prediction.BatchPredictionJob, "_complete_bq_uri" + ) as complete_bq_uri_mock: + complete_bq_uri_mock.return_value = _TEST_BQ_OUTPUT_PREFIX + yield complete_bq_uri_mock + + +@pytest.fixture +def get_batch_prediction_job_with_bq_output_mock(): + with mock.patch.object( + job_service_client.JobServiceClient, "get_batch_prediction_job" + ) as get_job_mock: + get_job_mock.return_value = gca_batch_prediction_job_compat.BatchPredictionJob( + name=_TEST_BATCH_PREDICTION_JOB_NAME, + display_name=_TEST_DISPLAY_NAME, + model=_TEST_GEMINI_MODEL_RESOURCE_NAME, + state=_TEST_JOB_STATE_SUCCESS, + output_info=gca_batch_prediction_job_compat.BatchPredictionJob.OutputInfo( + bigquery_output_table=_TEST_BQ_OUTPUT_PREFIX + ), + ) + yield get_job_mock + + +@pytest.fixture +def get_batch_prediction_job_with_gcs_output_mock(): + with mock.patch.object( + job_service_client.JobServiceClient, "get_batch_prediction_job" + ) as get_job_mock: + get_job_mock.return_value = gca_batch_prediction_job_compat.BatchPredictionJob( + name=_TEST_BATCH_PREDICTION_JOB_NAME, + display_name=_TEST_DISPLAY_NAME, + model=_TEST_GEMINI_MODEL_RESOURCE_NAME, + state=_TEST_JOB_STATE_SUCCESS, + output_info=gca_batch_prediction_job_compat.BatchPredictionJob.OutputInfo( + gcs_output_directory=_TEST_GCS_OUTPUT_PREFIX + ), + ) + yield get_job_mock + + +@pytest.fixture +def get_batch_prediction_job_invalid_model_mock(): + with mock.patch.object( + job_service_client.JobServiceClient, "get_batch_prediction_job" + ) as get_job_mock: + get_job_mock.return_value = gca_batch_prediction_job_compat.BatchPredictionJob( + name=_TEST_BATCH_PREDICTION_JOB_NAME, + display_name=_TEST_DISPLAY_NAME, + model=_TEST_PALM_MODEL_RESOURCE_NAME, + state=_TEST_JOB_STATE_SUCCESS, + ) + yield get_job_mock + + +@pytest.fixture +def create_batch_prediction_job_mock(): + with mock.patch.object( + job_service_client.JobServiceClient, "create_batch_prediction_job" + ) as create_job_mock: + create_job_mock.return_value = _TEST_GAPIC_BATCH_PREDICTION_JOB + yield create_job_mock + + +@pytest.fixture +def cancel_batch_prediction_job_mock(): + with mock.patch.object( + job_service_client.JobServiceClient, "cancel_batch_prediction_job" + ) as cancel_job_mock: + yield cancel_job_mock + + +@pytest.fixture +def delete_batch_prediction_job_mock(): + with mock.patch.object( + job_service_client.JobServiceClient, "delete_batch_prediction_job" + ) as delete_job_mock: + yield delete_job_mock + + +@pytest.fixture +def list_batch_prediction_jobs_mock(): + with mock.patch.object( + job_service_client.JobServiceClient, "list_batch_prediction_jobs" + ) as list_jobs_mock: + list_jobs_mock.return_value = [ + _TEST_GAPIC_BATCH_PREDICTION_JOB, + gca_batch_prediction_job_compat.BatchPredictionJob( + name=_TEST_BATCH_PREDICTION_JOB_NAME, + display_name=_TEST_DISPLAY_NAME, + model=_TEST_PALM_MODEL_RESOURCE_NAME, + state=_TEST_JOB_STATE_SUCCESS, + ), + ] + yield list_jobs_mock + + +@pytest.mark.usefixtures( + "google_auth_mock", "generate_display_name_mock", "complete_bq_uri_mock" +) +class TestBatchPredictionJob: + """Unit tests for BatchPredictionJob.""" + + def setup_method(self): + importlib.reload(aiplatform_initializer) + 
importlib.reload(aiplatform) + importlib.reload(vertexai) + vertexai.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + ) + + def teardown_method(self): + aiplatform_initializer.global_pool.shutdown(wait=True) + + def test_init_batch_prediction_job( + self, get_batch_prediction_job_with_gcs_output_mock + ): + batch_prediction.BatchPredictionJob(_TEST_BATCH_PREDICTION_JOB_ID) + + get_batch_prediction_job_with_gcs_output_mock.assert_called_once_with( + name=_TEST_BATCH_PREDICTION_JOB_NAME, retry=aiplatform_base._DEFAULT_RETRY + ) + + @pytest.mark.usefixtures("get_batch_prediction_job_invalid_model_mock") + def test_init_batch_prediction_job_invalid_model(self): + with pytest.raises( + ValueError, + match=( + f"BatchPredictionJob '{_TEST_BATCH_PREDICTION_JOB_ID}' " + f"runs with the model '{_TEST_PALM_MODEL_RESOURCE_NAME}', " + "which is not a GenAI model." + ), + ): + batch_prediction.BatchPredictionJob(_TEST_BATCH_PREDICTION_JOB_ID) + + @pytest.mark.usefixtures("get_batch_prediction_job_with_gcs_output_mock") + def test_submit_batch_prediction_job_with_gcs_input( + self, create_batch_prediction_job_mock + ): + job = batch_prediction.BatchPredictionJob.submit( + source_model=_TEST_GEMINI_MODEL_NAME, + input_dataset=_TEST_GCS_INPUT_URI, + output_uri_prefix=_TEST_GCS_OUTPUT_PREFIX, + ) + + assert job.gca_resource == _TEST_GAPIC_BATCH_PREDICTION_JOB + assert job.state == _TEST_JOB_STATE_RUNNING + assert not job.has_ended + assert not job.has_succeeded + + job.refresh() + assert job.state == _TEST_JOB_STATE_SUCCESS + assert job.has_ended + assert job.has_succeeded + assert job.output_location == _TEST_GCS_OUTPUT_PREFIX + + expected_gapic_batch_prediction_job = gca_batch_prediction_job_compat.BatchPredictionJob( + display_name=_TEST_DISPLAY_NAME, + model=_TEST_GEMINI_MODEL_RESOURCE_NAME, + input_config=gca_batch_prediction_job_compat.BatchPredictionJob.InputConfig( + instances_format="jsonl", + gcs_source=gca_io_compat.GcsSource(uris=[_TEST_GCS_INPUT_URI]), + ), + output_config=gca_batch_prediction_job_compat.BatchPredictionJob.OutputConfig( + gcs_destination=gca_io_compat.GcsDestination( + output_uri_prefix=_TEST_GCS_OUTPUT_PREFIX + ), + predictions_format="jsonl", + ), + ) + create_batch_prediction_job_mock.assert_called_once_with( + parent=_TEST_PARENT, + batch_prediction_job=expected_gapic_batch_prediction_job, + timeout=None, + ) + + @pytest.mark.usefixtures("get_batch_prediction_job_with_bq_output_mock") + def test_submit_batch_prediction_job_with_bq_input( + self, create_batch_prediction_job_mock + ): + job = batch_prediction.BatchPredictionJob.submit( + source_model=_TEST_GEMINI_MODEL_NAME, + input_dataset=_TEST_BQ_INPUT_URI, + output_uri_prefix=_TEST_BQ_OUTPUT_PREFIX, + ) + + assert job.gca_resource == _TEST_GAPIC_BATCH_PREDICTION_JOB + assert job.state == _TEST_JOB_STATE_RUNNING + assert not job.has_ended + assert not job.has_succeeded + + job.refresh() + assert job.state == _TEST_JOB_STATE_SUCCESS + assert job.has_ended + assert job.has_succeeded + assert job.output_location == _TEST_BQ_OUTPUT_PREFIX + + expected_gapic_batch_prediction_job = gca_batch_prediction_job_compat.BatchPredictionJob( + display_name=_TEST_DISPLAY_NAME, + model=_TEST_GEMINI_MODEL_RESOURCE_NAME, + input_config=gca_batch_prediction_job_compat.BatchPredictionJob.InputConfig( + instances_format="bigquery", + bigquery_source=gca_io_compat.BigQuerySource( + input_uri=_TEST_BQ_INPUT_URI + ), + ), + output_config=gca_batch_prediction_job_compat.BatchPredictionJob.OutputConfig( + 
bigquery_destination=gca_io_compat.BigQueryDestination( + output_uri=_TEST_BQ_OUTPUT_PREFIX + ), + predictions_format="bigquery", + ), + ) + create_batch_prediction_job_mock.assert_called_once_with( + parent=_TEST_PARENT, + batch_prediction_job=expected_gapic_batch_prediction_job, + timeout=None, + ) + + def test_submit_batch_prediction_job_with_gcs_input_without_output_uri_prefix( + self, create_batch_prediction_job_mock + ): + vertexai.init(staging_bucket=_TEST_BUCKET) + model = GenerativeModel(_TEST_GEMINI_MODEL_NAME) + job = batch_prediction.BatchPredictionJob.submit( + source_model=model, + input_dataset=[_TEST_GCS_INPUT_URI, _TEST_GCS_INPUT_URI_2], + ) + + assert job.gca_resource == _TEST_GAPIC_BATCH_PREDICTION_JOB + + expected_gapic_batch_prediction_job = gca_batch_prediction_job_compat.BatchPredictionJob( + display_name=_TEST_DISPLAY_NAME, + model=_TEST_GEMINI_MODEL_RESOURCE_NAME, + input_config=gca_batch_prediction_job_compat.BatchPredictionJob.InputConfig( + instances_format="jsonl", + gcs_source=gca_io_compat.GcsSource( + uris=[_TEST_GCS_INPUT_URI, _TEST_GCS_INPUT_URI_2] + ), + ), + output_config=gca_batch_prediction_job_compat.BatchPredictionJob.OutputConfig( + gcs_destination=gca_io_compat.GcsDestination( + output_uri_prefix=f"{_TEST_BUCKET}/gen-ai-batch-prediction" + ), + predictions_format="jsonl", + ), + ) + create_batch_prediction_job_mock.assert_called_once_with( + parent=_TEST_PARENT, + batch_prediction_job=expected_gapic_batch_prediction_job, + timeout=None, + ) + + def test_submit_batch_prediction_job_with_bq_input_without_output_uri_prefix( + self, create_batch_prediction_job_mock + ): + model = GenerativeModel(_TEST_GEMINI_MODEL_NAME) + job = batch_prediction.BatchPredictionJob.submit( + source_model=model, + input_dataset=_TEST_BQ_INPUT_URI, + ) + + assert job.gca_resource == _TEST_GAPIC_BATCH_PREDICTION_JOB + + expected_gapic_batch_prediction_job = gca_batch_prediction_job_compat.BatchPredictionJob( + display_name=_TEST_DISPLAY_NAME, + model=_TEST_GEMINI_MODEL_RESOURCE_NAME, + input_config=gca_batch_prediction_job_compat.BatchPredictionJob.InputConfig( + instances_format="bigquery", + bigquery_source=gca_io_compat.BigQuerySource( + input_uri=_TEST_BQ_INPUT_URI + ), + ), + output_config=gca_batch_prediction_job_compat.BatchPredictionJob.OutputConfig( + bigquery_destination=gca_io_compat.BigQueryDestination( + output_uri=_TEST_BQ_OUTPUT_PREFIX + ), + predictions_format="bigquery", + ), + ) + create_batch_prediction_job_mock.assert_called_once_with( + parent=_TEST_PARENT, + batch_prediction_job=expected_gapic_batch_prediction_job, + timeout=None, + ) + + def test_submit_batch_prediction_job_with_invalid_source_model(self): + with pytest.raises( + ValueError, + match=(f"Model '{_TEST_PALM_MODEL_RESOURCE_NAME}' is not a GenAI model."), + ): + batch_prediction.BatchPredictionJob.submit( + source_model=_TEST_PALM_MODEL_NAME, + input_dataset=_TEST_GCS_INPUT_URI, + ) + + def test_submit_batch_prediction_job_with_invalid_input_dataset(self): + with pytest.raises( + ValueError, + match=( + f"Unsupported input URI: {_TEST_INVALID_URI}. 
" + "Supported formats: 'gs://path/to/input/data.jsonl' and " + "'bq://projectId.bqDatasetId.bqTableId'" + ), + ): + batch_prediction.BatchPredictionJob.submit( + source_model=_TEST_GEMINI_MODEL_NAME, + input_dataset=_TEST_INVALID_URI, + ) + + invalid_bq_uris = ["bq://projectId.dataset1", "bq://projectId.dataset2"] + with pytest.raises( + ValueError, + match=("Multiple Bigquery input datasets are not supported."), + ): + batch_prediction.BatchPredictionJob.submit( + source_model=_TEST_GEMINI_MODEL_NAME, + input_dataset=invalid_bq_uris, + ) + + def test_submit_batch_prediction_job_with_invalid_output_uri_prefix(self): + with pytest.raises( + ValueError, + match=( + f"Unsupported output URI: {_TEST_INVALID_URI}. " + "Supported formats: 'gs://path/to/output/data' and " + "'bq://projectId.bqDatasetId'" + ), + ): + batch_prediction.BatchPredictionJob.submit( + source_model=_TEST_GEMINI_MODEL_NAME, + input_dataset=_TEST_GCS_INPUT_URI, + output_uri_prefix=_TEST_INVALID_URI, + ) + + def test_submit_batch_prediction_job_without_output_uri_prefix_and_bucket(self): + with pytest.raises( + ValueError, + match=( + "Please either specify output_uri_prefix or " + "set staging_bucket in vertexai.init()." + ), + ): + batch_prediction.BatchPredictionJob.submit( + source_model=_TEST_GEMINI_MODEL_NAME, + input_dataset=_TEST_GCS_INPUT_URI, + ) + + @pytest.mark.usefixtures("create_batch_prediction_job_mock") + def test_cancel_batch_prediction_job(self, cancel_batch_prediction_job_mock): + job = batch_prediction.BatchPredictionJob.submit( + source_model=_TEST_GEMINI_MODEL_NAME, + input_dataset=_TEST_GCS_INPUT_URI, + output_uri_prefix=_TEST_GCS_OUTPUT_PREFIX, + ) + job.cancel() + + cancel_batch_prediction_job_mock.assert_called_once_with( + name=_TEST_BATCH_PREDICTION_JOB_NAME, + ) + + @pytest.mark.usefixtures("get_batch_prediction_job_with_gcs_output_mock") + def test_delete_batch_prediction_job(self, delete_batch_prediction_job_mock): + job = batch_prediction.BatchPredictionJob(_TEST_BATCH_PREDICTION_JOB_ID) + job.delete() + + delete_batch_prediction_job_mock.assert_called_once_with( + name=_TEST_BATCH_PREDICTION_JOB_NAME, + ) + + def tes_list_batch_prediction_jobs(self, list_batch_prediction_jobs_mock): + jobs = batch_prediction.BatchPredictionJob.list() + + assert len(jobs) == 1 + assert jobs[0].gca_resource == _TEST_GAPIC_BATCH_PREDICTION_JOB + + list_batch_prediction_jobs_mock.assert_called_once_with( + request={"parent": _TEST_PARENT} + ) diff --git a/tests/unit/vertexai/test_evaluation.py b/tests/unit/vertexai/test_evaluation.py index fefe234621..828ef2a2cf 100644 --- a/tests/unit/vertexai/test_evaluation.py +++ b/tests/unit/vertexai/test_evaluation.py @@ -571,6 +571,19 @@ def test_evaluate_pairwise_metrics_with_multiple_baseline_models(self): ): test_eval_task.evaluate(model=mock_candidate_model) + def test_evaluate_invalid_model_and_dataset_input(self): + test_eval_task = evaluation.EvalTask( + dataset=_TEST_EVAL_DATASET, + metrics=_TEST_METRICS, + ) + with pytest.raises( + ValueError, + match=("The `model` parameter is specified, but the evaluation `dataset`"), + ): + test_eval_task.evaluate( + model=generative_models.GenerativeModel(model_name="invalid_model_name") + ) + @pytest.mark.usefixtures("google_auth_mock") class TestEvaluationUtils: diff --git a/tests/unit/vertexai/test_feature.py b/tests/unit/vertexai/test_feature.py new file mode 100644 index 0000000000..d8dc4f7755 --- /dev/null +++ b/tests/unit/vertexai/test_feature.py @@ -0,0 +1,190 @@ +# -*- coding: utf-8 -*- + +# Copyright 2024 Google 
LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import re +from typing import Dict, Optional + +from google.cloud import aiplatform +from google.cloud.aiplatform import base +from vertexai.resources.preview import ( + Feature, +) +import pytest + + +from feature_store_constants import ( + _TEST_PROJECT, + _TEST_LOCATION, + _TEST_FG1_ID, + _TEST_FG1_F1_ID, + _TEST_FG1_F1_PATH, + _TEST_FG1_F1_DESCRIPTION, + _TEST_FG1_F1_LABELS, + _TEST_FG1_F1_POINT_OF_CONTACT, + _TEST_FG1_F2_ID, + _TEST_FG1_F2_PATH, + _TEST_FG1_F2_VERSION_COLUMN_NAME, + _TEST_FG1_F2_DESCRIPTION, + _TEST_FG1_F2_LABELS, + _TEST_FG1_F2_POINT_OF_CONTACT, +) + + +pytestmark = pytest.mark.usefixtures("google_auth_mock") + + +def feature_eq( + feature_to_check: Feature, + name: str, + resource_name: str, + project: str, + location: str, + description: str, + labels: Dict[str, str], + point_of_contact: str, + version_column_name: Optional[str] = None, +): + """Check if a Feature has the appropriate values set.""" + assert feature_to_check.name == name + assert feature_to_check.resource_name == resource_name + assert feature_to_check.project == project + assert feature_to_check.location == location + assert feature_to_check.description == description + assert feature_to_check.labels == labels + assert feature_to_check.point_of_contact == point_of_contact + + if version_column_name: + assert feature_to_check.version_column_name == version_column_name + + +def test_init_with_feature_id_and_no_fg_id_raises_error(get_feature_mock): + aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION) + + with pytest.raises( + ValueError, + match=re.escape( + "Since feature is not provided as a path, please specify" + + " feature_group_id." + ), + ): + Feature(_TEST_FG1_F1_ID) + + +def test_init_with_feature_path_and_fg_id_raises_error(get_feature_mock): + aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION) + + with pytest.raises( + ValueError, + match=re.escape( + "Since feature is provided as a path, feature_group_id should not be specified." 
+ ), + ): + Feature(_TEST_FG1_F1_PATH, feature_group_id=_TEST_FG1_ID) + + +def test_init_with_feature_id(get_feature_mock): + aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION) + + feature = Feature(_TEST_FG1_F1_ID, feature_group_id=_TEST_FG1_ID) + + get_feature_mock.assert_called_once_with( + name=_TEST_FG1_F1_PATH, + retry=base._DEFAULT_RETRY, + ) + + feature_eq( + feature, + name=_TEST_FG1_F1_ID, + resource_name=_TEST_FG1_F1_PATH, + project=_TEST_PROJECT, + location=_TEST_LOCATION, + description=_TEST_FG1_F1_DESCRIPTION, + labels=_TEST_FG1_F1_LABELS, + point_of_contact=_TEST_FG1_F1_POINT_OF_CONTACT, + ) + + +def test_init_with_feature_id_for_explicit_version_column( + get_feature_with_version_column_mock, +): + aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION) + + feature = Feature(_TEST_FG1_F2_ID, feature_group_id=_TEST_FG1_ID) + + get_feature_with_version_column_mock.assert_called_once_with( + name=_TEST_FG1_F2_PATH, + retry=base._DEFAULT_RETRY, + ) + + feature_eq( + feature, + name=_TEST_FG1_F2_ID, + resource_name=_TEST_FG1_F2_PATH, + project=_TEST_PROJECT, + location=_TEST_LOCATION, + description=_TEST_FG1_F2_DESCRIPTION, + labels=_TEST_FG1_F2_LABELS, + point_of_contact=_TEST_FG1_F2_POINT_OF_CONTACT, + version_column_name=_TEST_FG1_F2_VERSION_COLUMN_NAME, + ) + + +def test_init_with_feature_path(get_feature_mock): + aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION) + + feature = Feature(_TEST_FG1_F1_PATH) + + get_feature_mock.assert_called_once_with( + name=_TEST_FG1_F1_PATH, + retry=base._DEFAULT_RETRY, + ) + + feature_eq( + feature, + name=_TEST_FG1_F1_ID, + resource_name=_TEST_FG1_F1_PATH, + project=_TEST_PROJECT, + location=_TEST_LOCATION, + description=_TEST_FG1_F1_DESCRIPTION, + labels=_TEST_FG1_F1_LABELS, + point_of_contact=_TEST_FG1_F1_POINT_OF_CONTACT, + ) + + +def test_init_with_feature_path_for_explicit_version_column( + get_feature_with_version_column_mock, +): + aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION) + + feature = Feature(_TEST_FG1_F2_PATH) + + get_feature_with_version_column_mock.assert_called_once_with( + name=_TEST_FG1_F2_PATH, + retry=base._DEFAULT_RETRY, + ) + + feature_eq( + feature, + name=_TEST_FG1_F2_ID, + resource_name=_TEST_FG1_F2_PATH, + project=_TEST_PROJECT, + location=_TEST_LOCATION, + version_column_name=_TEST_FG1_F2_VERSION_COLUMN_NAME, + description=_TEST_FG1_F2_DESCRIPTION, + labels=_TEST_FG1_F2_LABELS, + point_of_contact=_TEST_FG1_F2_POINT_OF_CONTACT, + ) diff --git a/tests/unit/vertexai/test_feature_group.py b/tests/unit/vertexai/test_feature_group.py index 477d41e0d3..1235e06ace 100644 --- a/tests/unit/vertexai/test_feature_group.py +++ b/tests/unit/vertexai/test_feature_group.py @@ -60,7 +60,21 @@ _TEST_FG3_ENTITY_ID_COLUMNS, _TEST_FG3_LABELS, _TEST_FG_LIST, + _TEST_FG1_F1, + _TEST_FG1_F1_ID, + _TEST_FG1_F1_PATH, + _TEST_FG1_F1_DESCRIPTION, + _TEST_FG1_F1_LABELS, + _TEST_FG1_F1_POINT_OF_CONTACT, + _TEST_FG1_F2, + _TEST_FG1_F2_ID, + _TEST_FG1_F2_PATH, + _TEST_FG1_F2_DESCRIPTION, + _TEST_FG1_F2_LABELS, + _TEST_FG1_F2_POINT_OF_CONTACT, + _TEST_FG1_F2_VERSION_COLUMN_NAME, ) +from test_feature import feature_eq pytestmark = pytest.mark.usefixtures("google_auth_mock") @@ -108,6 +122,41 @@ def list_fg_mock(): yield list_fg_mock +@pytest.fixture +def delete_fg_mock(): + with patch.object( + feature_registry_service_client.FeatureRegistryServiceClient, + "delete_feature_group", + ) as delete_fg_mock: + delete_fg_lro_mock = mock.Mock(ga_operation.Operation) + delete_fg_mock.return_value = 
delete_fg_lro_mock + yield delete_fg_mock + + +@pytest.fixture +def create_feature_mock(): + with patch.object( + feature_registry_service_client.FeatureRegistryServiceClient, + "create_feature", + ) as create_feature_mock: + create_feature_lro_mock = mock.Mock(ga_operation.Operation) + create_feature_lro_mock.result.return_value = _TEST_FG1_F1 + create_feature_mock.return_value = create_feature_lro_mock + yield create_feature_mock + + +@pytest.fixture +def create_feature_with_version_column_mock(): + with patch.object( + feature_registry_service_client.FeatureRegistryServiceClient, + "create_feature", + ) as create_feature_mock: + create_feature_lro_mock = mock.Mock(ga_operation.Operation) + create_feature_lro_mock.result.return_value = _TEST_FG1_F2 + create_feature_mock.return_value = create_feature_lro_mock + yield create_feature_mock + + def fg_eq( fg_to_check: FeatureGroup, name: str, @@ -293,3 +342,191 @@ def test_list(list_fg_mock): location=_TEST_LOCATION, labels=_TEST_FG3_LABELS, ) + + +@pytest.mark.parametrize("force", [True, False]) +def test_delete(force, delete_fg_mock, get_fg_mock, fg_logger_mock, sync=True): + aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION) + + fg = FeatureGroup(_TEST_FG1_ID) + fg.delete(force=force, sync=sync) + + if not sync: + fg.wait() + + delete_fg_mock.assert_called_once_with( + name=_TEST_FG1_PATH, + force=force, + ) + + fg_logger_mock.assert_has_calls( + [ + call( + "Deleting FeatureGroup resource: projects/test-project/locations/us-central1/featureGroups/my_fg1" + ), + call( + f"Delete FeatureGroup backing LRO: {delete_fg_mock.return_value.operation.name}" + ), + call( + "FeatureGroup resource projects/test-project/locations/us-central1/featureGroups/my_fg1 deleted." + ), + ] + ) + + +def test_get_feature(get_fg_mock, get_feature_mock): + aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION) + + fg = FeatureGroup(_TEST_FG1_ID) + feature = fg.get_feature(_TEST_FG1_F1_ID) + + get_feature_mock.assert_called_once_with( + name=_TEST_FG1_F1_PATH, + retry=base._DEFAULT_RETRY, + ) + + feature_eq( + feature, + name=_TEST_FG1_F1_ID, + resource_name=_TEST_FG1_F1_PATH, + project=_TEST_PROJECT, + location=_TEST_LOCATION, + description=_TEST_FG1_F1_DESCRIPTION, + labels=_TEST_FG1_F1_LABELS, + point_of_contact=_TEST_FG1_F1_POINT_OF_CONTACT, + ) + + +@pytest.mark.parametrize("create_request_timeout", [None, 1.0]) +@pytest.mark.parametrize("sync", [True, False]) +def test_create_feature( + get_fg_mock, + create_feature_mock, + get_feature_mock, + fg_logger_mock, + create_request_timeout, + sync, +): + aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION) + + fg = FeatureGroup(_TEST_FG1_ID) + feature = fg.create_feature( + _TEST_FG1_F1_ID, + description=_TEST_FG1_F1_DESCRIPTION, + labels=_TEST_FG1_F1_LABELS, + point_of_contact=_TEST_FG1_F1_POINT_OF_CONTACT, + create_request_timeout=create_request_timeout, + sync=sync, + ) + + if not sync: + feature.wait() + + expected_feature = types.feature.Feature( + description=_TEST_FG1_F1_DESCRIPTION, + labels=_TEST_FG1_F1_LABELS, + point_of_contact=_TEST_FG1_F1_POINT_OF_CONTACT, + ) + create_feature_mock.assert_called_once_with( + parent=_TEST_FG1_PATH, + feature=expected_feature, + feature_id=_TEST_FG1_F1_ID, + metadata=(), + timeout=create_request_timeout, + ) + + feature_eq( + feature, + name=_TEST_FG1_F1_ID, + resource_name=_TEST_FG1_F1_PATH, + project=_TEST_PROJECT, + location=_TEST_LOCATION, + description=_TEST_FG1_F1_DESCRIPTION, + labels=_TEST_FG1_F1_LABELS, + 
point_of_contact=_TEST_FG1_F1_POINT_OF_CONTACT, + ) + + fg_logger_mock.assert_has_calls( + [ + call("Creating Feature"), + call( + f"Create Feature backing LRO: {create_feature_mock.return_value.operation.name}" + ), + call( + "Feature created. Resource name: projects/test-project/locations/us-central1/featureGroups/my_fg1/features/my_fg1_f1" + ), + call("To use this Feature in another session:"), + call( + "feature = aiplatform.Feature('projects/test-project/locations/us-central1/featureGroups/my_fg1/features/my_fg1_f1')" + ), + ] + ) + + +@pytest.mark.parametrize("create_request_timeout", [None, 1.0]) +@pytest.mark.parametrize("sync", [True, False]) +def test_create_feature_with_version_feature_column( + get_fg_mock, + create_feature_with_version_column_mock, + get_feature_with_version_column_mock, + fg_logger_mock, + create_request_timeout, + sync, +): + aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION) + + fg = FeatureGroup(_TEST_FG1_ID) + feature = fg.create_feature( + _TEST_FG1_F2_ID, + version_column_name=_TEST_FG1_F2_VERSION_COLUMN_NAME, + description=_TEST_FG1_F2_DESCRIPTION, + labels=_TEST_FG1_F2_LABELS, + point_of_contact=_TEST_FG1_F2_POINT_OF_CONTACT, + create_request_timeout=create_request_timeout, + sync=sync, + ) + + if not sync: + feature.wait() + + expected_feature = types.feature.Feature( + version_column_name=_TEST_FG1_F2_VERSION_COLUMN_NAME, + description=_TEST_FG1_F2_DESCRIPTION, + labels=_TEST_FG1_F2_LABELS, + point_of_contact=_TEST_FG1_F2_POINT_OF_CONTACT, + ) + create_feature_with_version_column_mock.assert_called_once_with( + parent=_TEST_FG1_PATH, + feature=expected_feature, + feature_id=_TEST_FG1_F2_ID, + metadata=(), + timeout=create_request_timeout, + ) + + feature_eq( + feature, + name=_TEST_FG1_F2_ID, + resource_name=_TEST_FG1_F2_PATH, + project=_TEST_PROJECT, + location=_TEST_LOCATION, + description=_TEST_FG1_F2_DESCRIPTION, + labels=_TEST_FG1_F2_LABELS, + point_of_contact=_TEST_FG1_F2_POINT_OF_CONTACT, + version_column_name=_TEST_FG1_F2_VERSION_COLUMN_NAME, + ) + + fg_logger_mock.assert_has_calls( + [ + call("Creating Feature"), + call( + f"Create Feature backing LRO: {create_feature_with_version_column_mock.return_value.operation.name}" + ), + call( + "Feature created. Resource name: projects/test-project/locations/us-central1/featureGroups/my_fg1/features/my_fg1_f2" + ), + call("To use this Feature in another session:"), + call( + "feature = aiplatform.Feature('projects/test-project/locations/us-central1/featureGroups/my_fg1/features/my_fg1_f2')" + ), + ] + ) diff --git a/vertexai/batch_prediction/_batch_prediction.py b/vertexai/batch_prediction/_batch_prediction.py new file mode 100644 index 0000000000..7900579cf4 --- /dev/null +++ b/vertexai/batch_prediction/_batch_prediction.py @@ -0,0 +1,310 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Class to support Batch Prediction with GenAI models.""" +# pylint: disable=protected-access + +import logging +import re +from typing import List, Optional, Union + +from google.cloud.aiplatform import base as aiplatform_base +from google.cloud.aiplatform import initializer as aiplatform_initializer +from google.cloud.aiplatform import jobs +from google.cloud.aiplatform import utils as aiplatform_utils +from google.cloud.aiplatform_v1 import types as gca_types +from vertexai import generative_models + +from google.rpc import status_pb2 + + +_LOGGER = aiplatform_base.Logger(__name__) + +_GEMINI_MODEL_PATTERN = r"publishers/google/models/gemini" + + +class BatchPredictionJob(aiplatform_base._VertexAiResourceNounPlus): + """Represents a BatchPredictionJob that runs with GenAI models.""" + + _resource_noun = "batchPredictionJobs" + _getter_method = "get_batch_prediction_job" + _list_method = "list_batch_prediction_jobs" + _delete_method = "delete_batch_prediction_job" + _job_type = "batch-predictions" + _parse_resource_name_method = "parse_batch_prediction_job_path" + _format_resource_name_method = "batch_prediction_job_path" + + client_class = aiplatform_utils.JobClientWithOverride + + def __init__(self, batch_prediction_job_name: str): + """Retrieves a BatchPredictionJob resource that runs with a GenAI model. + + Args: + batch_prediction_job_name (str): + Required. A fully-qualified BatchPredictionJob resource name or + ID. Example: "projects/.../locations/.../batchPredictionJobs/456" + or "456" when project and location are initialized. + + Raises: + ValueError: If batch_prediction_job_name represents a BatchPredictionJob + resource that runs with another type of model. + """ + super().__init__(resource_name=batch_prediction_job_name) + self._gca_resource = self._get_gca_resource( + resource_name=batch_prediction_job_name + ) + # TODO(b/338452508) Support tuned GenAI models. + if not re.search(_GEMINI_MODEL_PATTERN, self.model_name): + raise ValueError( + f"BatchPredictionJob '{batch_prediction_job_name}' " + f"runs with the model '{self.model_name}', " + "which is not a GenAI model." + ) + + @property + def model_name(self) -> str: + """Returns the model name used for this batch prediction job.""" + return self._gca_resource.model + + @property + def state(self) -> gca_types.JobState: + """Returns the state of this batch prediction job.""" + return self._gca_resource.state + + @property + def has_ended(self) -> bool: + """Returns true if this batch prediction job has ended.""" + return self.state in jobs._JOB_COMPLETE_STATES + + @property + def has_succeeded(self) -> bool: + """Returns true if this batch prediction job has succeeded.""" + return self.state == gca_types.JobState.JOB_STATE_SUCCEEDED + + @property + def error(self) -> Optional[status_pb2.Status]: + """Returns detailed error info for this Job resource.""" + return self._gca_resource.error + + @property + def output_location(self) -> str: + """Returns the output location of this batch prediction job.""" + return ( + self._gca_resource.output_info.gcs_output_directory + or self._gca_resource.output_info.bigquery_output_table + ) + + @classmethod + def submit( + cls, + source_model: Union[str, generative_models.GenerativeModel], + input_dataset: Union[str, List[str]], + *, + output_uri_prefix: Optional[str] = None, + ) -> "BatchPredictionJob": + """Submits a batch prediction job for a GenAI model. 
+ + Args: + source_model (Union[str, generative_models.GenerativeModel]): + Model name or a GenerativeModel instance for batch prediction. + Supported formats: "gemini-1.0-pro", "models/gemini-1.0-pro", + and "publishers/google/models/gemini-1.0-pro" + input_dataset (Union[str,List[str]]): + GCS URI(-s) or Bigquery URI to your input data to run batch + prediction on. Example: "gs://path/to/input/data.jsonl" or + "bq://projectId.bqDatasetId.bqTableId" + output_uri_prefix (str): + GCS or Bigquery URI prefix for the output predictions. Example: + "gs://path/to/output/data" or "bq://projectId.bqDatasetId" + If not specified, f"{STAGING_BUCKET}/gen-ai-batch-prediction" will + be used for GCS source and + f"bq://projectId.gen_ai_batch_prediction.predictions_{TIMESTAMP}" + will be used for Bigquery source. + + Returns: + Instantiated BatchPredictionJob. + + Raises: + ValueError: If source_model is not a GenAI model. + Or if input_dataset or output_uri_prefix are not in supported formats. + Or if output_uri_prefix is not specified and staging_bucket is not + set in vertexai.init(). + """ + # Handle model name + # TODO(b/338452508) Support tuned GenAI models. + model_name = cls._reconcile_model_name( + source_model._model_name + if isinstance(source_model, generative_models.GenerativeModel) + else source_model + ) + + # Handle input URI + gcs_source = None + bigquery_source = None + first_input_uri = ( + input_dataset if isinstance(input_dataset, str) else input_dataset[0] + ) + if first_input_uri.startswith("gs://"): + gcs_source = input_dataset + elif first_input_uri.startswith("bq://"): + if not isinstance(input_dataset, str): + raise ValueError("Multiple Bigquery input datasets are not supported.") + bigquery_source = input_dataset + else: + raise ValueError( + f"Unsupported input URI: {input_dataset}. " + "Supported formats: 'gs://path/to/input/data.jsonl' and " + "'bq://projectId.bqDatasetId.bqTableId'" + ) + + # Handle output URI + gcs_destination_prefix = None + bigquery_destination_prefix = None + if output_uri_prefix: + if output_uri_prefix.startswith("gs://"): + gcs_destination_prefix = output_uri_prefix + elif output_uri_prefix.startswith("bq://"): + # Temporarily handle this in SDK, will remove once b/338423462 is fixed. + bigquery_destination_prefix = cls._complete_bq_uri(output_uri_prefix) + else: + raise ValueError( + f"Unsupported output URI: {output_uri_prefix}. " + "Supported formats: 'gs://path/to/output/data' and " + "'bq://projectId.bqDatasetId'" + ) + else: + if first_input_uri.startswith("gs://"): + if not aiplatform_initializer.global_config.staging_bucket: + raise ValueError( + "Please either specify output_uri_prefix or " + "set staging_bucket in vertexai.init()." 
+ ) + gcs_destination_prefix = ( + aiplatform_initializer.global_config.staging_bucket.rstrip("/") + + "/gen-ai-batch-prediction" + ) + else: + bigquery_destination_prefix = cls._complete_bq_uri() + + # Reuse aiplatform class to submit the job (override _LOGGER) + logging.getLogger("google.cloud.aiplatform.jobs").disabled = True + try: + aiplatform_job = jobs.BatchPredictionJob.submit( + model_name=model_name, + gcs_source=gcs_source, + bigquery_source=bigquery_source, + gcs_destination_prefix=gcs_destination_prefix, + bigquery_destination_prefix=bigquery_destination_prefix, + ) + job = cls._empty_constructor() + job._gca_resource = aiplatform_job._gca_resource + + _LOGGER.log_create_complete( + cls, job._gca_resource, "job", module_name="batch_prediction" + ) + _LOGGER.info("View Batch Prediction Job:\n%s" % job._dashboard_uri()) + + return job + finally: + logging.getLogger("google.cloud.aiplatform.jobs").disabled = False + + def refresh(self) -> "BatchPredictionJob": + """Refreshes the batch prediction job from the service.""" + self._sync_gca_resource() + return self + + def cancel(self): + """Cancels this BatchPredictionJob. + + Success of cancellation is not guaranteed. Use `job.refresh()` and + `job.state` to verify if cancellation was successful. + """ + _LOGGER.log_action_start_against_resource("Cancelling", "run", self) + self.api_client.cancel_batch_prediction_job(name=self.resource_name) + + def delete(self): + """Deletes this BatchPredictionJob resource. + + WARNING: This deletion is permanent. + """ + self._delete() + + @classmethod + def list(cls, filter=None) -> List["BatchPredictionJob"]: + """Lists all BatchPredictionJob instances that run with GenAI models.""" + return cls._list( + cls_filter=lambda gca_resource: re.search( + _GEMINI_MODEL_PATTERN, gca_resource.model + ), + filter=filter, + ) + + def _dashboard_uri(self) -> Optional[str]: + """Returns the Google Cloud console URL where job can be viewed.""" + fields = self._parse_resource_name(self.resource_name) + location = fields.pop("location") + project = fields.pop("project") + job = list(fields.values())[0] + return ( + "https://console.cloud.google.com/ai/platform/locations/" + f"{location}/{self._job_type}/{job}?project={project}" + ) + + @classmethod + def _reconcile_model_name(cls, model_name: str) -> str: + """Reconciles model name to a publisher model resource name.""" + if not model_name: + raise ValueError("model_name must not be empty") + if "/" not in model_name: + model_name = "publishers/google/models/" + model_name + elif model_name.startswith("models/"): + model_name = "publishers/google/" + model_name + elif not model_name.startswith("publishers/google/models/") and not re.search( + r"^projects/.*?/locations/.*?/publishers/google/models/.*$", model_name + ): + raise ValueError(f"Invalid format for model name: {model_name}.") + + if not re.search(_GEMINI_MODEL_PATTERN, model_name): + raise ValueError(f"Model '{model_name}' is not a GenAI model.") + + return model_name + + @classmethod + def _complete_bq_uri(cls, uri: Optional[str] = None): + """Completes a BigQuery uri to a BigQuery table uri.""" + uri_parts = uri.split(".") if uri else [] + uri_len = len(uri_parts) + if len(uri_parts) > 3: + raise ValueError( + f"Invalid URI: {uri}. 
" + "Supported formats: 'bq://projectId.bqDatasetId.bqTableId'" + ) + + schema_and_project = ( + uri_parts[0] + if uri_len >= 1 + else f"bq://{aiplatform_initializer.global_config.project}" + ) + if not schema_and_project.startswith("bq://"): + raise ValueError("URI must start with 'bq://'") + + dataset = uri_parts[1] if uri_len >= 2 else "gen_ai_batch_prediction" + + table = ( + uri_parts[2] + if uri_len >= 3 + else f"predictions_{aiplatform_utils.timestamped_unique_name()}" + ) + + return f"{schema_and_project}.{dataset}.{table}" diff --git a/vertexai/preview/batch_prediction.py b/vertexai/preview/batch_prediction.py new file mode 100644 index 0000000000..776375676e --- /dev/null +++ b/vertexai/preview/batch_prediction.py @@ -0,0 +1,25 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Classes for batch prediction.""" + +# We just want to re-export certain classes +# pylint: disable=g-multiple-import,g-importing-member +from vertexai.batch_prediction._batch_prediction import ( + BatchPredictionJob, +) + +__all__ = [ + "BatchPredictionJob", +] diff --git a/vertexai/preview/evaluation/_eval_tasks.py b/vertexai/preview/evaluation/_eval_tasks.py index 651ec127fc..cc155e7d48 100644 --- a/vertexai/preview/evaluation/_eval_tasks.py +++ b/vertexai/preview/evaluation/_eval_tasks.py @@ -54,7 +54,8 @@ class EvalTask: models and their settings, and assess the quality of the model's generated text. - Dataset details: + Dataset Details: + Default dataset column names: * content_column_name: "content" * reference_column_name: "reference" @@ -74,12 +75,14 @@ class EvalTask: dataset must contain `instruction` and `context` column. Metrics Details: + The supported metrics, metric bundle descriptions, grading rubrics, and the required input fields can be found on the Vertex AI public documentation page [Evaluation methods and metrics](https://cloud.google.com/vertex-ai/generative-ai/docs/models/determine-eval). Usage: - 1. To perform bring your own prediction evaluation, provide the model + + 1. To perform bring-your-own-prediction(BYOP) evaluation, provide the model responses in the response column in the dataset. The response column name is "response" by default, or specify `response_column_name` parameter to customize. @@ -249,7 +252,7 @@ def _evaluate_with_experiment( model: Optional[Union[GenerativeModel, Callable[[str], str]]] = None, prompt_template: Optional[str] = None, experiment_run_name: Optional[str] = None, - response_column_name: str = "response", + response_column_name: Optional[str] = None, ) -> EvalResult: """Runs an evaluation for the EvalTask with an experiment. @@ -264,7 +267,7 @@ def _evaluate_with_experiment( to if an experiment is set for this EvalTask. If not provided, a random unique experiment run name is used. response_column_name: The column name of model response in the dataset. If - not set, default to `response`. + provided, this will override the `response_column_name` of the `EvalTask`. Returns: The evaluation result. 
@@ -279,7 +282,7 @@ def _evaluate_with_experiment( prompt_template=prompt_template, content_column_name=self.content_column_name, reference_column_name=self.reference_column_name, - response_column_name=response_column_name or self.response_column_name, + response_column_name=response_column_name, ) try: vertexai.preview.log_metrics(eval_result.summary_metrics) @@ -293,7 +296,7 @@ def evaluate( model: Optional[Union[GenerativeModel, Callable[[str], str]]] = None, prompt_template: Optional[str] = None, experiment_run_name: Optional[str] = None, - response_column_name: str = "response", + response_column_name: Optional[str] = None, ) -> EvalResult: """Runs an evaluation for the EvalTask. @@ -308,7 +311,7 @@ def evaluate( to if an experiment is set for this EvalTask. If not provided, a random unique experiment run name is used. response_column_name: The column name of model response in the dataset. If - not set, default to `response`. + provided, this will override the `response_column_name` of the `EvalTask`. Returns: The evaluation result. @@ -321,7 +324,7 @@ def evaluate( "`vertexai.init(experiment='experiment_name')`for logging this" " evaluation run." ) - + response_column_name = response_column_name or self.response_column_name experiment_run_name = experiment_run_name or f"{uuid.uuid4()}" if self.experiment and global_experiment_name: @@ -354,7 +357,7 @@ def evaluate( prompt_template=prompt_template, content_column_name=self.content_column_name, reference_column_name=self.reference_column_name, - response_column_name=response_column_name or self.response_column_name, + response_column_name=response_column_name, ) return eval_result diff --git a/vertexai/preview/evaluation/_evaluation.py b/vertexai/preview/evaluation/_evaluation.py index d396f9460f..fd348b7358 100644 --- a/vertexai/preview/evaluation/_evaluation.py +++ b/vertexai/preview/evaluation/_evaluation.py @@ -534,8 +534,7 @@ async def _compute_metrics( metric_name = metric tasks_by_metric[metric_name].append(task) - api_request_count = (len(api_metrics) + len(custom_metrics)) * len( - evaluation_run_config.dataset) + api_request_count = len(api_metrics) * len(evaluation_run_config.dataset) _LOGGER.info( f"Computing metrics with a total of {api_request_count} Vertex online" " evaluation service requests." @@ -629,7 +628,8 @@ def evaluate( Raises: ValueError: If the metrics list is empty, or the prompt template is not provided for PairwiseMetric, or multiple baseline models are specified for - PairwiseMetric instances. + PairwiseMetric instances, or both model and dataset model response column + are present. """ if not metrics: @@ -655,6 +655,22 @@ def evaluate( constants.Dataset.REFERENCE_COLUMN ) + if ( + model + and evaluation_run_config.column_map.get( + constants.Dataset.MODEL_RESPONSE_COLUMN + ) + in dataset.columns + ): + raise ValueError( + "The `model` parameter is specified, but the evaluation `dataset`" + f" contains model response column `{response_column_name}` to perform" + " bring-your-own-prediction(BYOP) evaluation. If you would like to" + " perform rapid evaluation using the dataset with the existing model" + f" response column `{response_column_name}`, please remove the" + " `model` input parameter." 
+ ) + baseline_model = None pairwise_metric_exists = any( isinstance(metric, metrics_base.PairwiseMetric) diff --git a/vertexai/preview/evaluation/metrics/_base.py b/vertexai/preview/evaluation/metrics/_base.py index f2cb85b784..01f7e209d9 100644 --- a/vertexai/preview/evaluation/metrics/_base.py +++ b/vertexai/preview/evaluation/metrics/_base.py @@ -20,7 +20,67 @@ class PairwiseMetric: - """The Side-by-side(SxS) Pairwise Metric.""" + """The Side-by-side(SxS) Pairwise Metric. + + A model-based evaluation metric that compares two generative models + side-by-side, and allows users to A/B test their generative models to + determine which model is performing better on the given evaluation task. + + For more details on when to use pairwise metrics, see + [Evaluation methods and metrics](https://cloud.google.com/vertex-ai/generative-ai/docs/models/determine-eval#pointwise_versus_pairwise). + + Result Details: + + * In `EvalResult.summary_metrics`, win rates for both the baseline and + candidate model are computed, showing the rate of each model performs + better on the given task. The win rate is computed as the number of times + the candidate model performs better than the baseline model divided by the + total number of examples. The win rate is a number between 0 and 1. + + * In `EvalResult.metrics_table`, a pairwise metric produces three + evaluation results for each row in the dataset: + * `pairwise_choice`: the `pairwise_choice` in the evaluation result is + an enumeration that indicates whether the candidate or baseline + model perform better. + * `explanation`: The model AutoRater's rationale behind each verdict + using chain-of-thought reasoning. These explanations help users + scrutinize the AutoRater's judgment and build appropriate trust in + its decisions. + * `confidence`: A score between 0 and 1, which signifies how confident + the AutoRater was with its verdict. A score closer to 1 means higher + confidence. + + See [documentation page](https://cloud.google.com/vertex-ai/generative-ai/docs/models/determine-eval#understand-results) + for more details on understanding the metric results. + + Usages: + + ``` + from vertexai.generative_models import GenerativeModel + from vertexai.preview.evaluation import EvalTask, PairwiseMetric + + baseline_model = GenerativeModel("gemini-1.0-pro") + candidate_model = GenerativeModel("gemini-1.5-pro") + + pairwise_summarization_quality = PairwiseMetric( + metric = "summarization_quality", + baseline_model=baseline_model, + ) + + eval_task = EvalTask( + dataset = pd.DataFrame({ + "instruction": [...], + "context": [...], + }), + metrics=[pairwise_summarization_quality], + ) + + pairwise_results = eval_task.evaluate( + prompt_template="instruction: {instruction}. context: {context}", + model=candidate_model, + ) + ``` + """ def __init__( self, @@ -37,8 +97,8 @@ def __init__( Args: metric: The Side-by-side(SxS) pairwise evaluation metric name. baseline_model: The baseline model for the Side-by-side(SxS) comparison. - use_reference: Whether to use reference to compute the metric. If specified, - the reference column is required in the dataset. + use_reference: Whether to use reference to compute the metric. If + specified, the reference column is required in the dataset. version: The metric version to use for evaluation. """ self._metric = metric @@ -74,8 +134,8 @@ class CustomMetric: Attributes: name: The name of the metric. metric_function: The evaluation function. Must use the dataset row/instance - as the metric_function input. 
Returns per-instance metric result as a - dictionary. The metric score must mapped to the CustomMetric.name as key. + as the metric_function input. Returns per-instance metric result as a + dictionary. The metric score must mapped to the CustomMetric.name as key. """ def __init__( diff --git a/vertexai/preview/evaluation/prompt_template.py b/vertexai/preview/evaluation/prompt_template.py index 14b0f6bd6a..4777bb0002 100644 --- a/vertexai/preview/evaluation/prompt_template.py +++ b/vertexai/preview/evaluation/prompt_template.py @@ -27,18 +27,14 @@ class PromptTemplate: values using the `assemble` method, providing flexibility in generating dynamic prompts. - Example Usage: + Usage: ``` - template_str = "Hello, {name}! Today is {day}. How are you?" - prompt_template = PromptTemplate(template_str) - completed_prompt = prompt_template.assemble(name="John", day="Monday") - print(completed_prompt) + template_str = "Hello, {name}! Today is {day}. How are you?" + prompt_template = PromptTemplate(template_str) + completed_prompt = prompt_template.assemble(name="John", day="Monday") + print(completed_prompt) ``` - - Attributes: - template: The template string containing placeholders for replacement. - placeholders: A set of placeholder names from the template string. """ def __init__(self, template: str): diff --git a/vertexai/preview/reasoning_engines/templates/langchain.py b/vertexai/preview/reasoning_engines/templates/langchain.py index a8e047b1de..195ede6898 100644 --- a/vertexai/preview/reasoning_engines/templates/langchain.py +++ b/vertexai/preview/reasoning_engines/templates/langchain.py @@ -96,6 +96,7 @@ def _default_runnable_builder( prompt: Optional["RunnableSerializable"] = None, output_parser: Optional["RunnableSerializable"] = None, chat_history: Optional["GetSessionHistoryCallable"] = None, + model_tool_kwargs: Optional[Mapping[str, Any]] = None, agent_executor_kwargs: Optional[Mapping[str, Any]] = None, runnable_kwargs: Optional[Mapping[str, Any]] = None, ) -> "RunnableSerializable": @@ -109,10 +110,11 @@ def _default_runnable_builder( has_history: bool = chat_history is not None prompt = prompt or _default_prompt(has_history) output_parser = output_parser or _default_output_parser() + model_tool_kwargs = model_tool_kwargs or {} agent_executor_kwargs = agent_executor_kwargs or {} runnable_kwargs = runnable_kwargs or _default_runnable_kwargs(has_history) if tools: - model = model.bind_tools(tools=tools) + model = model.bind_tools(tools=tools, **model_tool_kwargs) else: tools = [] agent_executor = AgentExecutor( @@ -202,6 +204,7 @@ def __init__( output_parser: Optional["RunnableSerializable"] = None, chat_history: Optional["GetSessionHistoryCallable"] = None, model_kwargs: Optional[Mapping[str, Any]] = None, + model_tool_kwargs: Optional[Mapping[str, Any]] = None, agent_executor_kwargs: Optional[Mapping[str, Any]] = None, runnable_kwargs: Optional[Mapping[str, Any]] = None, model_builder: Optional[Callable] = None, @@ -233,8 +236,9 @@ def __init__( # runnable_builder from langchain import agents from langchain_core.runnables.history import RunnableWithMessageHistory + llm_with_tools = llm.bind_tools(tools=tools, **model_tool_kwargs) agent_executor = agents.AgentExecutor( - agent=prompt | llm.bind_tools(tools=tools) | output_parser, + agent=prompt | llm_with_tools | output_parser, tools=tools, **agent_executor_kwargs, ) @@ -282,6 +286,9 @@ def __init__( "top_k": 40, } ``` + model_tool_kwargs (Mapping[str, Any]): + Optional. 
Additional keyword arguments when binding tools to the + model using `model.bind_tools()`. agent_executor_kwargs (Mapping[str, Any]): Optional. Additional keyword arguments for the constructor of langchain.agents.AgentExecutor. An example would be @@ -334,6 +341,7 @@ def __init__( self._output_parser = output_parser self._chat_history = chat_history self._model_kwargs = model_kwargs + self._model_tool_kwargs = model_tool_kwargs self._agent_executor_kwargs = agent_executor_kwargs self._runnable_kwargs = runnable_kwargs self._model = None @@ -365,6 +373,7 @@ def set_up(self): tools=self._tools, output_parser=self._output_parser, chat_history=self._chat_history, + model_tool_kwargs=self._model_tool_kwargs, agent_executor_kwargs=self._agent_executor_kwargs, runnable_kwargs=self._runnable_kwargs, ) diff --git a/vertexai/reasoning_engines/_reasoning_engines.py b/vertexai/reasoning_engines/_reasoning_engines.py index 4ee61133f8..c7ccad0dcf 100644 --- a/vertexai/reasoning_engines/_reasoning_engines.py +++ b/vertexai/reasoning_engines/_reasoning_engines.py @@ -178,6 +178,18 @@ def create( f"Unsupported python version: {sys_version}. ReasoningEngine " f"only supports {_SUPPORTED_PYTHON_VERSIONS} at the moment." ) + if reasoning_engine_name: + _LOGGER.warning( + "ReasoningEngine does not support user-defined resource IDs at " + f"the moment. Therefore {reasoning_engine_name=} would be " + "ignored and a random ID will be generated instead." + ) + if sys_version != f"{sys.version_info.major}.{sys.version_info.minor}": + _LOGGER.warning( + f"{sys_version=} is inconsistent with {sys.version_info=}. " + "This might result in issues with deployment, and should only " + "be used as a workaround for advanced cases." + ) sdk_resource = cls.__new__(cls) base.VertexAiResourceNounWithFutureManager.__init__( sdk_resource, diff --git a/vertexai/resources/preview/__init__.py b/vertexai/resources/preview/__init__.py index e0ea4632c8..e6a2706742 100644 --- a/vertexai/resources/preview/__init__.py +++ b/vertexai/resources/preview/__init__.py @@ -35,6 +35,7 @@ ) from vertexai.resources.preview.feature_store import ( + Feature, FeatureGroup, FeatureOnlineStore, FeatureOnlineStoreType, @@ -63,6 +64,7 @@ "PersistentResource", "EntityType", "PipelineJobSchedule", + "Feature", "FeatureGroup", "FeatureGroupBigQuerySource", "FeatureOnlineStoreType", diff --git a/vertexai/resources/preview/feature_store/__init__.py b/vertexai/resources/preview/feature_store/__init__.py index bc7d1b0373..7d32668e8d 100644 --- a/vertexai/resources/preview/feature_store/__init__.py +++ b/vertexai/resources/preview/feature_store/__init__.py @@ -16,6 +16,10 @@ # """The vertexai resources preview module.""" +from vertexai.resources.preview.feature_store.feature import ( + Feature, +) + from vertexai.resources.preview.feature_store.feature_group import ( FeatureGroup, ) @@ -41,6 +45,7 @@ ) __all__ = ( + Feature, FeatureGroup, FeatureGroupBigQuerySource, FeatureOnlineStoreType, diff --git a/vertexai/resources/preview/feature_store/feature.py b/vertexai/resources/preview/feature_store/feature.py new file mode 100644 index 0000000000..ea5319486b --- /dev/null +++ b/vertexai/resources/preview/feature_store/feature.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- + +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import re +from typing import Optional +from google.auth import credentials as auth_credentials +from google.cloud.aiplatform import base +from google.cloud.aiplatform import utils +from google.cloud.aiplatform.compat.types import ( + feature as gca_feature, +) + + +class Feature(base.VertexAiResourceNounWithFutureManager): + """Class for managing Feature resources.""" + + client_class = utils.FeatureRegistryClientWithOverride + + _resource_noun = "features" + _getter_method = "get_feature" + _list_method = "list_features" + _delete_method = "delete_features" + _parse_resource_name_method = "parse_feature_path" + _format_resource_name_method = "feature_path" + _gca_resource: gca_feature.Feature + + def __init__( + self, + name: str, + feature_group_id: Optional[str] = None, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + ): + """Retrieves an existing managed feature. + + Args: + name: + The resource name + (`projects/.../locations/.../featureGroups/.../features/...`) or + ID. + feature_group_id: + The feature group ID. Must be passed in if name is an ID and not + a resource path. + project: + Project to retrieve feature from. If not set, the project set in + aiplatform.init will be used. + location: + Location to retrieve feature from. If not set, the location set + in aiplatform.init will be used. + credentials: + Custom credentials to use to retrieve this feature. Overrides + credentials set in aiplatform.init. + """ + + super().__init__( + project=project, + location=location, + credentials=credentials, + resource_name=name, + ) + + if re.fullmatch( + r"projects/.+/locations/.+/featureGroups/.+/features/.+", + name, + ): + if feature_group_id: + raise ValueError( + "Since feature is provided as a path, feature_group_id should not be specified." + ) + feature = name + else: + from .feature_group import FeatureGroup + + # Construct the feature path using feature group ID if only the + # feature group ID is provided. + if not feature_group_id: + raise ValueError( + "Since feature is not provided as a path, please specify" + + " feature_group_id." 
+ ) + + feature_group_path = utils.full_resource_name( + resource_name=feature_group_id, + resource_noun=FeatureGroup._resource_noun, + parse_resource_name_method=FeatureGroup._parse_resource_name, + format_resource_name_method=FeatureGroup._format_resource_name, + ) + + feature = f"{feature_group_path}/features/{name}" + + self._gca_resource = self._get_gca_resource(resource_name=feature) + + @property + def version_column_name(self) -> str: + """The name of the BigQuery Table/View column hosting data for this version.""" + return self._gca_resource.version_column_name + + @property + def description(self) -> str: + """The description of the feature.""" + return self._gca_resource.description + + @property + def point_of_contact(self) -> str: + """The point of contact for the feature.""" + return self._gca_resource.point_of_contact diff --git a/vertexai/resources/preview/feature_store/feature_group.py b/vertexai/resources/preview/feature_store/feature_group.py index 90198a4e01..5a89f0e69e 100644 --- a/vertexai/resources/preview/feature_store/feature_group.py +++ b/vertexai/resources/preview/feature_store/feature_group.py @@ -26,12 +26,16 @@ from google.cloud.aiplatform import base, initializer from google.cloud.aiplatform import utils from google.cloud.aiplatform.compat.types import ( + feature as gca_feature, feature_group as gca_feature_group, io as gca_io, ) from vertexai.resources.preview.feature_store.utils import ( FeatureGroupBigQuerySource, ) +from vertexai.resources.preview.feature_store import ( + Feature, +) _LOGGER = base.Logger(__name__) @@ -208,6 +212,152 @@ def create( return feature_group_obj + @base.optional_sync() + def delete(self, force: bool = False, sync: bool = True) -> None: + """Deletes this feature group. + + WARNING: This deletion is permanent. + + Args: + force: + If set to True, all features under this online store will be + deleted prior to online store deletion. Otherwise, deletion + will only succeed if the online store has no FeatureViews. + + If set to true, any Features under this FeatureGroup will also + be deleted. (Otherwise, the request will only work if the + FeatureGroup has no Features.) + sync: + Whether to execute this deletion synchronously. If False, this + method will be executed in concurrent Future and any downstream + object will be immediately returned and synced when the Future + has completed. + """ + + lro = getattr(self.api_client, self._delete_method)( + name=self.resource_name, + force=force, + ) + _LOGGER.log_delete_with_lro(self, lro) + lro.result() + _LOGGER.log_delete_complete(self) + + def get_feature(self, feature_id: str) -> Feature: + """Retrieves an existing managed feature. + + Args: + feature_id: The ID of the feature. + + Returns: + Feature - the Feature resource object under this feature group. + """ + return Feature(f"{self.resource_name}/features/{feature_id}") + + def create_feature( + self, + name: str, + version_column_name: Optional[str] = None, + description: Optional[str] = None, + labels: Optional[Dict[str, str]] = None, + point_of_contact: Optional[str] = None, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + request_metadata: Optional[Sequence[Tuple[str, str]]] = None, + create_request_timeout: Optional[float] = None, + sync: bool = True, + ) -> Feature: + """Creates a new feature. + + Args: + name: The name of the feature. + version_column_name: + The name of the BigQuery Table/View column hosting data for this + version. 
If no value is provided, will use feature_id. + description: Description of the feature. + labels: + The labels with user-defined metadata to organize your Features. + Label keys and values can be no longer than 64 characters + (Unicode codepoints), can only contain lowercase letters, + numeric characters, underscores and dashes. International + characters are allowed. + + See https://goo.gl/xmQnxf for more information on and examples + of labels. No more than 64 user labels can be associated with + one Feature (System labels are excluded)." System reserved label + keys are prefixed with "aiplatform.googleapis.com/" and are + immutable. + point_of_contact: + Entity responsible for maintaining this feature. Can be comma + separated list of email addresses or URIs. + project: + Project to create feature in. If unset, the project set in + aiplatform.init will be used. + location: + Location to create feature in. If not set, location set in + aiplatform.init will be used. + credentials: + Custom credentials to use to create this feature. Overrides + credentials set in aiplatform.init. + request_metadata: + Strings which should be sent along with the request as metadata. + create_request_timeout: + The timeout for the create request in seconds. + sync: + Whether to execute this creation synchronously. If False, this + method will be executed in concurrent Future and any downstream + object will be immediately returned and synced when the Future + has completed. + + Returns: + Feature - the Feature resource object. + """ + + gapic_feature = gca_feature.Feature() + + if version_column_name: + gapic_feature.version_column_name = version_column_name + + if description: + gapic_feature.description = description + + if labels: + utils.validate_labels(labels) + gapic_feature.labels = labels + + if point_of_contact: + gapic_feature.point_of_contact = point_of_contact + + if request_metadata is None: + request_metadata = () + + api_client = self.__class__._instantiate_client( + location=location, credentials=credentials + ) + + create_feature_lro = api_client.create_feature( + parent=self.resource_name, + feature=gapic_feature, + feature_id=name, + metadata=request_metadata, + timeout=create_request_timeout, + ) + + _LOGGER.log_create_with_lro(Feature, create_feature_lro) + + created_feature = create_feature_lro.result() + + _LOGGER.log_create_complete(Feature, created_feature, "feature") + + feature_obj = Feature( + name=created_feature.name, + project=project, + location=location, + credentials=credentials, + ) + + return feature_obj + @property def source(self) -> FeatureGroupBigQuerySource: return FeatureGroupBigQuerySource( diff --git a/vertexai/resources/preview/ml_monitoring/model_monitors.py b/vertexai/resources/preview/ml_monitoring/model_monitors.py index 6e26858f08..c53a438e37 100644 --- a/vertexai/resources/preview/ml_monitoring/model_monitors.py +++ b/vertexai/resources/preview/ml_monitoring/model_monitors.py @@ -121,7 +121,7 @@ def _visualize_feature_attribution(feature_attribution_output: str) -> None: import tensorflow as tf with tf.io.gfile.GFile(feature_attribution_output, "r") as f: - return json.loads(f.read()) + print(json.dumps(json.loads(f.read()), indent=4)) def _feature_drift_stats_output_path(output_directory: str, job_id: str) -> (str, str):
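
Usage sketch for the APIs introduced in this change. This is a minimal, illustrative example rather than documented library usage; the project ID, location, staging bucket, input URI, and feature IDs (`my-project`, `gs://my-bucket/...`, `my_fg`, `my_feature`) are placeholders, and the calls assume the `vertexai` package built from this patch plus valid Google Cloud credentials.

```
import time

import vertexai
from vertexai.preview import batch_prediction
from vertexai.resources.preview import FeatureGroup

# Placeholders: replace with a real project, location, and staging bucket.
vertexai.init(
    project="my-project",
    location="us-central1",
    staging_bucket="gs://my-bucket",  # used when output_uri_prefix is omitted
)

# GenAI batch prediction: the input may be a GCS JSONL file ("gs://...")
# or a single BigQuery table ("bq://projectId.bqDatasetId.bqTableId").
job = batch_prediction.BatchPredictionJob.submit(
    source_model="gemini-1.0-pro",
    input_dataset="gs://my-bucket/input/data.jsonl",
)

# Poll until the job reaches a terminal state, then inspect the result.
while not job.has_ended:
    time.sleep(60)
    job.refresh()
if job.has_succeeded:
    print("Output written to:", job.output_location)
else:
    print("Job failed:", job.error)

# Feature registry helpers: create, fetch, and delete features in a group.
fg = FeatureGroup("my_fg")
feature = fg.create_feature(
    "my_feature",
    version_column_name="my_feature_column",  # optional; defaults to the feature ID
    description="Example feature",
)
print(fg.get_feature("my_feature").resource_name)
fg.delete(force=True)  # force=True also deletes features under the group
```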