From 6c15762a97ac6e4cc751a50c680c82443c41adf5 Mon Sep 17 00:00:00 2001
From: Yeesian Ng <ysian@google.com>
Date: Thu, 7 Aug 2025 08:26:40 -0700
Subject: [PATCH 1/5] fix: Set the `agent_framework` when initializing
 module-based agent engine

PiperOrigin-RevId: 792170095
---
 vertexai/agent_engines/_agent_engines.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/vertexai/agent_engines/_agent_engines.py b/vertexai/agent_engines/_agent_engines.py
index 9de7bb6598..416c912e16 100644
--- a/vertexai/agent_engines/_agent_engines.py
+++ b/vertexai/agent_engines/_agent_engines.py
@@ -204,6 +204,7 @@ def __init__(
         agent_name: str,
         register_operations: Dict[str, Sequence[str]],
         sys_paths: Optional[Sequence[str]] = None,
+        agent_framework: Optional[str] = None,
     ):
         """Initializes a module-based agent.
 
@@ -222,7 +223,7 @@ def __init__(
                 to the system path in the sequence being specified here, and
                 only be appended if it is not already in the system path.
         """
-        self.agent_framework = None
+        self.agent_framework = agent_framework
         self._tmpl_attrs = {
             "module_name": module_name,
             "agent_name": agent_name,
@@ -273,7 +274,8 @@ def set_up(self) -> None:
                 f"Agent {agent_name} not found in module "
                 f"{self._tmpl_attrs.get('module_name')}"
             ) from e
-        self.agent_framework = _get_agent_framework(agent)
+        if not self.agent_framework:
+            self.agent_framework = _get_agent_framework(agent)
         self._tmpl_attrs["agent"] = agent
         if hasattr(agent, "set_up"):
             agent.set_up()

From 63e1caa8879c237125aca93f184dfd06689fc128 Mon Sep 17 00:00:00 2001
From: Sara Robinson <sararob@google.com>
Date: Thu, 7 Aug 2025 12:54:51 -0700
Subject: [PATCH 2/5] fix: GenAI SDK client - Fix typo in error message for
 optimize_prompt

PiperOrigin-RevId: 792273838
---
 vertexai/_genai/_prompt_optimizer_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vertexai/_genai/_prompt_optimizer_utils.py b/vertexai/_genai/_prompt_optimizer_utils.py
index 64fa599af7..e1b44b86a3 100644
--- a/vertexai/_genai/_prompt_optimizer_utils.py
+++ b/vertexai/_genai/_prompt_optimizer_utils.py
@@ -15,7 +15,6 @@
 """Utility functions for prompt optimizer."""
 
 import json
-from google.genai import errors
 from . import types
 
 
@@ -65,7 +64,8 @@ def _clean_and_parse_optimized_prompt(output_str: str):
     try:
         return json.loads(cleaned_string)
     except json.JSONDecodeError as e:
-        raise errors.ClinentError(
+        # TODO(b/437144880): raise errors.ClientError here instead
+        raise ValueError(
             f"Failed to parse the response from prompt optimizer endpoint. {e}"
         ) from e
 

From f6ad3bcf7544c361a78462ed6b42922eca2ed1b5 Mon Sep 17 00:00:00 2001
From: Marcus Lin <marcuslin@google.com>
Date: Thu, 7 Aug 2025 14:01:59 -0700
Subject: [PATCH 3/5] chore: Support async_stream_query to make async stream
 query request.

PiperOrigin-RevId: 792298909
---
 .../unit/vertexai/genai/test_agent_engines.py |  6 ++-
 vertexai/_genai/_agent_engines_utils.py       |  2 +-
 vertexai/_genai/agent_engines.py              | 39 ++++++++++++++++++-
 3 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/tests/unit/vertexai/genai/test_agent_engines.py b/tests/unit/vertexai/genai/test_agent_engines.py
index eca4ad638e..2786133cbe 100644
--- a/tests/unit/vertexai/genai/test_agent_engines.py
+++ b/tests/unit/vertexai/genai/test_agent_engines.py
@@ -1624,9 +1624,13 @@ def test_query_agent_engine_stream(self):
             )
 
     def test_query_agent_engine_async_stream(self):
+        async def mock_async_generator():
+            yield genai_types.HttpResponse(body=b"")
+
         with mock.patch.object(
-            self.client.agent_engines._api_client, "request_streamed"
+            self.client.agent_engines._api_client, "async_request_streamed"
         ) as request_mock:
+            request_mock.return_value = mock_async_generator()
             agent = self.client.agent_engines._register_api_methods(
                 agent=_genai_types.AgentEngine(
                     api_client=self.client.agent_engines,
diff --git a/vertexai/_genai/_agent_engines_utils.py b/vertexai/_genai/_agent_engines_utils.py
index 7f2b0f6422..5750df77d7 100644
--- a/vertexai/_genai/_agent_engines_utils.py
+++ b/vertexai/_genai/_agent_engines_utils.py
@@ -1408,7 +1408,7 @@ async def _method(self: genai_types.AgentEngine, **kwargs) -> AsyncIterator[Any]
             raise ValueError("api_client is not initialized.")
         if not self.api_resource:
             raise ValueError("api_resource is not initialized.")
-        for http_response in self.api_client._stream_query(
+        async for http_response in self.api_client._async_stream_query(
             name=self.api_resource.name,
             config={
                 "class_method": method_name,
diff --git a/vertexai/_genai/agent_engines.py b/vertexai/_genai/agent_engines.py
index 7829e068c7..48e9d82aa6 100644
--- a/vertexai/_genai/agent_engines.py
+++ b/vertexai/_genai/agent_engines.py
@@ -19,7 +19,7 @@
 import json
 import logging
 import time
-from typing import Any, Iterator, Optional, Sequence, Tuple, Union
+from typing import Any, AsyncIterator, Iterator, Optional, Sequence, Tuple, Union
 from urllib.parse import urlencode
 
 from google.genai import _api_module
@@ -3128,6 +3128,43 @@ def _stream_query(
         ):
             yield response
 
+    async def _async_stream_query(
+        self,
+        *,
+        name: str,
+        config: Optional[types.QueryAgentEngineConfigOrDict] = None,
+    ) -> AsyncIterator[Any]:
+        """Streams the response of the agent engine asynchronously.
+
+        Args:
+            name: Passed as `name` to the query request parameters.
+            config: Optional query config; its `http_options` (if any) are
+                forwarded to the streamed request.
+
+        Yields:
+            Each response from `self._api_client.async_request_streamed`.
+        """
+        params = types._QueryAgentEngineRequestParameters(name=name, config=config)
+        request_dict = _QueryAgentEngineRequestParameters_to_vertex(params)
+        path = "{name}:streamQuery?alt=sse"
+        request_url_dict = request_dict.get("_url")
+        if request_url_dict:
+            path = path.format_map(request_url_dict)
+        query_params = request_dict.get("_query")
+        if query_params:
+            # The path already carries `?alt=sse`, so extra params join with `&`.
+            path = f"{path}&{urlencode(query_params)}"
+        # TODO: remove the hack that pops config.
+        request_dict.pop("config", None)
+        # `http_options` stays None when no config was supplied.
+        http_options = params.config.http_options if params.config else None
+        request_dict = _common.convert_to_dict(request_dict)
+        request_dict = _common.encode_unserializable_types(request_dict)
+        async_iterator = await self._api_client.async_request_streamed(
+            "post", path, request_dict, http_options
+        )
+        async for response in async_iterator:
+            yield response
+
     def create_memory(
         self,
         *,

From 52eacce2a4150721780b815764dce1fc0dd05a2a Mon Sep 17 00:00:00 2001
From: A Vertex SDK engineer <vertex-sdk-bot@google.com>
Date: Thu, 7 Aug 2025 15:56:47 -0700
Subject: [PATCH 4/5] feat: Add
 autoscaling_target_pubsub_num_undelivered_messages option in Preview model
 deployment on Endpoint & Model classes.

PiperOrigin-RevId: 792342163
---
 google/cloud/aiplatform/models.py         | 84 +++++++++++++++++++++--
 google/cloud/aiplatform/preview/models.py | 68 +++++++++++++++++-
 tests/unit/aiplatform/test_endpoints.py   | 65 ++++++++++++++++++
 tests/unit/aiplatform/test_models.py      | 69 +++++++++++++++++++
 4 files changed, 280 insertions(+), 6 deletions(-)

diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py
index 49521ad6b3..27558ba361 100644
--- a/google/cloud/aiplatform/models.py
+++ b/google/cloud/aiplatform/models.py
@@ -1364,6 +1364,8 @@ def deploy(
         autoscaling_target_cpu_utilization: Optional[int] = None,
         autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
         autoscaling_target_request_count_per_minute: Optional[int] = None,
+        autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None,
+        autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None,
         enable_access_logging=False,
         disable_container_logging: bool = False,
         deployment_resource_pool: Optional[DeploymentResourcePool] = None,
@@ -1460,6 +1462,12 @@ def deploy(
             autoscaling_target_request_count_per_minute (int):
                 Optional. The target number of requests per minute for autoscaling.
                 If set, the model will be scaled based on the number of requests it receives.
+            autoscaling_target_pubsub_num_undelivered_messages (int):
+                Optional. The target number of pubsub undelivered messages for autoscaling.
+                If set, the model will be scaled based on the pubsub queue size.
+            autoscaling_pubsub_subscription_labels (Dict[str, str]):
+                Optional. Monitored resource labels as key value pairs for
+                metric filtering for pubsub_num_undelivered_messages.
             enable_access_logging (bool):
                 Whether to enable endpoint access logging. Defaults to False.
             disable_container_logging (bool):
@@ -1541,6 +1549,8 @@ def deploy(
             autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
             autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
             autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute,
+            autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages,
+            autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels,
             spot=spot,
             enable_access_logging=enable_access_logging,
             disable_container_logging=disable_container_logging,
@@ -1574,6 +1584,8 @@ def _deploy(
         autoscaling_target_cpu_utilization: Optional[int] = None,
         autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
         autoscaling_target_request_count_per_minute: Optional[int] = None,
+        autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None,
+        autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None,
         spot: bool = False,
         enable_access_logging=False,
         disable_container_logging: bool = False,
@@ -1673,6 +1685,12 @@ def _deploy(
             autoscaling_target_request_count_per_minute (int):
                 Optional. The target number of requests per minute for autoscaling.
                 If set, the model will be scaled based on the number of requests it receives.
+            autoscaling_target_pubsub_num_undelivered_messages (int):
+                Optional. The target number of pubsub undelivered messages for autoscaling.
+                If set, the model will be scaled based on the pubsub queue size.
+            autoscaling_pubsub_subscription_labels (Dict[str, str]):
+                Optional. Monitored resource labels as key value pairs for
+                metric filtering for pubsub_num_undelivered_messages.
             spot (bool):
                 Optional. Whether to schedule the deployment workload on spot VMs.
             enable_access_logging (bool):
@@ -1731,6 +1749,8 @@ def _deploy(
             autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
             autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
             autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute,
+            autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages,
+            autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels,
             spot=spot,
             enable_access_logging=enable_access_logging,
             disable_container_logging=disable_container_logging,
@@ -1771,6 +1791,8 @@ def _deploy_call(
         autoscaling_target_cpu_utilization: Optional[int] = None,
         autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
         autoscaling_target_request_count_per_minute: Optional[int] = None,
+        autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None,
+        autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None,
         spot: bool = False,
         enable_access_logging=False,
         disable_container_logging: bool = False,
@@ -1876,6 +1898,11 @@ def _deploy_call(
                 A default value of 60 will be used if not specified.
             autoscaling_target_request_count_per_minute (int):
                 Optional. Target request count per minute per instance.
+            autoscaling_target_pubsub_num_undelivered_messages (int):
+                Optional. Target pubsub queue size per instance.
+            autoscaling_pubsub_subscription_labels (Dict[str, str]):
+                Optional. Monitored resource labels as key value pairs for
+                metric filtering for pubsub_num_undelivered_messages.
             spot (bool):
                 Optional. Whether to schedule the deployment workload on spot VMs.
             enable_access_logging (bool):
@@ -1946,6 +1973,8 @@ def _deploy_call(
                 or autoscaling_target_accelerator_duty_cycle
                 or autoscaling_target_cpu_utilization
                 or autoscaling_target_request_count_per_minute
+                or autoscaling_target_pubsub_num_undelivered_messages
+                or autoscaling_pubsub_subscription_labels
             )
 
             if provided_custom_machine_spec:
@@ -1954,7 +1983,9 @@ def _deploy_call(
                     "The machine_type, accelerator_type and accelerator_count, "
                     "autoscaling_target_accelerator_duty_cycle, "
                     "autoscaling_target_cpu_utilization, "
-                    "autoscaling_target_request_count_per_minute parameters "
+                    "autoscaling_target_request_count_per_minute, "
+                    "autoscaling_target_pubsub_num_undelivered_messages, "
+                    "autoscaling_pubsub_subscription_labels parameters "
                     "may not be set when `deployment_resource_pool` is "
                     "specified."
                 )
@@ -2008,6 +2039,8 @@ def _deploy_call(
                 or autoscaling_target_accelerator_duty_cycle
                 or autoscaling_target_cpu_utilization
                 or autoscaling_target_request_count_per_minute
+                or autoscaling_target_pubsub_num_undelivered_messages
+                or autoscaling_pubsub_subscription_labels
             )
 
             # If the model supports both automatic and dedicated deployment resources,
@@ -2022,7 +2055,9 @@ def _deploy_call(
                     "The machine_type, accelerator_type and accelerator_count, "
                     "autoscaling_target_accelerator_duty_cycle, "
                     "autoscaling_target_cpu_utilization, "
-                    "autoscaling_target_request_count_per_minute parameters "
+                    "autoscaling_target_request_count_per_minute, "
+                    "autoscaling_target_pubsub_num_undelivered_messages, "
+                    "autoscaling_pubsub_subscription_labels parameters "
                     "are ignored."
                 )
 
@@ -2079,6 +2114,19 @@ def _deploy_call(
                         [autoscaling_metric_spec]
                     )
 
+                if autoscaling_target_pubsub_num_undelivered_messages:
+                    autoscaling_metric_spec = gca_machine_resources_compat.AutoscalingMetricSpec(
+                        metric_name=(
+                            "pubsub.googleapis.com/subscription/"
+                            "num_undelivered_messages"
+                        ),
+                        target=autoscaling_target_pubsub_num_undelivered_messages,
+                        monitored_resource_labels=autoscaling_pubsub_subscription_labels,
+                    )
+                    dedicated_resources.autoscaling_metric_specs.extend(
+                        [autoscaling_metric_spec]
+                    )
+
                 if reservation_affinity_type:
                     machine_spec.reservation_affinity = utils.get_reservation_affinity(
                         reservation_affinity_type,
@@ -4399,6 +4447,8 @@ def deploy(
         autoscaling_target_cpu_utilization: Optional[int] = None,
         autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
         autoscaling_target_request_count_per_minute: Optional[int] = None,
+        autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None,
+        autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None,
     ) -> None:
         """Deploys a Model to the PrivateEndpoint.
 
@@ -4575,6 +4625,8 @@ def deploy(
             autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
             autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
             autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute,
+            autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages,
+            autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels,
         )
 
     def update(
@@ -5647,6 +5699,8 @@ def deploy(
         autoscaling_target_cpu_utilization: Optional[int] = None,
         autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
         autoscaling_target_request_count_per_minute: Optional[int] = None,
+        autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None,
+        autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None,
         enable_access_logging=False,
         disable_container_logging: bool = False,
         private_service_connect_config: Optional[
@@ -5765,6 +5819,12 @@ def deploy(
             autoscaling_target_request_count_per_minute (int):
                 Optional. The target number of requests per minute for autoscaling.
                 If set, the model will be scaled based on the number of requests it receives.
+            autoscaling_target_pubsub_num_undelivered_messages (int):
+                Optional. The target number of pubsub undelivered messages for autoscaling.
+                If set, the model will be scaled based on the pubsub queue size.
+            autoscaling_pubsub_subscription_labels (Dict[str, str]):
+                Optional. Monitored resource labels as key value pairs for
+                metric filtering for pubsub_num_undelivered_messages.
             enable_access_logging (bool):
                 Whether to enable endpoint access logging. Defaults to False.
             disable_container_logging (bool):
@@ -5818,8 +5878,12 @@ def deploy(
             autoscaling_target_request_count_per_minute (int):
                 Optional. The target number of requests per minute for autoscaling.
                 If set, the model will be scaled based on the number of requests it receives.
-                available_replica_count reaches required_replica_count, and the
-                rest of the replicas will be retried.
+            autoscaling_target_pubsub_num_undelivered_messages (int):
+                Optional. The target number of pubsub undelivered messages for autoscaling.
+                If set, the model will be scaled based on the pubsub queue size.
+            autoscaling_pubsub_subscription_labels (Dict[str, str]):
+                Optional. Monitored resource labels as key value pairs for
+                metric filtering for pubsub_num_undelivered_messages.
 
         Returns:
             endpoint (Union[Endpoint, PrivateEndpoint]):
@@ -5885,6 +5949,8 @@ def deploy(
             autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
             autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
             autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute,
+            autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages,
+            autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels,
             spot=spot,
             enable_access_logging=enable_access_logging,
             disable_container_logging=disable_container_logging,
@@ -5928,6 +5994,8 @@ def _deploy(
         autoscaling_target_cpu_utilization: Optional[int] = None,
         autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
         autoscaling_target_request_count_per_minute: Optional[int] = None,
+        autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None,
+        autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None,
         spot: bool = False,
         enable_access_logging=False,
         disable_container_logging: bool = False,
@@ -6048,6 +6116,12 @@ def _deploy(
             autoscaling_target_request_count_per_minute (int):
                 Optional. The target number of requests per minute for autoscaling.
                 If set, the model will be scaled based on the number of requests it receives.
+            autoscaling_target_pubsub_num_undelivered_messages (int):
+                Optional. The target number of pubsub undelivered messages for autoscaling.
+                If set, the model will be scaled based on the pubsub queue size.
+            autoscaling_pubsub_subscription_labels (Dict[str, str]):
+                Optional. Monitored resource labels as key value pairs for
+                metric filtering for pubsub_num_undelivered_messages.
             spot (bool):
                 Optional. Whether to schedule the deployment workload on spot VMs.
             enable_access_logging (bool):
@@ -6137,6 +6211,8 @@ def _deploy(
             autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
             autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
             autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute,
+            autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages,
+            autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels,
             spot=spot,
             enable_access_logging=enable_access_logging,
             disable_container_logging=disable_container_logging,
diff --git a/google/cloud/aiplatform/preview/models.py b/google/cloud/aiplatform/preview/models.py
index d001df5059..659395b84f 100644
--- a/google/cloud/aiplatform/preview/models.py
+++ b/google/cloud/aiplatform/preview/models.py
@@ -720,6 +720,8 @@ def deploy(
         autoscaling_target_cpu_utilization: Optional[int] = None,
         autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
         autoscaling_target_request_count_per_minute: Optional[int] = None,
+        autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None,
+        autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None,
         deployment_resource_pool: Optional[DeploymentResourcePool] = None,
         disable_container_logging: bool = False,
         fast_tryout_enabled: bool = False,
@@ -803,6 +805,11 @@ def deploy(
               specified. A default value of 60 will be used if not specified.
             autoscaling_target_request_count_per_minute (int): Target request
               count per minute per instance.
+            autoscaling_target_pubsub_num_undelivered_messages (int): Target
+              number of pubsub undelivered messages per instance.
+            autoscaling_pubsub_subscription_labels (Dict[str, str]): Optional.
+              Monitored resource labels as key value pairs for metric filtering
+              for pubsub_num_undelivered_messages.
             deployment_resource_pool (DeploymentResourcePool): Optional.
               Resource pool where the model will be deployed. All models that
               are deployed to the same DeploymentResourcePool will be hosted in
@@ -874,6 +881,8 @@ def deploy(
             autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
             autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
             autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute,
+            autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages,
+            autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels,
             deployment_resource_pool=deployment_resource_pool,
             disable_container_logging=disable_container_logging,
             fast_tryout_enabled=fast_tryout_enabled,
@@ -904,6 +913,8 @@ def _deploy(
         autoscaling_target_cpu_utilization: Optional[int] = None,
         autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
         autoscaling_target_request_count_per_minute: Optional[int] = None,
+        autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None,
+        autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None,
         deployment_resource_pool: Optional[DeploymentResourcePool] = None,
         disable_container_logging: bool = False,
         fast_tryout_enabled: bool = False,
@@ -981,6 +992,11 @@ def _deploy(
               specified. A default value of 60 will be used if not specified.
             autoscaling_target_request_count_per_minute (int): Target request
               count per minute per instance.
+            autoscaling_target_pubsub_num_undelivered_messages (int): Target
+              number of pubsub undelivered messages per instance.
+            autoscaling_pubsub_subscription_labels (Dict[str, str]): Optional.
+              Monitored resource labels as key value pairs for metric filtering
+              for pubsub_num_undelivered_messages.
             deployment_resource_pool (DeploymentResourcePool): Optional.
               Resource pool where the model will be deployed. All models that
               are deployed to the same DeploymentResourcePool will be hosted in
@@ -1041,6 +1057,8 @@ def _deploy(
             autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
             autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
             autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute,
+            autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages,
+            autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels,
             deployment_resource_pool=deployment_resource_pool,
             disable_container_logging=disable_container_logging,
             fast_tryout_enabled=fast_tryout_enabled,
@@ -1078,6 +1096,8 @@ def _deploy_call(
         autoscaling_target_cpu_utilization: Optional[int] = None,
         autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
         autoscaling_target_request_count_per_minute: Optional[int] = None,
+        autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None,
+        autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None,
         deployment_resource_pool: Optional[DeploymentResourcePool] = None,
         disable_container_logging: bool = False,
         fast_tryout_enabled: bool = False,
@@ -1162,6 +1182,11 @@ def _deploy_call(
               not specified.
             autoscaling_target_request_count_per_minute (int): Optional. Target
               request count per minute per instance.
+            autoscaling_target_pubsub_num_undelivered_messages (int): Optional.
+              Target number of pubsub undelivered messages per instance.
+            autoscaling_pubsub_subscription_labels (Dict[str, str]): Optional.
+              Monitored resource labels as key value pairs for metric filtering
+              for pubsub_num_undelivered_messages.
             deployment_resource_pool (DeploymentResourcePool): Optional.
               Resource pool where the model will be deployed. All models that
               are deployed to the same DeploymentResourcePool will be hosted in
@@ -1249,6 +1274,8 @@ def _deploy_call(
                 or autoscaling_target_accelerator_duty_cycle
                 or autoscaling_target_request_count_per_minute
                 or autoscaling_target_cpu_utilization
+                or autoscaling_target_pubsub_num_undelivered_messages
+                or autoscaling_pubsub_subscription_labels
             )
 
             # If the model supports both automatic and dedicated deployment resources,
@@ -1263,7 +1290,9 @@ def _deploy_call(
                     "The machine_type, accelerator_type and accelerator_count, "
                     "autoscaling_target_accelerator_duty_cycle, "
                     "autoscaling_target_cpu_utilization, "
-                    "autoscaling_target_request_count_per_minute parameters "
+                    "autoscaling_target_request_count_per_minute, "
+                    "autoscaling_target_pubsub_num_undelivered_messages, "
+                    "autoscaling_pubsub_subscription_labels parameters "
                     "are ignored."
                 )
 
@@ -1324,6 +1353,19 @@ def _deploy_call(
                         [autoscaling_metric_spec]
                     )
 
+                if autoscaling_target_pubsub_num_undelivered_messages:
+                    autoscaling_metric_spec = gca_machine_resources_compat.AutoscalingMetricSpec(
+                        metric_name=(
+                            "pubsub.googleapis.com/subscription/"
+                            "num_undelivered_messages"
+                        ),
+                        target=autoscaling_target_pubsub_num_undelivered_messages,
+                        monitored_resource_labels=autoscaling_pubsub_subscription_labels,
+                    )
+                    dedicated_resources.autoscaling_metric_specs.extend(
+                        [autoscaling_metric_spec]
+                    )
+
                 dedicated_resources.machine_spec = machine_spec
 
                 # Checking if flag fast_tryout_enabled is set, only in v1beta1
@@ -1371,6 +1413,8 @@ def _deploy_call(
                 or autoscaling_target_accelerator_duty_cycle
                 or autoscaling_target_cpu_utilization
                 or autoscaling_target_request_count_per_minute
+                or autoscaling_target_pubsub_num_undelivered_messages
+                or autoscaling_pubsub_subscription_labels
             )
 
             if provided_custom_machine_spec:
@@ -1379,7 +1423,9 @@ def _deploy_call(
                     "The machine_type, accelerator_type and accelerator_count, "
                     "autoscaling_target_accelerator_duty_cycle, "
                     "autoscaling_target_cpu_utilization, "
-                    "autoscaling_target_request_count_per_minute parameters "
+                    "autoscaling_target_request_count_per_minute, "
+                    "autoscaling_target_pubsub_num_undelivered_messages, "
+                    "autoscaling_pubsub_subscription_labels parameters "
                     "may not be set when `deployment_resource_pool` is "
                     "specified."
                 )
@@ -1639,6 +1685,8 @@ def deploy(
         autoscaling_target_cpu_utilization: Optional[int] = None,
         autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
         autoscaling_target_request_count_per_minute: Optional[int] = None,
+        autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None,
+        autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None,
         deployment_resource_pool: Optional[DeploymentResourcePool] = None,
         disable_container_logging: bool = False,
         fast_tryout_enabled: bool = False,
@@ -1743,6 +1791,11 @@ def deploy(
               not specified.
             autoscaling_target_request_count_per_minute (int): Optional. Target
               request count per minute per instance.
+            autoscaling_target_pubsub_num_undelivered_messages (int): Optional. Target
+              number of undelivered Pub/Sub messages per instance.
+            autoscaling_pubsub_subscription_labels (Dict[str, str]): Optional.
+              Monitored resource labels, as key-value pairs, used to filter the
+              Pub/Sub num_undelivered_messages metric.
             deployment_resource_pool (DeploymentResourcePool): Optional.
               Resource pool where the model will be deployed. All models that
               are deployed to the same DeploymentResourcePool will be hosted in
@@ -1831,6 +1884,8 @@ def deploy(
             autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
             autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
             autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute,
+            autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages,
+            autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels,
             deployment_resource_pool=deployment_resource_pool,
             disable_container_logging=disable_container_logging,
             fast_tryout_enabled=fast_tryout_enabled,
@@ -1870,6 +1925,8 @@ def _deploy(
         autoscaling_target_cpu_utilization: Optional[int] = None,
         autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
         autoscaling_target_request_count_per_minute: Optional[int] = None,
+        autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None,
+        autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None,
         deployment_resource_pool: Optional[DeploymentResourcePool] = None,
         disable_container_logging: bool = False,
         fast_tryout_enabled: bool = False,
@@ -1966,6 +2023,11 @@ def _deploy(
               not specified.
             autoscaling_target_request_count_per_minute (int): Optional. Target
               request count per minute per instance.
+            autoscaling_target_pubsub_num_undelivered_messages (int): Optional. Target
+              number of undelivered Pub/Sub messages per instance.
+            autoscaling_pubsub_subscription_labels (Dict[str, str]): Optional.
+              Monitored resource labels, as key-value pairs, used to filter the
+              Pub/Sub num_undelivered_messages metric.
             deployment_resource_pool (DeploymentResourcePool): Optional.
               Resource pool where the model will be deployed. All models that
               are deployed to the same DeploymentResourcePool will be hosted in
@@ -2059,6 +2121,8 @@ def _deploy(
             autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
             autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
             autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute,
+            autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages,
+            autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels,
             deployment_resource_pool=deployment_resource_pool,
             disable_container_logging=disable_container_logging,
             fast_tryout_enabled=fast_tryout_enabled,
diff --git a/tests/unit/aiplatform/test_endpoints.py b/tests/unit/aiplatform/test_endpoints.py
index 5b76194a9b..803e1239be 100644
--- a/tests/unit/aiplatform/test_endpoints.py
+++ b/tests/unit/aiplatform/test_endpoints.py
@@ -144,6 +144,9 @@
 _TEST_METRIC_NAME_REQUEST_COUNT = (
     "aiplatform.googleapis.com/prediction/online/request_count"
 )
+_TEST_METRIC_NAME_PUBSUB_NUM_UNDELIVERED_MESSAGE = (
+    "pubsub.googleapis.com/subscription/num_undelivered_messages"
+)
 
 _TEST_EXPLANATIONS = [gca_prediction_service.explanation.Explanation(attributions=[])]
 _TEST_V1BETA1_EXPLANATIONS = [
@@ -2150,6 +2153,68 @@ def test_deploy_with_autoscaling_target_request_count_per_minute_preview(
             timeout=None,
         )
 
+    @pytest.mark.usefixtures(
+        "get_endpoint_mock", "get_model_mock", "preview_deploy_model_mock"
+    )
+    @pytest.mark.parametrize("sync", [True, False])
+    def test_deploy_with_autoscaling_pubsub_num_undelivered_messages_preview(
+        self, preview_deploy_model_mock, sync
+    ):
+        test_endpoint = preview_models.Endpoint(_TEST_ENDPOINT_NAME)
+        test_model = preview_models.Model(_TEST_ID)
+        test_model._gca_resource.supported_deployment_resources_types.append(
+            aiplatform.gapic.Model.DeploymentResourcesType.DEDICATED_RESOURCES
+        )
+        test_endpoint.deploy(
+            model=test_model,
+            machine_type=_TEST_MACHINE_TYPE,
+            service_account=_TEST_SERVICE_ACCOUNT,
+            sync=sync,
+            deploy_request_timeout=None,
+            autoscaling_target_pubsub_num_undelivered_messages=3,
+            autoscaling_pubsub_subscription_labels={
+                "subscription_id": "test_subscription_id",
+                "project_id": "test_project_id",
+            },
+        )
+
+        if not sync:
+            test_endpoint.wait()
+
+        expected_dedicated_resources = gca_machine_resources_v1beta1.DedicatedResources(
+            machine_spec=gca_machine_resources_v1beta1.MachineSpec(
+                machine_type=_TEST_MACHINE_TYPE,
+            ),
+            min_replica_count=1,
+            max_replica_count=1,
+            autoscaling_metric_specs=[
+                gca_machine_resources_v1beta1.AutoscalingMetricSpec(
+                    metric_name=_TEST_METRIC_NAME_PUBSUB_NUM_UNDELIVERED_MESSAGE,
+                    target=3,
+                    monitored_resource_labels={
+                        "subscription_id": "test_subscription_id",
+                        "project_id": "test_project_id",
+                    },
+                ),
+            ],
+        )
+
+        expected_deployed_model = gca_endpoint_v1beta1.DeployedModel(
+            dedicated_resources=expected_dedicated_resources,
+            model=test_model.resource_name,
+            display_name=None,
+            service_account=_TEST_SERVICE_ACCOUNT,
+            enable_container_logging=True,
+            faster_deployment_config=gca_endpoint_v1beta1.FasterDeploymentConfig(),
+        )
+        preview_deploy_model_mock.assert_called_once_with(
+            endpoint=test_endpoint.resource_name,
+            deployed_model=expected_deployed_model,
+            traffic_split={"0": 100},
+            metadata=(),
+            timeout=None,
+        )
+
     @pytest.mark.usefixtures("get_endpoint_mock", "get_model_mock")
     @pytest.mark.parametrize("sync", [True, False])
     def test_deploy_with_explanations(self, deploy_model_with_explanations_mock, sync):
diff --git a/tests/unit/aiplatform/test_models.py b/tests/unit/aiplatform/test_models.py
index cf0047af03..a19ffdf6f6 100644
--- a/tests/unit/aiplatform/test_models.py
+++ b/tests/unit/aiplatform/test_models.py
@@ -524,6 +524,9 @@
 _TEST_METRIC_NAME_REQUEST_COUNT = (
     "aiplatform.googleapis.com/prediction/online/request_count"
 )
+_TEST_METRIC_NAME_PUBSUB_NUM_UNDELIVERED_MESSAGE = (
+    "pubsub.googleapis.com/subscription/num_undelivered_messages"
+)
 
 _TEST_LABELS = {"label1": "value1", "label2": "value2"}
 
@@ -2556,6 +2559,72 @@ def test_preview_deploy_no_endpoint_dedicated_resources_autoscaling_request_coun
             timeout=None,
         )
 
+    @pytest.mark.usefixtures(
+        "get_model_mock",
+        "create_endpoint_mock",
+        "get_endpoint_mock",
+    )
+    @pytest.mark.parametrize("sync", [True, False])
+    def test_preview_deploy_no_endpoint_dedicated_resources_autoscaling_pubsub_num_undelivered_messages(
+        self, preview_deploy_model_mock, sync
+    ):
+        test_model = preview_models.Model(_TEST_ID).preview
+        test_model._gca_resource.supported_deployment_resources_types.append(
+            aiplatform.gapic.Model.DeploymentResourcesType.DEDICATED_RESOURCES
+        )
+
+        test_endpoint = test_model.deploy(
+            machine_type=_TEST_MACHINE_TYPE,
+            accelerator_type=_TEST_ACCELERATOR_TYPE,
+            accelerator_count=_TEST_ACCELERATOR_COUNT,
+            sync=sync,
+            deploy_request_timeout=None,
+            system_labels=_TEST_LABELS,
+            autoscaling_target_pubsub_num_undelivered_messages=3,
+            autoscaling_pubsub_subscription_labels={
+                "subscription_id": "test_subscription_id",
+                "project_id": "test_project_id",
+            },
+        )
+
+        if not sync:
+            test_endpoint.wait()
+
+        expected_dedicated_resources = gca_machine_resources_v1beta1.DedicatedResources(
+            machine_spec=gca_machine_resources_v1beta1.MachineSpec(
+                machine_type=_TEST_MACHINE_TYPE,
+                accelerator_type=_TEST_ACCELERATOR_TYPE,
+                accelerator_count=_TEST_ACCELERATOR_COUNT,
+            ),
+            min_replica_count=1,
+            max_replica_count=1,
+            autoscaling_metric_specs=[
+                gca_machine_resources_v1beta1.AutoscalingMetricSpec(
+                    metric_name=_TEST_METRIC_NAME_PUBSUB_NUM_UNDELIVERED_MESSAGE,
+                    target=3,
+                    monitored_resource_labels={
+                        "subscription_id": "test_subscription_id",
+                        "project_id": "test_project_id",
+                    },
+                ),
+            ],
+        )
+        expected_deployed_model = gca_endpoint_v1beta1.DeployedModel(
+            dedicated_resources=expected_dedicated_resources,
+            model=test_model.resource_name,
+            display_name=None,
+            enable_container_logging=True,
+            faster_deployment_config=gca_endpoint_v1beta1.FasterDeploymentConfig(),
+            system_labels=_TEST_LABELS,
+        )
+        preview_deploy_model_mock.assert_called_once_with(
+            endpoint=test_endpoint.resource_name,
+            deployed_model=expected_deployed_model,
+            traffic_split={"0": 100},
+            metadata=(),
+            timeout=None,
+        )
+
     @pytest.mark.usefixtures(
         "get_endpoint_mock",
         "get_model_mock",

From 4b7d43ed4eb883ec785548bad83291d2972a2d5e Mon Sep 17 00:00:00 2001
From: "release-please[bot]"
 <55107282+release-please[bot]@users.noreply.github.com>
Date: Fri, 8 Aug 2025 10:08:54 -0700
Subject: [PATCH 5/5] chore(main): release 1.108.0 (#5686)

Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com>
---
 .release-please-manifest.json                       |  2 +-
 CHANGELOG.md                                        | 13 +++++++++++++
 google/cloud/aiplatform/gapic_version.py            |  2 +-
 .../v1/schema/predict/instance/gapic_version.py     |  2 +-
 .../v1/schema/predict/instance_v1/gapic_version.py  |  2 +-
 .../v1/schema/predict/params/gapic_version.py       |  2 +-
 .../v1/schema/predict/params_v1/gapic_version.py    |  2 +-
 .../v1/schema/predict/prediction/gapic_version.py   |  2 +-
 .../schema/predict/prediction_v1/gapic_version.py   |  2 +-
 .../schema/trainingjob/definition/gapic_version.py  |  2 +-
 .../trainingjob/definition_v1/gapic_version.py      |  2 +-
 .../schema/predict/instance/gapic_version.py        |  2 +-
 .../predict/instance_v1beta1/gapic_version.py       |  2 +-
 .../v1beta1/schema/predict/params/gapic_version.py  |  2 +-
 .../schema/predict/params_v1beta1/gapic_version.py  |  2 +-
 .../schema/predict/prediction/gapic_version.py      |  2 +-
 .../predict/prediction_v1beta1/gapic_version.py     |  2 +-
 .../schema/trainingjob/definition/gapic_version.py  |  2 +-
 .../trainingjob/definition_v1beta1/gapic_version.py |  2 +-
 google/cloud/aiplatform/version.py                  |  2 +-
 google/cloud/aiplatform_v1/gapic_version.py         |  2 +-
 google/cloud/aiplatform_v1beta1/gapic_version.py    |  2 +-
 pypi/_vertex_ai_placeholder/version.py              |  2 +-
 ...snippet_metadata_google.cloud.aiplatform.v1.json |  2 +-
 ...et_metadata_google.cloud.aiplatform.v1beta1.json |  2 +-
 25 files changed, 37 insertions(+), 24 deletions(-)

diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index ea2a15e049..d4e91c984f 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-    ".": "1.107.0"
+    ".": "1.108.0"
 }
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 06eefb9bf4..2956db1916 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,18 @@
 # Changelog
 
+## [1.108.0](https://github.com/googleapis/python-aiplatform/compare/v1.107.0...v1.108.0) (2025-08-07)
+
+
+### Features
+
+* Add autoscaling_target_pubsub_num_undelivered_messages option in Preview model deployment on Endpoint & Model classes. ([52eacce](https://github.com/googleapis/python-aiplatform/commit/52eacce2a4150721780b815764dce1fc0dd05a2a))
+
+
+### Bug Fixes
+
+* GenAI SDK client - Fix typo in error message for optimize_prompt ([63e1caa](https://github.com/googleapis/python-aiplatform/commit/63e1caa8879c237125aca93f184dfd06689fc128))
+* Set the `agent_framework` when initializing module-based agent engine ([6c15762](https://github.com/googleapis/python-aiplatform/commit/6c15762a97ac6e4cc751a50c680c82443c41adf5))
+
 ## [1.107.0](https://github.com/googleapis/python-aiplatform/compare/v1.106.0...v1.107.0) (2025-08-06)
 
 
diff --git a/google/cloud/aiplatform/gapic_version.py b/google/cloud/aiplatform/gapic_version.py
index e78c276de7..59135c65bd 100644
--- a/google/cloud/aiplatform/gapic_version.py
+++ b/google/cloud/aiplatform/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.107.0"  # {x-release-please-version}
+__version__ = "1.108.0"  # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1/schema/predict/instance/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/instance/gapic_version.py
index e78c276de7..59135c65bd 100644
--- a/google/cloud/aiplatform/v1/schema/predict/instance/gapic_version.py
+++ b/google/cloud/aiplatform/v1/schema/predict/instance/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.107.0"  # {x-release-please-version}
+__version__ = "1.108.0"  # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1/schema/predict/instance_v1/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/instance_v1/gapic_version.py
index e78c276de7..59135c65bd 100644
--- a/google/cloud/aiplatform/v1/schema/predict/instance_v1/gapic_version.py
+++ b/google/cloud/aiplatform/v1/schema/predict/instance_v1/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.107.0"  # {x-release-please-version}
+__version__ = "1.108.0"  # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1/schema/predict/params/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/params/gapic_version.py
index e78c276de7..59135c65bd 100644
--- a/google/cloud/aiplatform/v1/schema/predict/params/gapic_version.py
+++ b/google/cloud/aiplatform/v1/schema/predict/params/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.107.0"  # {x-release-please-version}
+__version__ = "1.108.0"  # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1/schema/predict/params_v1/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/params_v1/gapic_version.py
index e78c276de7..59135c65bd 100644
--- a/google/cloud/aiplatform/v1/schema/predict/params_v1/gapic_version.py
+++ b/google/cloud/aiplatform/v1/schema/predict/params_v1/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.107.0"  # {x-release-please-version}
+__version__ = "1.108.0"  # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1/schema/predict/prediction/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/prediction/gapic_version.py
index e78c276de7..59135c65bd 100644
--- a/google/cloud/aiplatform/v1/schema/predict/prediction/gapic_version.py
+++ b/google/cloud/aiplatform/v1/schema/predict/prediction/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.107.0"  # {x-release-please-version}
+__version__ = "1.108.0"  # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1/schema/predict/prediction_v1/gapic_version.py b/google/cloud/aiplatform/v1/schema/predict/prediction_v1/gapic_version.py
index e78c276de7..59135c65bd 100644
--- a/google/cloud/aiplatform/v1/schema/predict/prediction_v1/gapic_version.py
+++ b/google/cloud/aiplatform/v1/schema/predict/prediction_v1/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.107.0"  # {x-release-please-version}
+__version__ = "1.108.0"  # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1/schema/trainingjob/definition/gapic_version.py b/google/cloud/aiplatform/v1/schema/trainingjob/definition/gapic_version.py
index e78c276de7..59135c65bd 100644
--- a/google/cloud/aiplatform/v1/schema/trainingjob/definition/gapic_version.py
+++ b/google/cloud/aiplatform/v1/schema/trainingjob/definition/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.107.0"  # {x-release-please-version}
+__version__ = "1.108.0"  # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1/schema/trainingjob/definition_v1/gapic_version.py b/google/cloud/aiplatform/v1/schema/trainingjob/definition_v1/gapic_version.py
index e78c276de7..59135c65bd 100644
--- a/google/cloud/aiplatform/v1/schema/trainingjob/definition_v1/gapic_version.py
+++ b/google/cloud/aiplatform/v1/schema/trainingjob/definition_v1/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.107.0"  # {x-release-please-version}
+__version__ = "1.108.0"  # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/instance/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/instance/gapic_version.py
index e78c276de7..59135c65bd 100644
--- a/google/cloud/aiplatform/v1beta1/schema/predict/instance/gapic_version.py
+++ b/google/cloud/aiplatform/v1beta1/schema/predict/instance/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.107.0"  # {x-release-please-version}
+__version__ = "1.108.0"  # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/instance_v1beta1/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/instance_v1beta1/gapic_version.py
index e78c276de7..59135c65bd 100644
--- a/google/cloud/aiplatform/v1beta1/schema/predict/instance_v1beta1/gapic_version.py
+++ b/google/cloud/aiplatform/v1beta1/schema/predict/instance_v1beta1/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.107.0"  # {x-release-please-version}
+__version__ = "1.108.0"  # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/params/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/params/gapic_version.py
index e78c276de7..59135c65bd 100644
--- a/google/cloud/aiplatform/v1beta1/schema/predict/params/gapic_version.py
+++ b/google/cloud/aiplatform/v1beta1/schema/predict/params/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.107.0"  # {x-release-please-version}
+__version__ = "1.108.0"  # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/params_v1beta1/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/params_v1beta1/gapic_version.py
index e78c276de7..59135c65bd 100644
--- a/google/cloud/aiplatform/v1beta1/schema/predict/params_v1beta1/gapic_version.py
+++ b/google/cloud/aiplatform/v1beta1/schema/predict/params_v1beta1/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.107.0"  # {x-release-please-version}
+__version__ = "1.108.0"  # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/prediction/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/prediction/gapic_version.py
index e78c276de7..59135c65bd 100644
--- a/google/cloud/aiplatform/v1beta1/schema/predict/prediction/gapic_version.py
+++ b/google/cloud/aiplatform/v1beta1/schema/predict/prediction/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.107.0"  # {x-release-please-version}
+__version__ = "1.108.0"  # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1beta1/schema/predict/prediction_v1beta1/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/predict/prediction_v1beta1/gapic_version.py
index e78c276de7..59135c65bd 100644
--- a/google/cloud/aiplatform/v1beta1/schema/predict/prediction_v1beta1/gapic_version.py
+++ b/google/cloud/aiplatform/v1beta1/schema/predict/prediction_v1beta1/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.107.0"  # {x-release-please-version}
+__version__ = "1.108.0"  # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition/gapic_version.py
index e78c276de7..59135c65bd 100644
--- a/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition/gapic_version.py
+++ b/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.107.0"  # {x-release-please-version}
+__version__ = "1.108.0"  # {x-release-please-version}
diff --git a/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition_v1beta1/gapic_version.py b/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition_v1beta1/gapic_version.py
index e78c276de7..59135c65bd 100644
--- a/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition_v1beta1/gapic_version.py
+++ b/google/cloud/aiplatform/v1beta1/schema/trainingjob/definition_v1beta1/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.107.0"  # {x-release-please-version}
+__version__ = "1.108.0"  # {x-release-please-version}
diff --git a/google/cloud/aiplatform/version.py b/google/cloud/aiplatform/version.py
index a82ab3f92d..ecf5efb7d7 100644
--- a/google/cloud/aiplatform/version.py
+++ b/google/cloud/aiplatform/version.py
@@ -15,4 +15,4 @@
 # limitations under the License.
 #
 
-__version__ = "1.107.0"
+__version__ = "1.108.0"
diff --git a/google/cloud/aiplatform_v1/gapic_version.py b/google/cloud/aiplatform_v1/gapic_version.py
index e78c276de7..59135c65bd 100644
--- a/google/cloud/aiplatform_v1/gapic_version.py
+++ b/google/cloud/aiplatform_v1/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.107.0"  # {x-release-please-version}
+__version__ = "1.108.0"  # {x-release-please-version}
diff --git a/google/cloud/aiplatform_v1beta1/gapic_version.py b/google/cloud/aiplatform_v1beta1/gapic_version.py
index e78c276de7..59135c65bd 100644
--- a/google/cloud/aiplatform_v1beta1/gapic_version.py
+++ b/google/cloud/aiplatform_v1beta1/gapic_version.py
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-__version__ = "1.107.0"  # {x-release-please-version}
+__version__ = "1.108.0"  # {x-release-please-version}
diff --git a/pypi/_vertex_ai_placeholder/version.py b/pypi/_vertex_ai_placeholder/version.py
index 0556acd6c8..795ab1808b 100644
--- a/pypi/_vertex_ai_placeholder/version.py
+++ b/pypi/_vertex_ai_placeholder/version.py
@@ -15,4 +15,4 @@
 # limitations under the License.
 #
 
-__version__ = "1.107.0"
+__version__ = "1.108.0"
diff --git a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json
index 18f464d596..062ad7b5e0 100644
--- a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json
+++ b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json
@@ -8,7 +8,7 @@
     ],
     "language": "PYTHON",
     "name": "google-cloud-aiplatform",
-    "version": "1.107.0"
+    "version": "1.108.0"
   },
   "snippets": [
     {
diff --git a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json
index bccb1b7423..4035068c6f 100644
--- a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json
+++ b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json
@@ -8,7 +8,7 @@
     ],
     "language": "PYTHON",
     "name": "google-cloud-aiplatform",
-    "version": "1.107.0"
+    "version": "1.108.0"
   },
   "snippets": [
     {