From 348b9f81756db7a00b0ff01d3144b417571489eb Mon Sep 17 00:00:00 2001
From: Jen Hamon <jhamon@pinecone.io>
Date: Tue, 4 Nov 2025 07:01:44 -0500
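Subject: [PATCH 1/3] Implement update_by_metadata

Allow update() to target vectors by metadata filter as well as by ID.

- update() / update_request() accept new `filter` and `dry_run`
  arguments; `id` is now optional.
- Index, IndexAsyncio and the gRPC index raise ValueError when both
  `id` and `filter` are given, or when neither is given.
- update() now returns a dict that includes `matched_records` when the
  response carries it. The synchronous gRPC update() returns the output
  of parse_update_response() instead of the raw UpdateResponse.
- db_control request factory: replace the inline serverless schema
  parsing with the __parse_schema helper.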
---
 pinecone/db_control/request_factory.py        |  23 +---
 pinecone/db_data/index.py                     |  87 +++++++++++-
 pinecone/db_data/index_asyncio.py             | 125 +++++++++++++++++-
 pinecone/db_data/index_asyncio_interface.py   |  76 ++++++++---
 pinecone/db_data/interfaces.py                |  63 +++++++--
 pinecone/db_data/request_factory.py           |  16 ++-
 pinecone/grpc/index_grpc.py                   |  90 ++++++++++---
 pinecone/grpc/utils.py                        |  12 +-
 .../data_grpc_futures/test_query_future.py    |  13 +-
 tests/unit/test_index.py                      |  60 ++++++++-
 tests/unit_grpc/test_grpc_index_update.py     |  64 +++++++++
 11 files changed, 552 insertions(+), 77 deletions(-)

diff --git a/pinecone/db_control/request_factory.py b/pinecone/db_control/request_factory.py
index 32a45648..229e92fb 100644
--- a/pinecone/db_control/request_factory.py
+++ b/pinecone/db_control/request_factory.py
@@ -245,25 +245,10 @@ def __parse_index_spec(spec: Union[Dict, ServerlessSpec, PodSpec, ByocSpec]) ->
                 if "schema" in spec["serverless"]:
                     schema_dict = spec["serverless"]["schema"]
                     if isinstance(schema_dict, dict):
-                        # Process fields if present, otherwise pass through as-is
-                        schema_kwargs = {}
-                        if "fields" in schema_dict:
-                            fields = {}
-                            for field_name, field_config in schema_dict["fields"].items():
-                                if isinstance(field_config, dict):
-                                    # Pass through the entire field_config dict to allow future API fields
-                                    fields[field_name] = BackupModelSchemaFields(**field_config)
-                                else:
-                                    # If not a dict, create with default filterable=True
-                                    fields[field_name] = BackupModelSchemaFields(filterable=True)
-                            schema_kwargs["fields"] = fields
-
-                        # Pass through any other fields in schema_dict to allow future API fields
-                        for key, value in schema_dict.items():
-                            if key != "fields":
-                                schema_kwargs[key] = value
-
-                        spec["serverless"]["schema"] = BackupModelSchema(**schema_kwargs)
+                        # Use the helper method to handle both formats correctly
+                        spec["serverless"]["schema"] = (
+                            PineconeDBControlRequestFactory.__parse_schema(schema_dict)
+                        )
 
                 index_spec = IndexSpec(serverless=ServerlessSpecModel(**spec["serverless"]))
             elif "pod" in spec:
diff --git a/pinecone/db_data/index.py b/pinecone/db_data/index.py
index 20feab7f..c52ae8a7 100644
--- a/pinecone/db_data/index.py
+++ b/pinecone/db_data/index.py
@@ -514,25 +514,108 @@ def query_namespaces(
     @validate_and_convert_errors
     def update(
         self,
-        id: str,
+        id: Optional[str] = None,
         values: Optional[List[float]] = None,
         set_metadata: Optional[VectorMetadataTypedDict] = None,
         namespace: Optional[str] = None,
         sparse_values: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
+        filter: Optional[FilterTypedDict] = None,
+        dry_run: Optional[bool] = None,
         **kwargs,
     ) -> Dict[str, Any]:
-        return self._vector_api.update_vector(
+        """Update vector(s) in a namespace by ID or metadata filter.
+
+        The update can be performed by vector ID or by metadata filter. When updating by ID,
+        a single vector is updated. When updating by metadata filter, all vectors matching
+        the filter are updated.
+
+        If a value is included, it will overwrite the previous value.
+        If a set_metadata is included, the values of the fields specified in it will be
+        added or overwrite the previous value.
+
+        Examples:
+
+        Update by ID:
+
+        .. code-block:: python
+
+            >>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace')
+            >>> index.update(id='id1', set_metadata={'key': 'value'}, namespace='my_namespace')
+            >>> index.update(id='id1', values=[1, 2, 3], sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]},
+            ...              namespace='my_namespace')
+            >>> index.update(id='id1', values=[1, 2, 3], sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4]),
+            ...              namespace='my_namespace')
+
+        Update by metadata filter:
+
+        .. code-block:: python
+
+            >>> # Update metadata for all vectors matching a filter
+            >>> index.update(
+            ...     filter={'genre': {'$eq': 'comedy'}},
+            ...     set_metadata={'status': 'active'},
+            ...     namespace='my_namespace'
+            ... )
+            >>> # Preview how many vectors would be updated (dry run)
+            >>> result = index.update(
+            ...     filter={'year': {'$gte': 2020}},
+            ...     set_metadata={'updated': True},
+            ...     dry_run=True,
+            ...     namespace='my_namespace'
+            ... )
+            >>> print(f"Would update {result.get('matched_records', 0)} vectors")
+
+        Args:
+            id (str): Vector's unique id. Required when updating by ID. Must be None when filter is provided. [optional]
+            values (List[float]): Vector values to set. [optional]
+            set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]):
+                Metadata to set for vector(s). [optional]
+            namespace (str): Namespace name where to update the vector(s). [optional]
+            sparse_values (Dict[str, Union[List[float], List[int]]]): Sparse values to update for the vector.
+                Expected to be either a SparseValues object or a dict of the form:
+                {'indices': List[int], 'values': List[float]} where the lists each have the same length. [optional]
+            filter (Dict[str, Union[str, float, int, bool, List, dict]]): A metadata filter expression.
+                When provided, the update is applied to all records that match the filter. Mutually exclusive with id.
+                See `metadata filtering <https://www.pinecone.io/docs/metadata-filtering/>`_ [optional]
+            dry_run (bool): If True, return the number of records that match the filter without executing the update.
+                Only meaningful when filter is provided. Defaults to False. [optional]
+
+        Returns:
+            Dict[str, Any]: An empty dictionary if the update was successful when updating by ID.
+                When updating by filter, the dictionary may contain a 'matched_records' key indicating
+                how many records matched the filter (even when dry_run is False).
+
+        Raises:
+            ValueError: If both id and filter are provided, or if neither is provided.
+        """
+        if id is not None and filter is not None:
+            raise ValueError(
+                "Cannot provide both 'id' and 'filter'. Use 'id' to update a single vector or 'filter' to update multiple vectors."
+            )
+        if id is None and filter is None:
+            raise ValueError("Either 'id' or 'filter' must be provided.")
+
+        response = self._vector_api.update_vector(
             IndexRequestFactory.update_request(
                 id=id,
                 values=values,
                 set_metadata=set_metadata,
                 namespace=namespace,
                 sparse_values=sparse_values,
+                filter=filter,
+                dry_run=dry_run,
                 **kwargs,
             ),
             **self._openapi_kwargs(kwargs),
         )
 
+        # Convert UpdateResponse to dict, including matched_records if present
+        result = {}
+        if hasattr(response, "matched_records") and response.matched_records is not None:
+            result["matched_records"] = response.matched_records
+
+        return result
+
     @validate_and_convert_errors
     def describe_index_stats(
         self, filter: Optional[FilterTypedDict] = None, **kwargs
diff --git a/pinecone/db_data/index_asyncio.py b/pinecone/db_data/index_asyncio.py
index b1818d7c..9ca9e0fc 100644
--- a/pinecone/db_data/index_asyncio.py
+++ b/pinecone/db_data/index_asyncio.py
@@ -554,25 +554,146 @@ async def query_namespaces(
     @validate_and_convert_errors
     async def update(
         self,
-        id: str,
+        id: Optional[str] = None,
         values: Optional[List[float]] = None,
         set_metadata: Optional[VectorMetadataTypedDict] = None,
         namespace: Optional[str] = None,
         sparse_values: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
+        filter: Optional[FilterTypedDict] = None,
+        dry_run: Optional[bool] = None,
         **kwargs,
     ) -> Dict[str, Any]:
-        return await self._vector_api.update_vector(
+        """Update vector(s) in a namespace by ID or metadata filter.
+
+        The update can be performed by vector ID or by metadata filter. When updating by ID,
+        a single vector is updated. When updating by metadata filter, all vectors matching
+        the filter are updated.
+
+        If a value is included, it will overwrite the previous value.
+        If a set_metadata is included, the values of the fields specified in it will be
+        added or overwrite the previous value.
+
+        Examples:
+
+        Update by ID:
+
+        .. code-block:: python
+
+            import asyncio
+            from pinecone import Pinecone, Vector, SparseValues
+
+            async def main():
+                pc = Pinecone()
+                async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
+                    # Update vector values
+                    await idx.update(
+                        id='id1',
+                        values=[0.1, 0.2, 0.3, ...],
+                        namespace='my_namespace'
+                    )
+
+                    # Update metadata
+                    await idx.update(
+                        id='id1',
+                        set_metadata={'key': 'value'},
+                        namespace='my_namespace'
+                    )
+
+                    # Update sparse values
+                    await idx.update(
+                        id='id1',
+                        sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]},
+                        namespace='my_namespace'
+                    )
+
+                    # Update sparse values with SparseValues object
+                    await idx.update(
+                        id='id1',
+                        sparse_values=SparseValues(indices=[234781, 5432], values=[0.2, 0.4]),
+                        namespace='my_namespace'
+                    )
+
+            asyncio.run(main())
+
+        Update by metadata filter:
+
+        .. code-block:: python
+
+            import asyncio
+            from pinecone import Pinecone
+
+            async def main():
+                pc = Pinecone()
+                async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
+                    # Update metadata for all vectors matching a filter
+                    await idx.update(
+                        filter={'genre': {'$eq': 'comedy'}},
+                        set_metadata={'status': 'active'},
+                        namespace='my_namespace'
+                    )
+
+                    # Preview how many vectors would be updated (dry run)
+                    result = await idx.update(
+                        filter={'year': {'$gte': 2020}},
+                        set_metadata={'updated': True},
+                        dry_run=True,
+                        namespace='my_namespace'
+                    )
+                    print(f"Would update {result.get('matched_records', 0)} vectors")
+
+            asyncio.run(main())
+
+        Args:
+            id (str): Vector's unique id. Required when updating by ID. Must be None when filter is provided. [optional]
+            values (List[float]): Vector values to set. [optional]
+            set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]):
+                Metadata to set for vector(s). [optional]
+            namespace (str): Namespace name where to update the vector(s). [optional]
+            sparse_values (Dict[str, Union[List[float], List[int]]]): Sparse values to update for the vector.
+                Expected to be either a SparseValues object or a dict of the form:
+                {'indices': List[int], 'values': List[float]} where the lists each have the same length. [optional]
+            filter (Dict[str, Union[str, float, int, bool, List, dict]]): A metadata filter expression.
+                When provided, the update is applied to all records that match the filter. Mutually exclusive with id.
+                See `metadata filtering <https://www.pinecone.io/docs/metadata-filtering/>`_ [optional]
+            dry_run (bool): If True, return the number of records that match the filter without executing the update.
+                Only meaningful when filter is provided. Defaults to False. [optional]
+
+        Returns:
+            Dict[str, Any]: An empty dictionary if the update was successful when updating by ID.
+                When updating by filter, the dictionary may contain a 'matched_records' key indicating
+                how many records matched the filter (even when dry_run is False).
+
+        Raises:
+            ValueError: If both id and filter are provided, or if neither is provided.
+        """
+        if id is not None and filter is not None:
+            raise ValueError(
+                "Cannot provide both 'id' and 'filter'. Use 'id' to update a single vector or 'filter' to update multiple vectors."
+            )
+        if id is None and filter is None:
+            raise ValueError("Either 'id' or 'filter' must be provided.")
+
+        response = await self._vector_api.update_vector(
             IndexRequestFactory.update_request(
                 id=id,
                 values=values,
                 set_metadata=set_metadata,
                 namespace=namespace,
                 sparse_values=sparse_values,
+                filter=filter,
+                dry_run=dry_run,
                 **kwargs,
             ),
             **self._openapi_kwargs(kwargs),
         )
 
+        # Convert UpdateResponse to dict, including matched_records if present
+        result = {}
+        if hasattr(response, "matched_records") and response.matched_records is not None:
+            result["matched_records"] = response.matched_records
+
+        return result
+
     @validate_and_convert_errors
     async def describe_index_stats(
         self, filter: Optional[FilterTypedDict] = None, **kwargs
diff --git a/pinecone/db_data/index_asyncio_interface.py b/pinecone/db_data/index_asyncio_interface.py
index 3f3838ec..54eccbc0 100644
--- a/pinecone/db_data/index_asyncio_interface.py
+++ b/pinecone/db_data/index_asyncio_interface.py
@@ -520,33 +520,29 @@ async def main():
     @abstractmethod
     async def update(
         self,
-        id: str,
+        id: Optional[str] = None,
         values: Optional[List[float]] = None,
         set_metadata: Optional[VectorMetadataTypedDict] = None,
         namespace: Optional[str] = None,
         sparse_values: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
+        filter: Optional[FilterTypedDict] = None,
+        dry_run: Optional[bool] = None,
         **kwargs,
     ) -> Dict[str, Any]:
-        """
-        The Update operation updates vector in a namespace.
+        """The Update operation updates vector(s) in a namespace.
 
-        Args:
-            id (str): Vector's unique id.
-            values (List[float]): vector values to set. [optional]
-            set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]):
-                metadata to set for vector. [optional]
-            namespace (str): Namespace name where to update the vector.. [optional]
-            sparse_values: (Dict[str, Union[List[float], List[int]]]): sparse values to update for the vector.
-                           Expected to be either a SparseValues object or a dict of the form:
-                           {'indices': List[int], 'values': List[float]} where the lists each have the same length.
+        The update can be performed by vector ID or by metadata filter. When updating by ID,
+        a single vector is updated. When updating by metadata filter, all vectors matching
+        the filter are updated.
 
         If a value is included, it will overwrite the previous value.
-        If a set_metadata is included,
-        the values of the fields specified in it will be added or overwrite the previous value.
-
+        If a set_metadata is included, the values of the fields specified in it will be
+        added or overwrite the previous value.
 
         Examples:
 
+        Update by ID:
+
         .. code-block:: python
 
             import asyncio
@@ -585,6 +581,56 @@ async def main():
 
             asyncio.run(main())
 
+        Update by metadata filter:
+
+        .. code-block:: python
+
+            import asyncio
+            from pinecone import Pinecone
+
+            async def main():
+                pc = Pinecone()
+                async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
+                    # Update metadata for all vectors matching a filter
+                    await idx.update(
+                        filter={'genre': {'$eq': 'comedy'}},
+                        set_metadata={'status': 'active'},
+                        namespace='my_namespace'
+                    )
+
+                    # Preview how many vectors would be updated (dry run)
+                    result = await idx.update(
+                        filter={'year': {'$gte': 2020}},
+                        set_metadata={'updated': True},
+                        dry_run=True,
+                        namespace='my_namespace'
+                    )
+                    print(f"Would update {result.get('matched_records', 0)} vectors")
+
+            asyncio.run(main())
+
+        Args:
+            id (str): Vector's unique id. Required when updating by ID. Must be None when filter is provided. [optional]
+            values (List[float]): Vector values to set. [optional]
+            set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]):
+                Metadata to set for vector(s). [optional]
+            namespace (str): Namespace name where to update the vector(s). [optional]
+            sparse_values (Dict[str, Union[List[float], List[int]]]): Sparse values to update for the vector.
+                Expected to be either a SparseValues object or a dict of the form:
+                {'indices': List[int], 'values': List[float]} where the lists each have the same length. [optional]
+            filter (Dict[str, Union[str, float, int, bool, List, dict]]): A metadata filter expression.
+                When provided, the update is applied to all records that match the filter. Mutually exclusive with id.
+                See `metadata filtering <https://www.pinecone.io/docs/metadata-filtering/>`_ [optional]
+            dry_run (bool): If True, return the number of records that match the filter without executing the update.
+                Only meaningful when filter is provided. Defaults to False. [optional]
+
+        Returns:
+            Dict[str, Any]: An empty dictionary if the update was successful when updating by ID.
+                When updating by filter, the dictionary may contain a 'matched_records' key indicating
+                how many records matched the filter (even when dry_run is False).
+
+        Raises:
+            ValueError: If both id and filter are provided, or if neither is provided.
         """
         pass
 
diff --git a/pinecone/db_data/interfaces.py b/pinecone/db_data/interfaces.py
index 3b1e3be6..b04f9946 100644
--- a/pinecone/db_data/interfaces.py
+++ b/pinecone/db_data/interfaces.py
@@ -705,21 +705,29 @@ def query_namespaces(
     @abstractmethod
     def update(
         self,
-        id: str,
+        id: Optional[str] = None,
         values: Optional[List[float]] = None,
         set_metadata: Optional[VectorMetadataTypedDict] = None,
         namespace: Optional[str] = None,
         sparse_values: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
+        filter: Optional[FilterTypedDict] = None,
+        dry_run: Optional[bool] = None,
         **kwargs,
     ) -> Dict[str, Any]:
-        """
-        The Update operation updates vector in a namespace.
+        """The Update operation updates vector(s) in a namespace.
+
+        The update can be performed by vector ID or by metadata filter. When updating by ID,
+        a single vector is updated. When updating by metadata filter, all vectors matching
+        the filter are updated.
+
         If a value is included, it will overwrite the previous value.
-        If a set_metadata is included,
-        the values of the fields specified in it will be added or overwrite the previous value.
+        If a set_metadata is included, the values of the fields specified in it will be
+        added or overwrite the previous value.
 
         Examples:
 
+        Update by ID:
+
         .. code-block:: python
 
             >>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace')
@@ -729,18 +737,47 @@ def update(
             >>> index.update(id='id1', values=[1, 2, 3], sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4]),
             >>>              namespace='my_namespace')
 
+        Update by metadata filter:
+
+        .. code-block:: python
+
+            >>> # Update metadata for all vectors matching a filter
+            >>> index.update(
+            ...     filter={'genre': {'$eq': 'comedy'}},
+            ...     set_metadata={'status': 'active'},
+            ...     namespace='my_namespace'
+            ... )
+            >>> # Preview how many vectors would be updated (dry run)
+            >>> result = index.update(
+            ...     filter={'year': {'$gte': 2020}},
+            ...     set_metadata={'updated': True},
+            ...     dry_run=True,
+            ...     namespace='my_namespace'
+            ... )
+            >>> print(f"Would update {result.get('matched_records', 0)} vectors")
+
         Args:
-            id (str): Vector's unique id.
-            values (List[float]): vector values to set. [optional]
+            id (str): Vector's unique id. Required when updating by ID. Must be None when filter is provided. [optional]
+            values (List[float]): Vector values to set. [optional]
             set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]):
-                metadata to set for vector. [optional]
-            namespace (str): Namespace name where to update the vector.. [optional]
-            sparse_values: (Dict[str, Union[List[float], List[int]]]): sparse values to update for the vector.
-                           Expected to be either a SparseValues object or a dict of the form:
-                           {'indices': List[int], 'values': List[float]} where the lists each have the same length.
+                Metadata to set for vector(s). [optional]
+            namespace (str): Namespace name where to update the vector(s). [optional]
+            sparse_values (Dict[str, Union[List[float], List[int]]]): Sparse values to update for the vector.
+                Expected to be either a SparseValues object or a dict of the form:
+                {'indices': List[int], 'values': List[float]} where the lists each have the same length. [optional]
+            filter (Dict[str, Union[str, float, int, bool, List, dict]]): A metadata filter expression.
+                When provided, the update is applied to all records that match the filter. Mutually exclusive with id.
+                See `metadata filtering <https://www.pinecone.io/docs/metadata-filtering/>`_ [optional]
+            dry_run (bool): If True, return the number of records that match the filter without executing the update.
+                Only meaningful when filter is provided. Defaults to False. [optional]
 
+        Returns:
+            Dict[str, Any]: An empty dictionary if the update was successful when updating by ID.
+                When updating by filter, the dictionary may contain a 'matched_records' key indicating
+                how many records matched the filter (even when dry_run is False).
 
-        Returns: An empty dictionary if the update was successful.
+        Raises:
+            ValueError: If both id and filter are provided, or if neither is provided.
         """
         pass
 
diff --git a/pinecone/db_data/request_factory.py b/pinecone/db_data/request_factory.py
index 64bb65d9..c4290cda 100644
--- a/pinecone/db_data/request_factory.py
+++ b/pinecone/db_data/request_factory.py
@@ -135,11 +135,13 @@ def fetch_by_metadata_request(
 
     @staticmethod
     def update_request(
-        id: str,
+        id: Optional[str] = None,
         values: Optional[List[float]] = None,
         set_metadata: Optional[VectorMetadataTypedDict] = None,
         namespace: Optional[str] = None,
         sparse_values: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
+        filter: Optional[FilterTypedDict] = None,
+        dry_run: Optional[bool] = None,
         **kwargs,
     ) -> UpdateRequest:
         _check_type = kwargs.pop("_check_type", False)
@@ -150,12 +152,18 @@ def update_request(
                 ("set_metadata", set_metadata),
                 ("namespace", namespace),
                 ("sparse_values", sparse_values_normalized),
+                ("filter", filter),
+                ("dry_run", dry_run),
             ]
         )
 
-        return UpdateRequest(
-            id=id, **args_dict, _check_type=_check_type, **non_openapi_kwargs(kwargs)
-        )
+        # Only include id if it's not None
+        request_kwargs = {"_check_type": _check_type, **non_openapi_kwargs(kwargs)}
+        if id is not None:
+            request_kwargs["id"] = id
+        request_kwargs.update(args_dict)
+
+        return UpdateRequest(**request_kwargs)
 
     @staticmethod
     def describe_index_stats_request(
diff --git a/pinecone/grpc/index_grpc.py b/pinecone/grpc/index_grpc.py
index a3ac23d7..8d99ec12 100644
--- a/pinecone/grpc/index_grpc.py
+++ b/pinecone/grpc/index_grpc.py
@@ -45,7 +45,6 @@
     ListRequest,
     DescribeIndexStatsRequest,
     DeleteResponse,
-    UpdateResponse,
     SparseValues as GRPCSparseValues,
     DescribeNamespaceRequest,
     DeleteNamespaceRequest,
@@ -578,22 +577,30 @@ def query_namespaces(
 
     def update(
         self,
-        id: str,
+        id: Optional[str] = None,
         async_req: bool = False,
         values: Optional[List[float]] = None,
         set_metadata: Optional[VectorMetadataTypedDict] = None,
         namespace: Optional[str] = None,
         sparse_values: Optional[Union[GRPCSparseValues, SparseVectorTypedDict]] = None,
+        filter: Optional[FilterTypedDict] = None,
+        dry_run: Optional[bool] = None,
         **kwargs,
-    ) -> Union[UpdateResponse, PineconeGrpcFuture]:
-        """
-        The Update operation updates vector in a namespace.
+    ) -> Union[Dict[str, Any], PineconeGrpcFuture]:
+        """Update vector(s) in a namespace by ID or metadata filter.
+
+        The update can be performed by vector ID or by metadata filter. When updating by ID,
+        a single vector is updated. When updating by metadata filter, all vectors matching
+        the filter are updated.
+
         If a value is included, it will overwrite the previous value.
-        If a set_metadata is included,
-        the values of the fields specified in it will be added or overwrite the previous value.
+        If a set_metadata is included, the values of the fields specified in it will be
+        added or overwrite the previous value.
 
         Examples:
 
+        Update by ID:
+
         .. code-block:: python
 
             >>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace')
@@ -603,26 +610,68 @@ def update(
             >>> index.update(id='id1', values=[1, 2, 3], sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4]),
             >>>              namespace='my_namespace')
 
+        Update by metadata filter:
+
+        .. code-block:: python
+
+            >>> # Update metadata for all vectors matching a filter
+            >>> index.update(
+            ...     filter={'genre': {'$eq': 'comedy'}},
+            ...     set_metadata={'status': 'active'},
+            ...     namespace='my_namespace'
+            ... )
+            >>> # Preview how many vectors would be updated (dry run)
+            >>> result = index.update(
+            ...     filter={'year': {'$gte': 2020}},
+            ...     set_metadata={'updated': True},
+            ...     dry_run=True,
+            ...     namespace='my_namespace'
+            ... )
+            >>> print(f"Would update {result.get('matched_records', 0)} vectors")
+
         Args:
-            id (str): Vector's unique id.
+            id (str): Vector's unique id. Required when updating by ID. Must be None when filter is provided. [optional]
             async_req (bool): If True, the update operation will be performed asynchronously.
                               Defaults to False. [optional]
-            values (List[float]): vector values to set. [optional]
+            values (List[float]): Vector values to set. [optional]
             set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]):
-                metadata to set for vector. [optional]
-            namespace (str): Namespace name where to update the vector.. [optional]
-            sparse_values: (Dict[str, Union[List[float], List[int]]]): sparse values to update for the vector.
-                           Expected to be either a GRPCSparseValues object or a dict of the form:
-                           {'indices': List[int], 'values': List[float]} where the lists each have the same length.
+                Metadata to set for vector(s). [optional]
+            namespace (str): Namespace name where to update the vector(s). [optional]
+            sparse_values (Dict[str, Union[List[float], List[int]]]): Sparse values to update for the vector.
+                Expected to be either a GRPCSparseValues object or a dict of the form:
+                {'indices': List[int], 'values': List[float]} where the lists each have the same length. [optional]
+            filter (Dict[str, Union[str, float, int, bool, List, dict]]): A metadata filter expression.
+                When provided, the update is applied to all records that match the filter. Mutually exclusive with id.
+                See `metadata filtering <https://www.pinecone.io/docs/metadata-filtering/>`_ [optional]
+            dry_run (bool): If True, return the number of records that match the filter without executing the update.
+                Only meaningful when filter is provided. Defaults to False. [optional]
 
+        Returns:
+            Dict[str, Any] or PineconeGrpcFuture: An empty dictionary if the update was successful when updating by ID.
+                When updating by filter, the dictionary may contain a 'matched_records' key indicating
+                how many records matched the filter (even when dry_run is False).
+                If async_req is True, returns a PineconeGrpcFuture object.
 
-        Returns: UpdateResponse (contains no data) or a PineconeGrpcFuture object if async_req is True.
+        Raises:
+            ValueError: If both id and filter are provided, or if neither is provided.
         """
+        if id is not None and filter is not None:
+            raise ValueError(
+                "Cannot provide both 'id' and 'filter'. Use 'id' to update a single vector or 'filter' to update multiple vectors."
+            )
+        if id is None and filter is None:
+            raise ValueError("Either 'id' or 'filter' must be provided.")
+
         if set_metadata is not None:
             set_metadata_struct = dict_to_proto_struct(set_metadata)
         else:
             set_metadata_struct = None
 
+        if filter is not None:
+            filter_struct = dict_to_proto_struct(filter)
+        else:
+            filter_struct = None
+
         timeout = kwargs.pop("timeout", None)
         sparse_values = SparseValuesFactory.build(sparse_values)
         args_dict = self._parse_non_empty_args(
@@ -631,17 +680,24 @@ def update(
                 ("set_metadata", set_metadata_struct),
                 ("namespace", namespace),
                 ("sparse_values", sparse_values),
+                ("filter", filter_struct),
+                ("dry_run", dry_run),
             ]
         )
 
-        request = UpdateRequest(id=id, **args_dict)
+        # Only include id if it's not None
+        request_kwargs = args_dict.copy()
+        if id is not None:
+            request_kwargs["id"] = id
+        request = UpdateRequest(**request_kwargs)
         if async_req:
             future = self.runner.run(self.stub.Update.future, request, timeout=timeout)
             return PineconeGrpcFuture(
                 future, timeout=timeout, result_transformer=parse_update_response
             )
         else:
-            return self.runner.run(self.stub.Update, request, timeout=timeout)
+            response = self.runner.run(self.stub.Update, request, timeout=timeout)
+            return parse_update_response(response)
 
     def list_paginated(
         self,
diff --git a/pinecone/grpc/utils.py b/pinecone/grpc/utils.py
index 263da0c6..61aaec19 100644
--- a/pinecone/grpc/utils.py
+++ b/pinecone/grpc/utils.py
@@ -102,7 +102,17 @@ def parse_upsert_response(response: Message, _check_type: bool = False):
 
 
 def parse_update_response(response: Union[dict, Message], _check_type: bool = False):
-    return {}
+    """Parse UpdateResponse from gRPC, including matched_records if present."""
+    if isinstance(response, Message):
+        json_response = json_format.MessageToDict(response)
+    else:
+        json_response = response
+
+    result = {}
+    if "matchedRecords" in json_response:
+        result["matched_records"] = json_response["matchedRecords"]
+
+    return result
 
 
 def parse_delete_response(response: Union[dict, Message], _check_type: bool = False):
diff --git a/tests/integration/data_grpc_futures/test_query_future.py b/tests/integration/data_grpc_futures/test_query_future.py
index e2fbb1d6..87701263 100644
--- a/tests/integration/data_grpc_futures/test_query_future.py
+++ b/tests/integration/data_grpc_futures/test_query_future.py
@@ -178,8 +178,17 @@ def test_query_by_vector_include_values_and_metadata(
         ]
         # Check that we have at least the vectors we seeded
         assert len(matches_with_metadata) >= 3
-        assert find_by_id(query_result.matches, "4") is not None
-        assert find_by_id(query_result.matches, "4").metadata["genre"] == "action"
+        # Check that at least one of our seeded vectors with metadata is present
+        # (IDs 4, 5, and 6 were seeded with metadata: genre and runtime)
+        seeded_ids_with_metadata = ["4", "5", "6"]
+        found_seeded = [
+            match for match in matches_with_metadata if match.id in seeded_ids_with_metadata
+        ]
+        assert (
+            len(found_seeded) > 0
+        ), "Expected at least one of the seeded vectors (4, 5, 6) to be in results"
+        # Verify the found vector has the expected metadata structure
+        assert "genre" in found_seeded[0].metadata
         assert len(query_result.matches[0].values) == self.expected_dimension
 
 
diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py
index 30063811..1a4a77ac 100644
--- a/tests/unit/test_index.py
+++ b/tests/unit/test_index.py
@@ -502,11 +502,67 @@ def test_update_byIdAnValues_updateByIdAndValues(self, mocker):
 
     def test_update_byIdAnValuesAndMetadata_updateByIdAndValuesAndMetadata(self, mocker):
         mocker.patch.object(self.index._vector_api, "update_vector", autospec=True)
-        self.index.update("vec1", values=self.vals1, metadata=self.md1)
+        self.index.update("vec1", values=self.vals1, set_metadata=self.md1)
         self.index._vector_api.update_vector.assert_called_once_with(
-            oai.UpdateRequest(id="vec1", values=self.vals1, metadata=self.md1)
+            oai.UpdateRequest(id="vec1", values=self.vals1, set_metadata=self.md1)
         )
 
+    def test_update_byFilter_updateByFilter(self, mocker):
+        filter_dict = {"genre": {"$eq": "comedy"}}
+        response = oai.UpdateResponse(matched_records=5)
+        mocker.patch.object(
+            self.index._vector_api, "update_vector", return_value=response, autospec=True
+        )
+        result = self.index.update(filter=filter_dict, set_metadata=self.md1, namespace="ns")
+        self.index._vector_api.update_vector.assert_called_once_with(
+            oai.UpdateRequest(filter=filter_dict, set_metadata=self.md1, namespace="ns")
+        )
+        assert result["matched_records"] == 5
+
+    def test_update_byFilterWithDryRun_updateByFilterWithDryRun(self, mocker):
+        filter_dict = {"year": {"$gte": 2020}}
+        response = oai.UpdateResponse(matched_records=10)
+        mocker.patch.object(
+            self.index._vector_api, "update_vector", return_value=response, autospec=True
+        )
+        result = self.index.update(
+            filter=filter_dict, set_metadata=self.md1, dry_run=True, namespace="ns"
+        )
+        self.index._vector_api.update_vector.assert_called_once_with(
+            oai.UpdateRequest(
+                filter=filter_dict, set_metadata=self.md1, dry_run=True, namespace="ns"
+            )
+        )
+        assert result["matched_records"] == 10
+
+    def test_update_byFilterWithValues_updateByFilterWithValues(self, mocker):
+        filter_dict = {"status": "active"}
+        response = oai.UpdateResponse(matched_records=3)
+        mocker.patch.object(
+            self.index._vector_api, "update_vector", return_value=response, autospec=True
+        )
+        result = self.index.update(filter=filter_dict, values=self.vals1, namespace="ns")
+        self.index._vector_api.update_vector.assert_called_once_with(
+            oai.UpdateRequest(filter=filter_dict, values=self.vals1, namespace="ns")
+        )
+        assert result["matched_records"] == 3
+
+    def test_update_bothIdAndFilter_raisesValueError(self, mocker):
+        with pytest.raises(ValueError, match="Cannot provide both 'id' and 'filter'"):
+            self.index.update(id="vec1", filter={"genre": "comedy"})
+
+    def test_update_neitherIdNorFilter_raisesValueError(self, mocker):
+        with pytest.raises(ValueError, match="Either 'id' or 'filter' must be provided"):
+            self.index.update(values=self.vals1)
+
+    def test_update_byId_returnsEmptyDict(self, mocker):
+        response = oai.UpdateResponse()
+        mocker.patch.object(
+            self.index._vector_api, "update_vector", return_value=response, autospec=True
+        )
+        result = self.index.update(id="vec1", values=self.vals1)
+        assert result == {}
+
     # endregion
 
     # region: describe index tests
diff --git a/tests/unit_grpc/test_grpc_index_update.py b/tests/unit_grpc/test_grpc_index_update.py
index 1d5e7bd7..b46920bd 100644
--- a/tests/unit_grpc/test_grpc_index_update.py
+++ b/tests/unit_grpc/test_grpc_index_update.py
@@ -39,3 +39,67 @@ def test_update_byIdAnValuesAndMetadata_updateByIdAndValuesAndMetadata(
             UpdateRequest(id="vec1", values=vals1, set_metadata=dict_to_proto_struct(md1)),
             timeout=None,
         )
+
+    def test_update_byFilter_updateByFilter(self, mocker, md1):
+        mocker.patch.object(self.index.runner, "run", autospec=True)
+        filter_dict = {"genre": {"$eq": "comedy"}}
+        self.index.update(filter=filter_dict, set_metadata=md1, namespace="ns")
+        self.index.runner.run.assert_called_once_with(
+            self.index.stub.Update,
+            UpdateRequest(
+                filter=dict_to_proto_struct(filter_dict),
+                set_metadata=dict_to_proto_struct(md1),
+                namespace="ns",
+            ),
+            timeout=None,
+        )
+
+    def test_update_byFilterWithDryRun_updateByFilterWithDryRun(self, mocker, md1):
+        mocker.patch.object(self.index.runner, "run", autospec=True)
+        filter_dict = {"year": {"$gte": 2020}}
+        self.index.update(filter=filter_dict, set_metadata=md1, dry_run=True, namespace="ns")
+        self.index.runner.run.assert_called_once_with(
+            self.index.stub.Update,
+            UpdateRequest(
+                filter=dict_to_proto_struct(filter_dict),
+                set_metadata=dict_to_proto_struct(md1),
+                dry_run=True,
+                namespace="ns",
+            ),
+            timeout=None,
+        )
+
+    def test_update_byFilterAsync_updateByFilterAsync(self, mocker, md1):
+        mocker.patch.object(self.index.runner, "run", autospec=True)
+        filter_dict = {"status": "active"}
+        self.index.update(filter=filter_dict, set_metadata=md1, async_req=True, namespace="ns")
+        self.index.runner.run.assert_called_once_with(
+            self.index.stub.Update.future,
+            UpdateRequest(
+                filter=dict_to_proto_struct(filter_dict),
+                set_metadata=dict_to_proto_struct(md1),
+                namespace="ns",
+            ),
+            timeout=None,
+        )
+
+    def test_update_bothIdAndFilter_raisesValueError(self, mocker):
+        import pytest
+
+        with pytest.raises(ValueError, match="Cannot provide both 'id' and 'filter'"):
+            self.index.update(id="vec1", filter={"genre": "comedy"})
+
+    def test_update_neitherIdNorFilter_raisesValueError(self, mocker):
+        import pytest
+
+        with pytest.raises(ValueError, match="Either 'id' or 'filter' must be provided"):
+            self.index.update(values=[0.1, 0.2, 0.3])
+
+    def test_update_byFilter_returnsMatchedRecords(self, mocker, md1):
+        filter_dict = {"genre": {"$eq": "comedy"}}
+        # Create a mock response dict that parse_update_response will convert
+        response_dict = {"matchedRecords": 5}
+        mocker.patch.object(self.index.runner, "run", return_value=response_dict)
+
+        result = self.index.update(filter=filter_dict, set_metadata=md1, namespace="ns")
+        assert result["matched_records"] == 5

From 86667746960f296919970b25cc2473a03dfcafee Mon Sep 17 00:00:00 2001
From: Jen Hamon <jhamon@pinecone.io>
Date: Tue, 4 Nov 2025 12:44:48 -0500
Subject: [PATCH 2/3] Add more tests for update by id

---
 tests/integration/data/test_update.py         | 186 +++++++++++++++
 .../data_grpc_futures/test_update_future.py   | 219 ++++++++++++++++++
 2 files changed, 405 insertions(+)
 create mode 100644 tests/integration/data/test_update.py

diff --git a/tests/integration/data/test_update.py b/tests/integration/data/test_update.py
new file mode 100644
index 00000000..d188acc4
--- /dev/null
+++ b/tests/integration/data/test_update.py
@@ -0,0 +1,186 @@
+import pytest
+import time
+from pinecone import Vector
+from ..helpers import poll_fetch_for_ids_in_namespace, embedding_values, random_string
+
+
+@pytest.fixture(scope="session")
+def update_namespace():
+    return random_string(10)
+
+
+def seed_for_update(idx, namespace):
+    """Seed test data for update tests."""
+    logger = __import__("logging").getLogger(__name__)
+    logger.info(f"Seeding vectors for update tests in namespace '{namespace}'")
+    idx.upsert(
+        vectors=[
+            Vector(
+                id=str(i),
+                values=embedding_values(2),
+                metadata={"genre": "action", "year": 2020, "status": "active"},
+            )
+            for i in range(10)
+        ],
+        namespace=namespace,
+    )
+    poll_fetch_for_ids_in_namespace(idx, ids=[str(i) for i in range(10)], namespace=namespace)
+
+
+@pytest.fixture(scope="class")
+def seed_for_update_tests(idx, update_namespace):
+    seed_for_update(idx, update_namespace)
+    seed_for_update(idx, "")
+    yield
+
+
+def poll_until_update_reflected(
+    idx, vector_id, namespace, expected_values=None, expected_metadata=None, timeout=180
+):
+    """Poll fetch until update is reflected in the vector."""
+    logger = __import__("logging").getLogger(__name__)
+    delta_t = 2  # Start with shorter interval
+    total_time = 0
+    max_delta_t = 10  # Max interval
+
+    while total_time < timeout:
+        logger.debug(
+            f'Polling for update on vector "{vector_id}" in namespace "{namespace}". Total time waited: {total_time} seconds'
+        )
+        try:
+            results = idx.fetch(ids=[vector_id], namespace=namespace)
+            if vector_id in results.vectors:
+                vec = results.vectors[vector_id]
+
+                # If both are None, we just check that the vector exists
+                if expected_values is None and expected_metadata is None:
+                    return  # Vector exists, we're done
+
+                values_match = True
+                metadata_match = True
+
+                if expected_values is not None:
+                    if vec.values is None:
+                        values_match = False
+                    else:
+                        if len(vec.values) != len(expected_values):
+                            values_match = False
+                        else:
+                            values_match = all(
+                                vec.values[i] == pytest.approx(expected_values[i], 0.01)
+                                for i in range(len(expected_values))
+                            )
+
+                if expected_metadata is not None:
+                    metadata_match = vec.metadata == expected_metadata
+
+                if values_match and metadata_match:
+                    logger.debug(f"Update reflected for vector {vector_id}")
+                    return  # Update is reflected
+        except Exception as e:
+            logger.debug(f"Error while polling: {e}")
+
+        time.sleep(delta_t)
+        total_time += delta_t
+        # Gradually increase interval up to max
+        delta_t = min(delta_t * 1.5, max_delta_t)
+
+    raise TimeoutError(
+        f"Timed out waiting for update on vector {vector_id} in namespace {namespace} after {total_time} seconds"
+    )
+
+
+@pytest.mark.usefixtures("seed_for_update_tests")
+class TestUpdate:
+    @pytest.mark.parametrize("use_nondefault_namespace", [True, False])
+    def test_update_values(self, idx, update_namespace, use_nondefault_namespace):
+        """Test updating vector values by ID."""
+        target_namespace = update_namespace if use_nondefault_namespace else ""
+        vector_id = "1"
+
+        # Update values
+        new_values = embedding_values(2)
+        idx.update(id=vector_id, values=new_values, namespace=target_namespace)
+
+        # Wait for update to be reflected
+        poll_until_update_reflected(
+            idx, vector_id, target_namespace, expected_values=new_values, timeout=180
+        )
+
+        # Verify the update
+        fetched_vec = idx.fetch(ids=[vector_id], namespace=target_namespace)
+        assert fetched_vec.vectors[vector_id].values[0] == pytest.approx(new_values[0], 0.01)
+        assert fetched_vec.vectors[vector_id].values[1] == pytest.approx(new_values[1], 0.01)
+
+    @pytest.mark.parametrize("use_nondefault_namespace", [True, False])
+    def test_update_metadata(self, idx, update_namespace, use_nondefault_namespace):
+        """Test updating vector metadata by ID."""
+        target_namespace = update_namespace if use_nondefault_namespace else ""
+        vector_id = "2"
+
+        # Update metadata
+        new_metadata = {"genre": "comedy", "year": 2021, "status": "inactive"}
+        idx.update(id=vector_id, set_metadata=new_metadata, namespace=target_namespace)
+
+        # Wait for update to be reflected
+        poll_until_update_reflected(
+            idx, vector_id, target_namespace, expected_metadata=new_metadata, timeout=180
+        )
+
+        # Verify the update
+        fetched_vec = idx.fetch(ids=[vector_id], namespace=target_namespace)
+        assert fetched_vec.vectors[vector_id].metadata == new_metadata
+
+    @pytest.mark.parametrize("use_nondefault_namespace", [True, False])
+    def test_update_values_and_metadata(self, idx, update_namespace, use_nondefault_namespace):
+        """Test updating both vector values and metadata by ID."""
+        target_namespace = update_namespace if use_nondefault_namespace else ""
+        vector_id = "3"
+
+        # Update both values and metadata
+        new_values = embedding_values(2)
+        new_metadata = {"genre": "drama", "year": 2022, "status": "pending"}
+        idx.update(
+            id=vector_id, values=new_values, set_metadata=new_metadata, namespace=target_namespace
+        )
+
+        # Wait for update to be reflected
+        poll_until_update_reflected(
+            idx,
+            vector_id,
+            target_namespace,
+            expected_values=new_values,
+            expected_metadata=new_metadata,
+            timeout=180,
+        )
+
+        # Verify the update
+        fetched_vec = idx.fetch(ids=[vector_id], namespace=target_namespace)
+        assert fetched_vec.vectors[vector_id].values[0] == pytest.approx(new_values[0], 0.01)
+        assert fetched_vec.vectors[vector_id].values[1] == pytest.approx(new_values[1], 0.01)
+        assert fetched_vec.vectors[vector_id].metadata == new_metadata
+
+    def test_update_only_metadata_no_values(self, idx, update_namespace):
+        """Test updating only metadata without providing values."""
+        target_namespace = update_namespace
+        vector_id = "4"
+
+        # Get original values first
+        original_vec = idx.fetch(ids=[vector_id], namespace=target_namespace)
+        original_values = original_vec.vectors[vector_id].values
+
+        # Update only metadata
+        new_metadata = {"genre": "thriller", "year": 2023}
+        idx.update(id=vector_id, set_metadata=new_metadata, namespace=target_namespace)
+
+        # Wait for update to be reflected
+        poll_until_update_reflected(
+            idx, vector_id, target_namespace, expected_metadata=new_metadata, timeout=180
+        )
+
+        # Verify metadata updated but values unchanged
+        fetched_vec = idx.fetch(ids=[vector_id], namespace=target_namespace)
+        assert fetched_vec.vectors[vector_id].metadata == new_metadata
+        # Values should remain the same (approximately, due to floating point)
+        assert len(fetched_vec.vectors[vector_id].values) == len(original_values)
+        assert fetched_vec.vectors[vector_id].values[0] == pytest.approx(original_values[0], 0.01)
diff --git a/tests/integration/data_grpc_futures/test_update_future.py b/tests/integration/data_grpc_futures/test_update_future.py
index e69de29b..6a98ef08 100644
--- a/tests/integration/data_grpc_futures/test_update_future.py
+++ b/tests/integration/data_grpc_futures/test_update_future.py
@@ -0,0 +1,219 @@
+import pytest
+import time
+from pinecone import Vector
+from ..helpers import poll_stats_for_namespace, embedding_values, generate_name
+
+
+@pytest.fixture(scope="class")
+def namespace_update_async(request):
+    return generate_name(request.node.name, "update-namespace")
+
+
+def seed_for_update_async(idx, namespace):
+    """Seed test data for async update tests."""
+    logger = __import__("logging").getLogger(__name__)
+    logger.info(f"Seeding vectors for async update tests in namespace '{namespace}'")
+    idx.upsert(
+        vectors=[
+            Vector(
+                id=str(i),
+                values=embedding_values(2),
+                metadata={"genre": "action", "year": 2020, "status": "active"},
+            )
+            for i in range(10)
+        ],
+        namespace=namespace,
+    )
+    poll_stats_for_namespace(idx, namespace, 10)
+
+
+@pytest.fixture(scope="class")
+def seed_for_update_async_tests(idx, namespace_update_async):
+    seed_for_update_async(idx, namespace_update_async)
+    yield
+
+
+def poll_until_update_reflected_async(
+    idx, vector_id, namespace, expected_values=None, expected_metadata=None, timeout=180
+):
+    """Poll fetch until update is reflected in the vector (for async updates)."""
+    logger = __import__("logging").getLogger(__name__)
+    delta_t = 2  # Start with shorter interval
+    total_time = 0
+    max_delta_t = 10  # Max interval
+
+    while total_time < timeout:
+        logger.debug(
+            f'Polling for async update on vector "{vector_id}" in namespace "{namespace}". Total time waited: {total_time} seconds'
+        )
+        try:
+            results = idx.fetch(ids=[vector_id], namespace=namespace)
+            if vector_id in results.vectors:
+                vec = results.vectors[vector_id]
+
+                # If both are None, we just check that the vector exists
+                if expected_values is None and expected_metadata is None:
+                    return  # Vector exists, we're done
+
+                values_match = True
+                metadata_match = True
+
+                if expected_values is not None:
+                    if vec.values is None:
+                        values_match = False
+                    else:
+                        if len(vec.values) != len(expected_values):
+                            values_match = False
+                        else:
+                            values_match = all(
+                                vec.values[i] == pytest.approx(expected_values[i], 0.01)
+                                for i in range(len(expected_values))
+                            )
+
+                if expected_metadata is not None:
+                    metadata_match = vec.metadata == expected_metadata
+
+                if values_match and metadata_match:
+                    logger.debug(f"Update reflected for vector {vector_id}")
+                    return  # Update is reflected
+        except Exception as e:
+            logger.debug(f"Error while polling: {e}")
+
+        time.sleep(delta_t)
+        total_time += delta_t
+        # Gradually increase interval up to max
+        delta_t = min(delta_t * 1.5, max_delta_t)
+
+    raise TimeoutError(
+        f"Timed out waiting for async update on vector {vector_id} in namespace {namespace} after {total_time} seconds"
+    )
+
+
+@pytest.mark.usefixtures("seed_for_update_async_tests")
+class TestUpdateWithAsyncReq:
+    def test_update_values_async(self, idx, namespace_update_async):
+        """Test updating vector values by ID with async_req=True."""
+        target_namespace = namespace_update_async
+        vector_id = "1"
+
+        # Update values with async request
+        new_values = embedding_values(2)
+        future = idx.update(
+            id=vector_id, values=new_values, namespace=target_namespace, async_req=True
+        )
+
+        # Wait for future to complete
+        result = future.result()
+        assert result == {}  # Update response should be empty dict
+
+        # Wait for update to be reflected
+        poll_until_update_reflected_async(
+            idx, vector_id, target_namespace, expected_values=new_values, timeout=180
+        )
+
+        # Verify the update
+        fetched_vec = idx.fetch(ids=[vector_id], namespace=target_namespace)
+        assert fetched_vec.vectors[vector_id].values[0] == pytest.approx(new_values[0], 0.01)
+        assert fetched_vec.vectors[vector_id].values[1] == pytest.approx(new_values[1], 0.01)
+
+    def test_update_metadata_async(self, idx, namespace_update_async):
+        """Test updating vector metadata by ID with async_req=True."""
+        target_namespace = namespace_update_async
+        vector_id = "2"
+
+        # Update metadata with async request
+        new_metadata = {"genre": "comedy", "year": 2021, "status": "inactive"}
+        future = idx.update(
+            id=vector_id, set_metadata=new_metadata, namespace=target_namespace, async_req=True
+        )
+
+        # Wait for future to complete
+        result = future.result()
+        assert result == {}  # Update response should be empty dict
+
+        # Wait for update to be reflected
+        poll_until_update_reflected_async(
+            idx, vector_id, target_namespace, expected_metadata=new_metadata, timeout=180
+        )
+
+        # Verify the update
+        fetched_vec = idx.fetch(ids=[vector_id], namespace=target_namespace)
+        assert fetched_vec.vectors[vector_id].metadata == new_metadata
+
+    def test_update_values_and_metadata_async(self, idx, namespace_update_async):
+        """Test updating both vector values and metadata by ID with async_req=True."""
+        target_namespace = namespace_update_async
+        vector_id = "3"
+
+        # Update both values and metadata with async request
+        new_values = embedding_values(2)
+        new_metadata = {"genre": "drama", "year": 2022, "status": "pending"}
+        future = idx.update(
+            id=vector_id,
+            values=new_values,
+            set_metadata=new_metadata,
+            namespace=target_namespace,
+            async_req=True,
+        )
+
+        # Wait for future to complete
+        result = future.result()
+        assert result == {}  # Update response should be empty dict
+
+        # Wait for update to be reflected
+        poll_until_update_reflected_async(
+            idx,
+            vector_id,
+            target_namespace,
+            expected_values=new_values,
+            expected_metadata=new_metadata,
+            timeout=180,
+        )
+
+        # Verify the update
+        fetched_vec = idx.fetch(ids=[vector_id], namespace=target_namespace)
+        assert fetched_vec.vectors[vector_id].values[0] == pytest.approx(new_values[0], 0.01)
+        assert fetched_vec.vectors[vector_id].values[1] == pytest.approx(new_values[1], 0.01)
+        assert fetched_vec.vectors[vector_id].metadata == new_metadata
+
+    def test_update_multiple_async(self, idx, namespace_update_async):
+        """Test updating multiple vectors asynchronously."""
+        target_namespace = namespace_update_async
+
+        # Update multiple vectors with async requests
+        futures = []
+        updates = []
+        for i in range(5, 8):
+            new_values = embedding_values(2)
+            new_metadata = {"genre": f"genre_{i}", "updated": True}
+            future = idx.update(
+                id=str(i),
+                values=new_values,
+                set_metadata=new_metadata,
+                namespace=target_namespace,
+                async_req=True,
+            )
+            futures.append(future)
+            updates.append((str(i), new_values, new_metadata))
+
+        # Wait for all futures to complete
+        for future in futures:
+            result = future.result()
+            assert result == {}  # Update response should be empty dict
+
+        # Wait for all updates to be reflected
+        for vector_id, new_values, new_metadata in updates:
+            poll_until_update_reflected_async(
+                idx,
+                vector_id,
+                target_namespace,
+                expected_values=new_values,
+                expected_metadata=new_metadata,
+                timeout=180,
+            )
+
+        # Verify all updates
+        fetched_vecs = idx.fetch(ids=[str(i) for i in range(5, 8)], namespace=target_namespace)
+        for vector_id, new_values, new_metadata in updates:
+            assert fetched_vecs.vectors[vector_id].values[0] == pytest.approx(new_values[0], 0.01)
+            assert fetched_vecs.vectors[vector_id].metadata == new_metadata

From 413bdd5601c22017ef49754be87520cd601fcbdf Mon Sep 17 00:00:00 2001
From: Jen Hamon <jhamon@pinecone.io>
Date: Tue, 4 Nov 2025 13:28:17 -0500
Subject: [PATCH 3/3] Relax metadata checks and extend polling timeout in update tests

---
 tests/integration/data/test_update.py         | 46 ++++++++++++++++---
 .../data_grpc_futures/test_update_future.py   | 21 +++++++--
 2 files changed, 57 insertions(+), 10 deletions(-)

diff --git a/tests/integration/data/test_update.py b/tests/integration/data/test_update.py
index d188acc4..8b7106ad 100644
--- a/tests/integration/data/test_update.py
+++ b/tests/integration/data/test_update.py
@@ -72,7 +72,14 @@ def poll_until_update_reflected(
                             )
 
                 if expected_metadata is not None:
-                    metadata_match = vec.metadata == expected_metadata
+                    # Check that all expected metadata fields are present and match
+                    # (metadata may be merged, so we check for our fields specifically)
+                    if vec.metadata is None:
+                        metadata_match = False
+                    else:
+                        metadata_match = all(
+                            vec.metadata.get(k) == v for k, v in expected_metadata.items()
+                        )
 
                 if values_match and metadata_match:
                     logger.debug(f"Update reflected for vector {vector_id}")
@@ -173,14 +180,41 @@ def test_update_only_metadata_no_values(self, idx, update_namespace):
         new_metadata = {"genre": "thriller", "year": 2023}
         idx.update(id=vector_id, set_metadata=new_metadata, namespace=target_namespace)
 
-        # Wait for update to be reflected
-        poll_until_update_reflected(
-            idx, vector_id, target_namespace, expected_metadata=new_metadata, timeout=180
-        )
+        # Wait for update to be reflected - check that specified fields are present
+        # Note: set_metadata may replace or merge, so we check for the fields we set
+        def check_metadata_update():
+            fetched = idx.fetch(ids=[vector_id], namespace=target_namespace)
+            if vector_id in fetched.vectors:
+                vec = fetched.vectors[vector_id]
+                if vec.metadata is not None:
+                    # Check that our specified fields match
+                    return (
+                        vec.metadata.get("genre") == "thriller" and vec.metadata.get("year") == 2023
+                    )
+            return False
+
+        timeout = 180
+        delta_t = 2
+        total_time = 0
+        max_delta_t = 10
+
+        while total_time < timeout:
+            if check_metadata_update():
+                break
+            time.sleep(delta_t)
+            total_time += delta_t
+            delta_t = min(delta_t * 1.5, max_delta_t)
+        else:
+            raise TimeoutError(
+                f"Timed out waiting for metadata update on vector {vector_id} in namespace {target_namespace}"
+            )
 
         # Verify metadata updated but values unchanged
         fetched_vec = idx.fetch(ids=[vector_id], namespace=target_namespace)
-        assert fetched_vec.vectors[vector_id].metadata == new_metadata
+        # Check that the fields we set are present
+        assert fetched_vec.vectors[vector_id].metadata is not None
+        assert fetched_vec.vectors[vector_id].metadata.get("genre") == "thriller"
+        assert fetched_vec.vectors[vector_id].metadata.get("year") == 2023
         # Values should remain the same (approximately, due to floating point)
         assert len(fetched_vec.vectors[vector_id].values) == len(original_values)
         assert fetched_vec.vectors[vector_id].values[0] == pytest.approx(original_values[0], 0.01)
diff --git a/tests/integration/data_grpc_futures/test_update_future.py b/tests/integration/data_grpc_futures/test_update_future.py
index 6a98ef08..cf80717e 100644
--- a/tests/integration/data_grpc_futures/test_update_future.py
+++ b/tests/integration/data_grpc_futures/test_update_future.py
@@ -71,7 +71,14 @@ def poll_until_update_reflected_async(
                             )
 
                 if expected_metadata is not None:
-                    metadata_match = vec.metadata == expected_metadata
+                    # Check that all expected metadata fields are present and match
+                    # (metadata may be merged, so we check for our fields specifically)
+                    if vec.metadata is None:
+                        metadata_match = False
+                    else:
+                        metadata_match = all(
+                            vec.metadata.get(k) == v for k, v in expected_metadata.items()
+                        )
 
                 if values_match and metadata_match:
                     logger.debug(f"Update reflected for vector {vector_id}")
@@ -201,7 +208,8 @@ def test_update_multiple_async(self, idx, namespace_update_async):
             result = future.result()
             assert result == {}  # Update response should be empty dict
 
-        # Wait for all updates to be reflected
+        # Wait for all updates to be reflected - check each one individually
+        # with a reasonable timeout per vector
         for vector_id, new_values, new_metadata in updates:
             poll_until_update_reflected_async(
                 idx,
@@ -209,11 +217,16 @@ def test_update_multiple_async(self, idx, namespace_update_async):
                 target_namespace,
                 expected_values=new_values,
                 expected_metadata=new_metadata,
-                timeout=180,
+                timeout=240,  # Increased timeout for async operations
             )
 
         # Verify all updates
         fetched_vecs = idx.fetch(ids=[str(i) for i in range(5, 8)], namespace=target_namespace)
         for vector_id, new_values, new_metadata in updates:
             assert fetched_vecs.vectors[vector_id].values[0] == pytest.approx(new_values[0], 0.01)
-            assert fetched_vecs.vectors[vector_id].metadata == new_metadata
+            # Check that metadata fields are present (may be merged with existing)
+            assert fetched_vecs.vectors[vector_id].metadata is not None
+            assert fetched_vecs.vectors[vector_id].metadata.get("genre") == new_metadata["genre"]
+            assert (
+                fetched_vecs.vectors[vector_id].metadata.get("updated") == new_metadata["updated"]
+            )