From 348b9f81756db7a00b0ff01d3144b417571489eb Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Tue, 4 Nov 2025 07:01:44 -0500 Subject: [PATCH 1/3] Implement update_by_metadata --- pinecone/db_control/request_factory.py | 23 +--- pinecone/db_data/index.py | 87 +++++++++++- pinecone/db_data/index_asyncio.py | 125 +++++++++++++++++- pinecone/db_data/index_asyncio_interface.py | 76 ++++++++--- pinecone/db_data/interfaces.py | 63 +++++++-- pinecone/db_data/request_factory.py | 16 ++- pinecone/grpc/index_grpc.py | 90 ++++++++++--- pinecone/grpc/utils.py | 12 +- .../data_grpc_futures/test_query_future.py | 13 +- tests/unit/test_index.py | 60 ++++++++- tests/unit_grpc/test_grpc_index_update.py | 64 +++++++++ 11 files changed, 552 insertions(+), 77 deletions(-) diff --git a/pinecone/db_control/request_factory.py b/pinecone/db_control/request_factory.py index 32a45648..229e92fb 100644 --- a/pinecone/db_control/request_factory.py +++ b/pinecone/db_control/request_factory.py @@ -245,25 +245,10 @@ def __parse_index_spec(spec: Union[Dict, ServerlessSpec, PodSpec, ByocSpec]) -> if "schema" in spec["serverless"]: schema_dict = spec["serverless"]["schema"] if isinstance(schema_dict, dict): - # Process fields if present, otherwise pass through as-is - schema_kwargs = {} - if "fields" in schema_dict: - fields = {} - for field_name, field_config in schema_dict["fields"].items(): - if isinstance(field_config, dict): - # Pass through the entire field_config dict to allow future API fields - fields[field_name] = BackupModelSchemaFields(**field_config) - else: - # If not a dict, create with default filterable=True - fields[field_name] = BackupModelSchemaFields(filterable=True) - schema_kwargs["fields"] = fields - - # Pass through any other fields in schema_dict to allow future API fields - for key, value in schema_dict.items(): - if key != "fields": - schema_kwargs[key] = value - - spec["serverless"]["schema"] = BackupModelSchema(**schema_kwargs) + # Use the helper method to handle both formats correctly + spec["serverless"]["schema"] = ( + PineconeDBControlRequestFactory.__parse_schema(schema_dict) + ) index_spec = IndexSpec(serverless=ServerlessSpecModel(**spec["serverless"])) elif "pod" in spec: diff --git a/pinecone/db_data/index.py b/pinecone/db_data/index.py index 20feab7f..c52ae8a7 100644 --- a/pinecone/db_data/index.py +++ b/pinecone/db_data/index.py @@ -514,25 +514,108 @@ def query_namespaces( @validate_and_convert_errors def update( self, - id: str, + id: Optional[str] = None, values: Optional[List[float]] = None, set_metadata: Optional[VectorMetadataTypedDict] = None, namespace: Optional[str] = None, sparse_values: Optional[Union[SparseValues, SparseVectorTypedDict]] = None, + filter: Optional[FilterTypedDict] = None, + dry_run: Optional[bool] = None, **kwargs, ) -> Dict[str, Any]: - return self._vector_api.update_vector( + """Update vector(s) in a namespace by ID or metadata filter. + + The update can be performed by vector ID or by metadata filter. When updating by ID, + a single vector is updated. When updating by metadata filter, all vectors matching + the filter are updated. + + If a value is included, it will overwrite the previous value. + If a set_metadata is included, the values of the fields specified in it will be + added or overwrite the previous value. + + Examples: + + Update by ID: + + .. code-block:: python + + >>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace') + >>> index.update(id='id1', set_metadata={'key': 'value'}, namespace='my_namespace') + >>> index.update(id='id1', values=[1, 2, 3], sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]}, + >>> namespace='my_namespace') + >>> index.update(id='id1', values=[1, 2, 3], sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4]), + >>> namespace='my_namespace') + + Update by metadata filter: + + .. code-block:: python + + >>> # Update metadata for all vectors matching a filter + >>> index.update( + ... filter={'genre': {'$eq': 'comedy'}}, + ... set_metadata={'status': 'active'}, + ... namespace='my_namespace' + ... ) + >>> # Preview how many vectors would be updated (dry run) + >>> result = index.update( + ... filter={'year': {'$gte': 2020}}, + ... set_metadata={'updated': True}, + ... dry_run=True, + ... namespace='my_namespace' + ... ) + >>> print(f"Would update {result.get('matched_records', 0)} vectors") + + Args: + id (str): Vector's unique id. Required when updating by ID. Must be None when filter is provided. [optional] + values (List[float]): Vector values to set. [optional] + set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]): + Metadata to set for vector(s). [optional] + namespace (str): Namespace name where to update the vector(s). [optional] + sparse_values (Dict[str, Union[List[float], List[int]]]): Sparse values to update for the vector. + Expected to be either a SparseValues object or a dict of the form: + {'indices': List[int], 'values': List[float]} where the lists each have the same length. [optional] + filter (Dict[str, Union[str, float, int, bool, List, dict]]): A metadata filter expression. + When provided, the update is applied to all records that match the filter. Mutually exclusive with id. + See `metadata filtering `_ [optional] + dry_run (bool): If True, return the number of records that match the filter without executing the update. + Only meaningful when filter is provided. Defaults to False. [optional] + + Returns: + Dict[str, Any]: An empty dictionary if the update was successful when updating by ID. + When updating by filter, the dictionary may contain a 'matched_records' key indicating + how many records matched the filter (even when dry_run is False). + + Raises: + ValueError: If both id and filter are provided, or if neither is provided. + """ + if id is not None and filter is not None: + raise ValueError( + "Cannot provide both 'id' and 'filter'. Use 'id' to update a single vector or 'filter' to update multiple vectors." + ) + if id is None and filter is None: + raise ValueError("Either 'id' or 'filter' must be provided.") + + response = self._vector_api.update_vector( IndexRequestFactory.update_request( id=id, values=values, set_metadata=set_metadata, namespace=namespace, sparse_values=sparse_values, + filter=filter, + dry_run=dry_run, **kwargs, ), **self._openapi_kwargs(kwargs), ) + # Convert UpdateResponse to dict, including matched_records if present + result = {} + if hasattr(response, "matched_records") and response.matched_records is not None: + result["matched_records"] = response.matched_records + + return result + @validate_and_convert_errors def describe_index_stats( self, filter: Optional[FilterTypedDict] = None, **kwargs diff --git a/pinecone/db_data/index_asyncio.py b/pinecone/db_data/index_asyncio.py index b1818d7c..9ca9e0fc 100644 --- a/pinecone/db_data/index_asyncio.py +++ b/pinecone/db_data/index_asyncio.py @@ -554,25 +554,146 @@ async def query_namespaces( @validate_and_convert_errors async def update( self, - id: str, + id: Optional[str] = None, values: Optional[List[float]] = None, set_metadata: Optional[VectorMetadataTypedDict] = None, namespace: Optional[str] = None, sparse_values: Optional[Union[SparseValues, SparseVectorTypedDict]] = None, + filter: Optional[FilterTypedDict] = None, + dry_run: Optional[bool] = None, **kwargs, ) -> Dict[str, Any]: - return await self._vector_api.update_vector( + """Update vector(s) in a namespace by ID or metadata filter. + + The update can be performed by vector ID or by metadata filter. When updating by ID, + a single vector is updated. When updating by metadata filter, all vectors matching + the filter are updated. + + If a value is included, it will overwrite the previous value. + If a set_metadata is included, the values of the fields specified in it will be + added or overwrite the previous value. + + Examples: + + Update by ID: + + .. code-block:: python + + import asyncio + from pinecone import Pinecone, Vector, SparseValues + + async def main(): + pc = Pinecone() + async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx: + # Update vector values + await idx.update( + id='id1', + values=[0.1, 0.2, 0.3, ...], + namespace='my_namespace' + ) + + # Update metadata + await idx.update( + id='id1', + set_metadata={'key': 'value'}, + namespace='my_namespace' + ) + + # Update sparse values + await idx.update( + id='id1', + sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]}, + namespace='my_namespace' + ) + + # Update sparse values with SparseValues object + await idx.update( + id='id1', + sparse_values=SparseValues(indices=[234781, 5432], values=[0.2, 0.4]), + namespace='my_namespace' + ) + + asyncio.run(main()) + + Update by metadata filter: + + .. code-block:: python + + import asyncio + from pinecone import Pinecone + + async def main(): + pc = Pinecone() + async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx: + # Update metadata for all vectors matching a filter + await idx.update( + filter={'genre': {'$eq': 'comedy'}}, + set_metadata={'status': 'active'}, + namespace='my_namespace' + ) + + # Preview how many vectors would be updated (dry run) + result = await idx.update( + filter={'year': {'$gte': 2020}}, + set_metadata={'updated': True}, + dry_run=True, + namespace='my_namespace' + ) + print(f"Would update {result.get('matched_records', 0)} vectors") + + asyncio.run(main()) + + Args: + id (str): Vector's unique id. Required when updating by ID. Must be None when filter is provided. [optional] + values (List[float]): Vector values to set. [optional] + set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]): + Metadata to set for vector(s). [optional] + namespace (str): Namespace name where to update the vector(s). [optional] + sparse_values (Dict[str, Union[List[float], List[int]]]): Sparse values to update for the vector. + Expected to be either a SparseValues object or a dict of the form: + {'indices': List[int], 'values': List[float]} where the lists each have the same length. [optional] + filter (Dict[str, Union[str, float, int, bool, List, dict]]): A metadata filter expression. + When provided, the update is applied to all records that match the filter. Mutually exclusive with id. + See `metadata filtering `_ [optional] + dry_run (bool): If True, return the number of records that match the filter without executing the update. + Only meaningful when filter is provided. Defaults to False. [optional] + + Returns: + Dict[str, Any]: An empty dictionary if the update was successful when updating by ID. + When updating by filter, the dictionary may contain a 'matched_records' key indicating + how many records matched the filter (even when dry_run is False). + + Raises: + ValueError: If both id and filter are provided, or if neither is provided. + """ + if id is not None and filter is not None: + raise ValueError( + "Cannot provide both 'id' and 'filter'. Use 'id' to update a single vector or 'filter' to update multiple vectors." + ) + if id is None and filter is None: + raise ValueError("Either 'id' or 'filter' must be provided.") + + response = await self._vector_api.update_vector( IndexRequestFactory.update_request( id=id, values=values, set_metadata=set_metadata, namespace=namespace, sparse_values=sparse_values, + filter=filter, + dry_run=dry_run, **kwargs, ), **self._openapi_kwargs(kwargs), ) + # Convert UpdateResponse to dict, including matched_records if present + result = {} + if hasattr(response, "matched_records") and response.matched_records is not None: + result["matched_records"] = response.matched_records + + return result + @validate_and_convert_errors async def describe_index_stats( self, filter: Optional[FilterTypedDict] = None, **kwargs diff --git a/pinecone/db_data/index_asyncio_interface.py b/pinecone/db_data/index_asyncio_interface.py index 3f3838ec..54eccbc0 100644 --- a/pinecone/db_data/index_asyncio_interface.py +++ b/pinecone/db_data/index_asyncio_interface.py @@ -520,33 +520,29 @@ async def main(): @abstractmethod async def update( self, - id: str, + id: Optional[str] = None, values: Optional[List[float]] = None, set_metadata: Optional[VectorMetadataTypedDict] = None, namespace: Optional[str] = None, sparse_values: Optional[Union[SparseValues, SparseVectorTypedDict]] = None, + filter: Optional[FilterTypedDict] = None, + dry_run: Optional[bool] = None, **kwargs, ) -> Dict[str, Any]: - """ - The Update operation updates vector in a namespace. + """The Update operation updates vector(s) in a namespace. - Args: - id (str): Vector's unique id. - values (List[float]): vector values to set. [optional] - set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]): - metadata to set for vector. [optional] - namespace (str): Namespace name where to update the vector.. [optional] - sparse_values: (Dict[str, Union[List[float], List[int]]]): sparse values to update for the vector. - Expected to be either a SparseValues object or a dict of the form: - {'indices': List[int], 'values': List[float]} where the lists each have the same length. + The update can be performed by vector ID or by metadata filter. When updating by ID, + a single vector is updated. When updating by metadata filter, all vectors matching + the filter are updated. If a value is included, it will overwrite the previous value. - If a set_metadata is included, - the values of the fields specified in it will be added or overwrite the previous value. - + If a set_metadata is included, the values of the fields specified in it will be + added or overwrite the previous value. Examples: + Update by ID: + .. code-block:: python import asyncio @@ -585,6 +581,56 @@ async def main(): asyncio.run(main()) + Update by metadata filter: + + .. code-block:: python + + import asyncio + from pinecone import Pinecone + + async def main(): + pc = Pinecone() + async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx: + # Update metadata for all vectors matching a filter + await idx.update( + filter={'genre': {'$eq': 'comedy'}}, + set_metadata={'status': 'active'}, + namespace='my_namespace' + ) + + # Preview how many vectors would be updated (dry run) + result = await idx.update( + filter={'year': {'$gte': 2020}}, + set_metadata={'updated': True}, + dry_run=True, + namespace='my_namespace' + ) + print(f"Would update {result.get('matched_records', 0)} vectors") + + asyncio.run(main()) + + Args: + id (str): Vector's unique id. Required when updating by ID. Must be None when filter is provided. [optional] + values (List[float]): Vector values to set. [optional] + set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]): + Metadata to set for vector(s). [optional] + namespace (str): Namespace name where to update the vector(s). [optional] + sparse_values (Dict[str, Union[List[float], List[int]]]): Sparse values to update for the vector. + Expected to be either a SparseValues object or a dict of the form: + {'indices': List[int], 'values': List[float]} where the lists each have the same length. [optional] + filter (Dict[str, Union[str, float, int, bool, List, dict]]): A metadata filter expression. + When provided, the update is applied to all records that match the filter. Mutually exclusive with id. + See `metadata filtering `_ [optional] + dry_run (bool): If True, return the number of records that match the filter without executing the update. + Only meaningful when filter is provided. Defaults to False. [optional] + + Returns: + Dict[str, Any]: An empty dictionary if the update was successful when updating by ID. + When updating by filter, the dictionary may contain a 'matched_records' key indicating + how many records matched the filter (even when dry_run is False). + + Raises: + ValueError: If both id and filter are provided, or if neither is provided. """ pass diff --git a/pinecone/db_data/interfaces.py b/pinecone/db_data/interfaces.py index 3b1e3be6..b04f9946 100644 --- a/pinecone/db_data/interfaces.py +++ b/pinecone/db_data/interfaces.py @@ -705,21 +705,29 @@ def query_namespaces( @abstractmethod def update( self, - id: str, + id: Optional[str] = None, values: Optional[List[float]] = None, set_metadata: Optional[VectorMetadataTypedDict] = None, namespace: Optional[str] = None, sparse_values: Optional[Union[SparseValues, SparseVectorTypedDict]] = None, + filter: Optional[FilterTypedDict] = None, + dry_run: Optional[bool] = None, **kwargs, ) -> Dict[str, Any]: - """ - The Update operation updates vector in a namespace. + """The Update operation updates vector(s) in a namespace. + + The update can be performed by vector ID or by metadata filter. When updating by ID, + a single vector is updated. When updating by metadata filter, all vectors matching + the filter are updated. + If a value is included, it will overwrite the previous value. - If a set_metadata is included, - the values of the fields specified in it will be added or overwrite the previous value. + If a set_metadata is included, the values of the fields specified in it will be + added or overwrite the previous value. Examples: + Update by ID: + .. code-block:: python >>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace') @@ -729,18 +737,47 @@ def update( >>> index.update(id='id1', values=[1, 2, 3], sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4]), >>> namespace='my_namespace') + Update by metadata filter: + + .. code-block:: python + + >>> # Update metadata for all vectors matching a filter + >>> index.update( + ... filter={'genre': {'$eq': 'comedy'}}, + ... set_metadata={'status': 'active'}, + ... namespace='my_namespace' + ... ) + >>> # Preview how many vectors would be updated (dry run) + >>> result = index.update( + ... filter={'year': {'$gte': 2020}}, + ... set_metadata={'updated': True}, + ... dry_run=True, + ... namespace='my_namespace' + ... ) + >>> print(f"Would update {result.get('matched_records', 0)} vectors") + Args: - id (str): Vector's unique id. - values (List[float]): vector values to set. [optional] + id (str): Vector's unique id. Required when updating by ID. Must be None when filter is provided. [optional] + values (List[float]): Vector values to set. [optional] set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]): - metadata to set for vector. [optional] - namespace (str): Namespace name where to update the vector.. [optional] - sparse_values: (Dict[str, Union[List[float], List[int]]]): sparse values to update for the vector. - Expected to be either a SparseValues object or a dict of the form: - {'indices': List[int], 'values': List[float]} where the lists each have the same length. + Metadata to set for vector(s). [optional] + namespace (str): Namespace name where to update the vector(s). [optional] + sparse_values (Dict[str, Union[List[float], List[int]]]): Sparse values to update for the vector. + Expected to be either a SparseValues object or a dict of the form: + {'indices': List[int], 'values': List[float]} where the lists each have the same length. [optional] + filter (Dict[str, Union[str, float, int, bool, List, dict]]): A metadata filter expression. + When provided, the update is applied to all records that match the filter. Mutually exclusive with id. + See `metadata filtering `_ [optional] + dry_run (bool): If True, return the number of records that match the filter without executing the update. + Only meaningful when filter is provided. Defaults to False. [optional] + Returns: + Dict[str, Any]: An empty dictionary if the update was successful when updating by ID. + When updating by filter, the dictionary may contain a 'matched_records' key indicating + how many records matched the filter (even when dry_run is False). - Returns: An empty dictionary if the update was successful. + Raises: + ValueError: If both id and filter are provided, or if neither is provided. """ pass diff --git a/pinecone/db_data/request_factory.py b/pinecone/db_data/request_factory.py index 64bb65d9..c4290cda 100644 --- a/pinecone/db_data/request_factory.py +++ b/pinecone/db_data/request_factory.py @@ -135,11 +135,13 @@ def fetch_by_metadata_request( @staticmethod def update_request( - id: str, + id: Optional[str] = None, values: Optional[List[float]] = None, set_metadata: Optional[VectorMetadataTypedDict] = None, namespace: Optional[str] = None, sparse_values: Optional[Union[SparseValues, SparseVectorTypedDict]] = None, + filter: Optional[FilterTypedDict] = None, + dry_run: Optional[bool] = None, **kwargs, ) -> UpdateRequest: _check_type = kwargs.pop("_check_type", False) @@ -150,12 +152,18 @@ def update_request( ("set_metadata", set_metadata), ("namespace", namespace), ("sparse_values", sparse_values_normalized), + ("filter", filter), + ("dry_run", dry_run), ] ) - return UpdateRequest( - id=id, **args_dict, _check_type=_check_type, **non_openapi_kwargs(kwargs) - ) + # Only include id if it's not None + request_kwargs = {"_check_type": _check_type, **non_openapi_kwargs(kwargs)} + if id is not None: + request_kwargs["id"] = id + request_kwargs.update(args_dict) + + return UpdateRequest(**request_kwargs) @staticmethod def describe_index_stats_request( diff --git a/pinecone/grpc/index_grpc.py b/pinecone/grpc/index_grpc.py index a3ac23d7..8d99ec12 100644 --- a/pinecone/grpc/index_grpc.py +++ b/pinecone/grpc/index_grpc.py @@ -45,7 +45,6 @@ ListRequest, DescribeIndexStatsRequest, DeleteResponse, - UpdateResponse, SparseValues as GRPCSparseValues, DescribeNamespaceRequest, DeleteNamespaceRequest, @@ -578,22 +577,30 @@ def query_namespaces( def update( self, - id: str, + id: Optional[str] = None, async_req: bool = False, values: Optional[List[float]] = None, set_metadata: Optional[VectorMetadataTypedDict] = None, namespace: Optional[str] = None, sparse_values: Optional[Union[GRPCSparseValues, SparseVectorTypedDict]] = None, + filter: Optional[FilterTypedDict] = None, + dry_run: Optional[bool] = None, **kwargs, - ) -> Union[UpdateResponse, PineconeGrpcFuture]: - """ - The Update operation updates vector in a namespace. + ) -> Union[Dict[str, Any], PineconeGrpcFuture]: + """Update vector(s) in a namespace by ID or metadata filter. + + The update can be performed by vector ID or by metadata filter. When updating by ID, + a single vector is updated. When updating by metadata filter, all vectors matching + the filter are updated. + If a value is included, it will overwrite the previous value. - If a set_metadata is included, - the values of the fields specified in it will be added or overwrite the previous value. + If a set_metadata is included, the values of the fields specified in it will be + added or overwrite the previous value. Examples: + Update by ID: + .. code-block:: python >>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace') @@ -603,26 +610,68 @@ def update( >>> index.update(id='id1', values=[1, 2, 3], sparse_values=GRPCSparseValues(indices=[1, 2], values=[0.2, 0.4]), >>> namespace='my_namespace') + Update by metadata filter: + + .. code-block:: python + + >>> # Update metadata for all vectors matching a filter + >>> index.update( + ... filter={'genre': {'$eq': 'comedy'}}, + ... set_metadata={'status': 'active'}, + ... namespace='my_namespace' + ... ) + >>> # Preview how many vectors would be updated (dry run) + >>> result = index.update( + ... filter={'year': {'$gte': 2020}}, + ... set_metadata={'updated': True}, + ... dry_run=True, + ... namespace='my_namespace' + ... ) + >>> print(f"Would update {result.get('matched_records', 0)} vectors") + Args: - id (str): Vector's unique id. + id (str): Vector's unique id. Required when updating by ID. Must be None when filter is provided. [optional] async_req (bool): If True, the update operation will be performed asynchronously. Defaults to False. [optional] - values (List[float]): vector values to set. [optional] + values (List[float]): Vector values to set. [optional] set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]): - metadata to set for vector. [optional] - namespace (str): Namespace name where to update the vector.. [optional] - sparse_values: (Dict[str, Union[List[float], List[int]]]): sparse values to update for the vector. - Expected to be either a GRPCSparseValues object or a dict of the form: - {'indices': List[int], 'values': List[float]} where the lists each have the same length. + Metadata to set for vector(s). [optional] + namespace (str): Namespace name where to update the vector(s). [optional] + sparse_values (Dict[str, Union[List[float], List[int]]]): Sparse values to update for the vector. + Expected to be either a GRPCSparseValues object or a dict of the form: + {'indices': List[int], 'values': List[float]} where the lists each have the same length. [optional] + filter (Dict[str, Union[str, float, int, bool, List, dict]]): A metadata filter expression. + When provided, the update is applied to all records that match the filter. Mutually exclusive with id. + See `metadata filtering `_ [optional] + dry_run (bool): If True, return the number of records that match the filter without executing the update. + Only meaningful when filter is provided. Defaults to False. [optional] + Returns: + Dict[str, Any] or PineconeGrpcFuture: An empty dictionary if the update was successful when updating by ID. + When updating by filter, the dictionary may contain a 'matched_records' key indicating + how many records matched the filter (even when dry_run is False). + If async_req is True, returns a PineconeGrpcFuture object. - Returns: UpdateResponse (contains no data) or a PineconeGrpcFuture object if async_req is True. + Raises: + ValueError: If both id and filter are provided, or if neither is provided. """ + if id is not None and filter is not None: + raise ValueError( + "Cannot provide both 'id' and 'filter'. Use 'id' to update a single vector or 'filter' to update multiple vectors." + ) + if id is None and filter is None: + raise ValueError("Either 'id' or 'filter' must be provided.") + if set_metadata is not None: set_metadata_struct = dict_to_proto_struct(set_metadata) else: set_metadata_struct = None + if filter is not None: + filter_struct = dict_to_proto_struct(filter) + else: + filter_struct = None + timeout = kwargs.pop("timeout", None) sparse_values = SparseValuesFactory.build(sparse_values) args_dict = self._parse_non_empty_args( @@ -631,17 +680,24 @@ def update( ("set_metadata", set_metadata_struct), ("namespace", namespace), ("sparse_values", sparse_values), + ("filter", filter_struct), + ("dry_run", dry_run), ] ) - request = UpdateRequest(id=id, **args_dict) + # Only include id if it's not None + request_kwargs = args_dict.copy() + if id is not None: + request_kwargs["id"] = id + request = UpdateRequest(**request_kwargs) if async_req: future = self.runner.run(self.stub.Update.future, request, timeout=timeout) return PineconeGrpcFuture( future, timeout=timeout, result_transformer=parse_update_response ) else: - return self.runner.run(self.stub.Update, request, timeout=timeout) + response = self.runner.run(self.stub.Update, request, timeout=timeout) + return parse_update_response(response) def list_paginated( self, diff --git a/pinecone/grpc/utils.py b/pinecone/grpc/utils.py index 263da0c6..61aaec19 100644 --- a/pinecone/grpc/utils.py +++ b/pinecone/grpc/utils.py @@ -102,7 +102,17 @@ def parse_upsert_response(response: Message, _check_type: bool = False): def parse_update_response(response: Union[dict, Message], _check_type: bool = False): - return {} + """Parse UpdateResponse from gRPC, including matched_records if present.""" + if isinstance(response, Message): + json_response = json_format.MessageToDict(response) + else: + json_response = response + + result = {} + if "matchedRecords" in json_response: + result["matched_records"] = json_response["matchedRecords"] + + return result def parse_delete_response(response: Union[dict, Message], _check_type: bool = False): diff --git a/tests/integration/data_grpc_futures/test_query_future.py b/tests/integration/data_grpc_futures/test_query_future.py index e2fbb1d6..87701263 100644 --- a/tests/integration/data_grpc_futures/test_query_future.py +++ b/tests/integration/data_grpc_futures/test_query_future.py @@ -178,8 +178,17 @@ def test_query_by_vector_include_values_and_metadata( ] # Check that we have at least the vectors we seeded assert len(matches_with_metadata) >= 3 - assert find_by_id(query_result.matches, "4") is not None - assert find_by_id(query_result.matches, "4").metadata["genre"] == "action" + # Check that at least one of our seeded vectors with metadata is present + # (ID 4, 5, or 6 were seeded with metadata: genre and runtime) + seeded_ids_with_metadata = ["4", "5", "6"] + found_seeded = [ + match for match in matches_with_metadata if match.id in seeded_ids_with_metadata + ] + assert ( + len(found_seeded) > 0 + ), "Expected at least one of the seeded vectors (4, 5, 6) to be in results" + # Verify the found vector has the expected metadata structure + assert "genre" in found_seeded[0].metadata assert len(query_result.matches[0].values) == self.expected_dimension diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index 30063811..1a4a77ac 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -502,11 +502,67 @@ def test_update_byIdAnValues_updateByIdAndValues(self, mocker): def test_update_byIdAnValuesAndMetadata_updateByIdAndValuesAndMetadata(self, mocker): mocker.patch.object(self.index._vector_api, "update_vector", autospec=True) - self.index.update("vec1", values=self.vals1, metadata=self.md1) + self.index.update("vec1", values=self.vals1, set_metadata=self.md1) self.index._vector_api.update_vector.assert_called_once_with( - oai.UpdateRequest(id="vec1", values=self.vals1, metadata=self.md1) + oai.UpdateRequest(id="vec1", values=self.vals1, set_metadata=self.md1) ) + def test_update_byFilter_updateByFilter(self, mocker): + filter_dict = {"genre": {"$eq": "comedy"}} + response = oai.UpdateResponse(matched_records=5) + mocker.patch.object( + self.index._vector_api, "update_vector", return_value=response, autospec=True + ) + result = self.index.update(filter=filter_dict, set_metadata=self.md1, namespace="ns") + self.index._vector_api.update_vector.assert_called_once_with( + oai.UpdateRequest(filter=filter_dict, set_metadata=self.md1, namespace="ns") + ) + assert result["matched_records"] == 5 + + def test_update_byFilterWithDryRun_updateByFilterWithDryRun(self, mocker): + filter_dict = {"year": {"$gte": 2020}} + response = oai.UpdateResponse(matched_records=10) + mocker.patch.object( + self.index._vector_api, "update_vector", return_value=response, autospec=True + ) + result = self.index.update( + filter=filter_dict, set_metadata=self.md1, dry_run=True, namespace="ns" + ) + self.index._vector_api.update_vector.assert_called_once_with( + oai.UpdateRequest( + filter=filter_dict, set_metadata=self.md1, dry_run=True, namespace="ns" + ) + ) + assert result["matched_records"] == 10 + + def test_update_byFilterWithValues_updateByFilterWithValues(self, mocker): + filter_dict = {"status": "active"} + response = oai.UpdateResponse(matched_records=3) + mocker.patch.object( + self.index._vector_api, "update_vector", return_value=response, autospec=True + ) + result = self.index.update(filter=filter_dict, values=self.vals1, namespace="ns") + self.index._vector_api.update_vector.assert_called_once_with( + oai.UpdateRequest(filter=filter_dict, values=self.vals1, namespace="ns") + ) + assert result["matched_records"] == 3 + + def test_update_bothIdAndFilter_raisesValueError(self, mocker): + with pytest.raises(ValueError, match="Cannot provide both 'id' and 'filter'"): + self.index.update(id="vec1", filter={"genre": "comedy"}) + + def test_update_neitherIdNorFilter_raisesValueError(self, mocker): + with pytest.raises(ValueError, match="Either 'id' or 'filter' must be provided"): + self.index.update(values=self.vals1) + + def test_update_byId_returnsEmptyDict(self, mocker): + response = oai.UpdateResponse() + mocker.patch.object( + self.index._vector_api, "update_vector", return_value=response, autospec=True + ) + result = self.index.update(id="vec1", values=self.vals1) + assert result == {} + # endregion # region: describe index tests diff --git a/tests/unit_grpc/test_grpc_index_update.py b/tests/unit_grpc/test_grpc_index_update.py index 1d5e7bd7..b46920bd 100644 --- a/tests/unit_grpc/test_grpc_index_update.py +++ b/tests/unit_grpc/test_grpc_index_update.py @@ -39,3 +39,67 @@ def test_update_byIdAnValuesAndMetadata_updateByIdAndValuesAndMetadata( UpdateRequest(id="vec1", values=vals1, set_metadata=dict_to_proto_struct(md1)), timeout=None, ) + + def test_update_byFilter_updateByFilter(self, mocker, md1): + mocker.patch.object(self.index.runner, "run", autospec=True) + filter_dict = {"genre": {"$eq": "comedy"}} + self.index.update(filter=filter_dict, set_metadata=md1, namespace="ns") + self.index.runner.run.assert_called_once_with( + self.index.stub.Update, + UpdateRequest( + filter=dict_to_proto_struct(filter_dict), + set_metadata=dict_to_proto_struct(md1), + namespace="ns", + ), + timeout=None, + ) + + def test_update_byFilterWithDryRun_updateByFilterWithDryRun(self, mocker, md1): + mocker.patch.object(self.index.runner, "run", autospec=True) + filter_dict = {"year": {"$gte": 2020}} + self.index.update(filter=filter_dict, set_metadata=md1, dry_run=True, namespace="ns") + self.index.runner.run.assert_called_once_with( + self.index.stub.Update, + UpdateRequest( + filter=dict_to_proto_struct(filter_dict), + set_metadata=dict_to_proto_struct(md1), + dry_run=True, + namespace="ns", + ), + timeout=None, + ) + + def test_update_byFilterAsync_updateByFilterAsync(self, mocker, md1): + mocker.patch.object(self.index.runner, "run", autospec=True) + filter_dict = {"status": "active"} + self.index.update(filter=filter_dict, set_metadata=md1, async_req=True, namespace="ns") + self.index.runner.run.assert_called_once_with( + self.index.stub.Update.future, + UpdateRequest( + filter=dict_to_proto_struct(filter_dict), + set_metadata=dict_to_proto_struct(md1), + namespace="ns", + ), + timeout=None, + ) + + def test_update_bothIdAndFilter_raisesValueError(self, mocker): + import pytest + + with pytest.raises(ValueError, match="Cannot provide both 'id' and 'filter'"): + self.index.update(id="vec1", filter={"genre": "comedy"}) + + def test_update_neitherIdNorFilter_raisesValueError(self, mocker): + import pytest + + with pytest.raises(ValueError, match="Either 'id' or 'filter' must be provided"): + self.index.update(values=[0.1, 0.2, 0.3]) + + def test_update_byFilter_returnsMatchedRecords(self, mocker, md1): + filter_dict = {"genre": {"$eq": "comedy"}} + # Create a mock response dict that parse_update_response will convert + response_dict = {"matchedRecords": 5} + mocker.patch.object(self.index.runner, "run", return_value=response_dict) + + result = self.index.update(filter=filter_dict, set_metadata=md1, namespace="ns") + assert result["matched_records"] == 5 From 86667746960f296919970b25cc2473a03dfcafee Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Tue, 4 Nov 2025 12:44:48 -0500 Subject: [PATCH 2/3] Add more tests for update by id --- tests/integration/data/test_update.py | 186 +++++++++++++++ .../data_grpc_futures/test_update_future.py | 219 ++++++++++++++++++ 2 files changed, 405 insertions(+) create mode 100644 tests/integration/data/test_update.py diff --git a/tests/integration/data/test_update.py b/tests/integration/data/test_update.py new file mode 100644 index 00000000..d188acc4 --- /dev/null +++ b/tests/integration/data/test_update.py @@ -0,0 +1,186 @@ +import pytest +import time +from pinecone import Vector +from ..helpers import poll_fetch_for_ids_in_namespace, embedding_values, random_string + + +@pytest.fixture(scope="session") +def update_namespace(): + return random_string(10) + + +def seed_for_update(idx, namespace): + """Seed test data for update tests.""" + logger = __import__("logging").getLogger(__name__) + logger.info(f"Seeding vectors for update tests in namespace '{namespace}'") + idx.upsert( + vectors=[ + Vector( + id=str(i), + values=embedding_values(2), + metadata={"genre": "action", "year": 2020, "status": "active"}, + ) + for i in range(10) + ], + namespace=namespace, + ) + poll_fetch_for_ids_in_namespace(idx, ids=[str(i) for i in range(10)], namespace=namespace) + + +@pytest.fixture(scope="class") +def seed_for_update_tests(idx, update_namespace): + seed_for_update(idx, update_namespace) + seed_for_update(idx, "") + yield + + +def poll_until_update_reflected( + idx, vector_id, namespace, expected_values=None, expected_metadata=None, timeout=180 +): + """Poll fetch until update is reflected in the vector.""" + logger = __import__("logging").getLogger(__name__) + delta_t = 2 # Start with shorter interval + total_time = 0 + max_delta_t = 10 # Max interval + + while total_time < timeout: + logger.debug( + f'Polling for update on vector "{vector_id}" in namespace "{namespace}". Total time waited: {total_time} seconds' + ) + try: + results = idx.fetch(ids=[vector_id], namespace=namespace) + if vector_id in results.vectors: + vec = results.vectors[vector_id] + + # If both are None, we just check that the vector exists + if expected_values is None and expected_metadata is None: + return # Vector exists, we're done + + values_match = True + metadata_match = True + + if expected_values is not None: + if vec.values is None: + values_match = False + else: + if len(vec.values) != len(expected_values): + values_match = False + else: + values_match = all( + vec.values[i] == pytest.approx(expected_values[i], 0.01) + for i in range(len(expected_values)) + ) + + if expected_metadata is not None: + metadata_match = vec.metadata == expected_metadata + + if values_match and metadata_match: + logger.debug(f"Update reflected for vector {vector_id}") + return # Update is reflected + except Exception as e: + logger.debug(f"Error while polling: {e}") + + time.sleep(delta_t) + total_time += delta_t + # Gradually increase interval up to max + delta_t = min(delta_t * 1.5, max_delta_t) + + raise TimeoutError( + f"Timed out waiting for update on vector {vector_id} in namespace {namespace} after {total_time} seconds" + ) + + +@pytest.mark.usefixtures("seed_for_update_tests") +class TestUpdate: + @pytest.mark.parametrize("use_nondefault_namespace", [True, False]) + def test_update_values(self, idx, update_namespace, use_nondefault_namespace): + """Test updating vector values by ID.""" + target_namespace = update_namespace if use_nondefault_namespace else "" + vector_id = "1" + + # Update values + new_values = embedding_values(2) + idx.update(id=vector_id, values=new_values, namespace=target_namespace) + + # Wait for update to be reflected + poll_until_update_reflected( + idx, vector_id, target_namespace, expected_values=new_values, timeout=180 + ) + + # Verify the update + fetched_vec = idx.fetch(ids=[vector_id], namespace=target_namespace) + assert fetched_vec.vectors[vector_id].values[0] == pytest.approx(new_values[0], 0.01) + assert fetched_vec.vectors[vector_id].values[1] == pytest.approx(new_values[1], 0.01) + + @pytest.mark.parametrize("use_nondefault_namespace", [True, False]) + def test_update_metadata(self, idx, update_namespace, use_nondefault_namespace): + """Test updating vector metadata by ID.""" + target_namespace = update_namespace if use_nondefault_namespace else "" + vector_id = "2" + + # Update metadata + new_metadata = {"genre": "comedy", "year": 2021, "status": "inactive"} + idx.update(id=vector_id, set_metadata=new_metadata, namespace=target_namespace) + + # Wait for update to be reflected + poll_until_update_reflected( + idx, vector_id, target_namespace, expected_metadata=new_metadata, timeout=180 + ) + + # Verify the update + fetched_vec = idx.fetch(ids=[vector_id], namespace=target_namespace) + assert fetched_vec.vectors[vector_id].metadata == new_metadata + + @pytest.mark.parametrize("use_nondefault_namespace", [True, False]) + def test_update_values_and_metadata(self, idx, update_namespace, use_nondefault_namespace): + """Test updating both vector values and metadata by ID.""" + target_namespace = update_namespace if use_nondefault_namespace else "" + vector_id = "3" + + # Update both values and metadata + new_values = embedding_values(2) + new_metadata = {"genre": "drama", "year": 2022, "status": "pending"} + idx.update( + id=vector_id, values=new_values, set_metadata=new_metadata, namespace=target_namespace + ) + + # Wait for update to be reflected + poll_until_update_reflected( + idx, + vector_id, + target_namespace, + expected_values=new_values, + expected_metadata=new_metadata, + timeout=180, + ) + + # Verify the update + fetched_vec = idx.fetch(ids=[vector_id], namespace=target_namespace) + assert fetched_vec.vectors[vector_id].values[0] == pytest.approx(new_values[0], 0.01) + assert fetched_vec.vectors[vector_id].values[1] == pytest.approx(new_values[1], 0.01) + assert fetched_vec.vectors[vector_id].metadata == new_metadata + + def test_update_only_metadata_no_values(self, idx, update_namespace): + """Test updating only metadata without providing values.""" + target_namespace = update_namespace + vector_id = "4" + + # Get original values first + original_vec = idx.fetch(ids=[vector_id], namespace=target_namespace) + original_values = original_vec.vectors[vector_id].values + + # Update only metadata + new_metadata = {"genre": "thriller", "year": 2023} + idx.update(id=vector_id, set_metadata=new_metadata, namespace=target_namespace) + + # Wait for update to be reflected + poll_until_update_reflected( + idx, vector_id, target_namespace, expected_metadata=new_metadata, timeout=180 + ) + + # Verify metadata updated but values unchanged + fetched_vec = idx.fetch(ids=[vector_id], namespace=target_namespace) + assert fetched_vec.vectors[vector_id].metadata == new_metadata + # Values should remain the same (approximately, due to floating point) + assert len(fetched_vec.vectors[vector_id].values) == len(original_values) + assert fetched_vec.vectors[vector_id].values[0] == pytest.approx(original_values[0], 0.01) diff --git a/tests/integration/data_grpc_futures/test_update_future.py b/tests/integration/data_grpc_futures/test_update_future.py index e69de29b..6a98ef08 100644 --- a/tests/integration/data_grpc_futures/test_update_future.py +++ b/tests/integration/data_grpc_futures/test_update_future.py @@ -0,0 +1,219 @@ +import pytest +import time +from pinecone import Vector +from ..helpers import poll_stats_for_namespace, embedding_values, generate_name + + +@pytest.fixture(scope="class") +def namespace_update_async(request): + return generate_name(request.node.name, "update-namespace") + + +def seed_for_update_async(idx, namespace): + """Seed test data for async update tests.""" + logger = __import__("logging").getLogger(__name__) + logger.info(f"Seeding vectors for async update tests in namespace '{namespace}'") + idx.upsert( + vectors=[ + Vector( + id=str(i), + values=embedding_values(2), + metadata={"genre": "action", "year": 2020, "status": "active"}, + ) + for i in range(10) + ], + namespace=namespace, + ) + poll_stats_for_namespace(idx, namespace, 10) + + +@pytest.fixture(scope="class") +def seed_for_update_async_tests(idx, namespace_update_async): + seed_for_update_async(idx, namespace_update_async) + yield + + +def poll_until_update_reflected_async( + idx, vector_id, namespace, expected_values=None, expected_metadata=None, timeout=180 +): + """Poll fetch until update is reflected in the vector (for async updates).""" + logger = __import__("logging").getLogger(__name__) + delta_t = 2 # Start with shorter interval + total_time = 0 + max_delta_t = 10 # Max interval + + while total_time < timeout: + logger.debug( + f'Polling for async update on vector "{vector_id}" in namespace "{namespace}". Total time waited: {total_time} seconds' + ) + try: + results = idx.fetch(ids=[vector_id], namespace=namespace) + if vector_id in results.vectors: + vec = results.vectors[vector_id] + + # If both are None, we just check that the vector exists + if expected_values is None and expected_metadata is None: + return # Vector exists, we're done + + values_match = True + metadata_match = True + + if expected_values is not None: + if vec.values is None: + values_match = False + else: + if len(vec.values) != len(expected_values): + values_match = False + else: + values_match = all( + vec.values[i] == pytest.approx(expected_values[i], 0.01) + for i in range(len(expected_values)) + ) + + if expected_metadata is not None: + metadata_match = vec.metadata == expected_metadata + + if values_match and metadata_match: + logger.debug(f"Update reflected for vector {vector_id}") + return # Update is reflected + except Exception as e: + logger.debug(f"Error while polling: {e}") + + time.sleep(delta_t) + total_time += delta_t + # Gradually increase interval up to max + delta_t = min(delta_t * 1.5, max_delta_t) + + raise TimeoutError( + f"Timed out waiting for async update on vector {vector_id} in namespace {namespace} after {total_time} seconds" + ) + + +@pytest.mark.usefixtures("seed_for_update_async_tests") +class TestUpdateWithAsyncReq: + def test_update_values_async(self, idx, namespace_update_async): + """Test updating vector values by ID with async_req=True.""" + target_namespace = namespace_update_async + vector_id = "1" + + # Update values with async request + new_values = embedding_values(2) + future = idx.update( + id=vector_id, values=new_values, namespace=target_namespace, async_req=True + ) + + # Wait for future to complete + result = future.result() + assert result == {} # Update response should be empty dict + + # Wait for update to be reflected + poll_until_update_reflected_async( + idx, vector_id, target_namespace, expected_values=new_values, timeout=180 + ) + + # Verify the update + fetched_vec = idx.fetch(ids=[vector_id], namespace=target_namespace) + assert fetched_vec.vectors[vector_id].values[0] == pytest.approx(new_values[0], 0.01) + assert fetched_vec.vectors[vector_id].values[1] == pytest.approx(new_values[1], 0.01) + + def test_update_metadata_async(self, idx, namespace_update_async): + """Test updating vector metadata by ID with async_req=True.""" + target_namespace = namespace_update_async + vector_id = "2" + + # Update metadata with async request + new_metadata = {"genre": "comedy", "year": 2021, "status": "inactive"} + future = idx.update( + id=vector_id, set_metadata=new_metadata, namespace=target_namespace, async_req=True + ) + + # Wait for future to complete + result = future.result() + assert result == {} # Update response should be empty dict + + # Wait for update to be reflected + poll_until_update_reflected_async( + idx, vector_id, target_namespace, expected_metadata=new_metadata, timeout=180 + ) + + # Verify the update + fetched_vec = idx.fetch(ids=[vector_id], namespace=target_namespace) + assert fetched_vec.vectors[vector_id].metadata == new_metadata + + def test_update_values_and_metadata_async(self, idx, namespace_update_async): + """Test updating both vector values and metadata by ID with async_req=True.""" + target_namespace = namespace_update_async + vector_id = "3" + + # Update both values and metadata with async request + new_values = embedding_values(2) + new_metadata = {"genre": "drama", "year": 2022, "status": "pending"} + future = idx.update( + id=vector_id, + values=new_values, + set_metadata=new_metadata, + namespace=target_namespace, + async_req=True, + ) + + # Wait for future to complete + result = future.result() + assert result == {} # Update response should be empty dict + + # Wait for update to be reflected + poll_until_update_reflected_async( + idx, + vector_id, + target_namespace, + expected_values=new_values, + expected_metadata=new_metadata, + timeout=180, + ) + + # Verify the update + fetched_vec = idx.fetch(ids=[vector_id], namespace=target_namespace) + assert fetched_vec.vectors[vector_id].values[0] == pytest.approx(new_values[0], 0.01) + assert fetched_vec.vectors[vector_id].values[1] == pytest.approx(new_values[1], 0.01) + assert fetched_vec.vectors[vector_id].metadata == new_metadata + + def test_update_multiple_async(self, idx, namespace_update_async): + """Test updating multiple vectors asynchronously.""" + target_namespace = namespace_update_async + + # Update multiple vectors with async requests + futures = [] + updates = [] + for i in range(5, 8): + new_values = embedding_values(2) + new_metadata = {"genre": f"genre_{i}", "updated": True} + future = idx.update( + id=str(i), + values=new_values, + set_metadata=new_metadata, + namespace=target_namespace, + async_req=True, + ) + futures.append(future) + updates.append((str(i), new_values, new_metadata)) + + # Wait for all futures to complete + for future in futures: + result = future.result() + assert result == {} # Update response should be empty dict + + # Wait for all updates to be reflected + for vector_id, new_values, new_metadata in updates: + poll_until_update_reflected_async( + idx, + vector_id, + target_namespace, + expected_values=new_values, + expected_metadata=new_metadata, + timeout=180, + ) + + # Verify all updates + fetched_vecs = idx.fetch(ids=[str(i) for i in range(5, 8)], namespace=target_namespace) + for vector_id, new_values, new_metadata in updates: + assert fetched_vecs.vectors[vector_id].values[0] == pytest.approx(new_values[0], 0.01) + assert fetched_vecs.vectors[vector_id].metadata == new_metadata From 413bdd5601c22017ef49754be87520cd601fcbdf Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Tue, 4 Nov 2025 13:28:17 -0500 Subject: [PATCH 3/3] Iterate on tests --- tests/integration/data/test_update.py | 46 ++++++++++++++++--- .../data_grpc_futures/test_update_future.py | 21 +++++++-- 2 files changed, 57 insertions(+), 10 deletions(-) diff --git a/tests/integration/data/test_update.py b/tests/integration/data/test_update.py index d188acc4..8b7106ad 100644 --- a/tests/integration/data/test_update.py +++ b/tests/integration/data/test_update.py @@ -72,7 +72,14 @@ def poll_until_update_reflected( ) if expected_metadata is not None: - metadata_match = vec.metadata == expected_metadata + # Check that all expected metadata fields are present and match + # (metadata may be merged, so we check for our fields specifically) + if vec.metadata is None: + metadata_match = False + else: + metadata_match = all( + vec.metadata.get(k) == v for k, v in expected_metadata.items() + ) if values_match and metadata_match: logger.debug(f"Update reflected for vector {vector_id}") @@ -173,14 +180,41 @@ def test_update_only_metadata_no_values(self, idx, update_namespace): new_metadata = {"genre": "thriller", "year": 2023} idx.update(id=vector_id, set_metadata=new_metadata, namespace=target_namespace) - # Wait for update to be reflected - poll_until_update_reflected( - idx, vector_id, target_namespace, expected_metadata=new_metadata, timeout=180 - ) + # Wait for update to be reflected - check that specified fields are present + # Note: set_metadata may replace or merge, so we check for the fields we set + def check_metadata_update(): + fetched = idx.fetch(ids=[vector_id], namespace=target_namespace) + if vector_id in fetched.vectors: + vec = fetched.vectors[vector_id] + if vec.metadata is not None: + # Check that our specified fields match + return ( + vec.metadata.get("genre") == "thriller" and vec.metadata.get("year") == 2023 + ) + return False + + timeout = 180 + delta_t = 2 + total_time = 0 + max_delta_t = 10 + + while total_time < timeout: + if check_metadata_update(): + break + time.sleep(delta_t) + total_time += delta_t + delta_t = min(delta_t * 1.5, max_delta_t) + else: + raise TimeoutError( + f"Timed out waiting for metadata update on vector {vector_id} in namespace {target_namespace}" + ) # Verify metadata updated but values unchanged fetched_vec = idx.fetch(ids=[vector_id], namespace=target_namespace) - assert fetched_vec.vectors[vector_id].metadata == new_metadata + # Check that the fields we set are present + assert fetched_vec.vectors[vector_id].metadata is not None + assert fetched_vec.vectors[vector_id].metadata.get("genre") == "thriller" + assert fetched_vec.vectors[vector_id].metadata.get("year") == 2023 # Values should remain the same (approximately, due to floating point) assert len(fetched_vec.vectors[vector_id].values) == len(original_values) assert fetched_vec.vectors[vector_id].values[0] == pytest.approx(original_values[0], 0.01) diff --git a/tests/integration/data_grpc_futures/test_update_future.py b/tests/integration/data_grpc_futures/test_update_future.py index 6a98ef08..cf80717e 100644 --- a/tests/integration/data_grpc_futures/test_update_future.py +++ b/tests/integration/data_grpc_futures/test_update_future.py @@ -71,7 +71,14 @@ def poll_until_update_reflected_async( ) if expected_metadata is not None: - metadata_match = vec.metadata == expected_metadata + # Check that all expected metadata fields are present and match + # (metadata may be merged, so we check for our fields specifically) + if vec.metadata is None: + metadata_match = False + else: + metadata_match = all( + vec.metadata.get(k) == v for k, v in expected_metadata.items() + ) if values_match and metadata_match: logger.debug(f"Update reflected for vector {vector_id}") @@ -201,7 +208,8 @@ def test_update_multiple_async(self, idx, namespace_update_async): result = future.result() assert result == {} # Update response should be empty dict - # Wait for all updates to be reflected + # Wait for all updates to be reflected - check each one individually + # with a reasonable timeout per vector for vector_id, new_values, new_metadata in updates: poll_until_update_reflected_async( idx, @@ -209,11 +217,16 @@ def test_update_multiple_async(self, idx, namespace_update_async): target_namespace, expected_values=new_values, expected_metadata=new_metadata, - timeout=180, + timeout=240, # Increased timeout for async operations ) # Verify all updates fetched_vecs = idx.fetch(ids=[str(i) for i in range(5, 8)], namespace=target_namespace) for vector_id, new_values, new_metadata in updates: assert fetched_vecs.vectors[vector_id].values[0] == pytest.approx(new_values[0], 0.01) - assert fetched_vecs.vectors[vector_id].metadata == new_metadata + # Check that metadata fields are present (may be merged with existing) + assert fetched_vecs.vectors[vector_id].metadata is not None + assert fetched_vecs.vectors[vector_id].metadata.get("genre") == new_metadata["genre"] + assert ( + fetched_vecs.vectors[vector_id].metadata.get("updated") == new_metadata["updated"] + )