Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 4 additions & 19 deletions pinecone/db_control/request_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,25 +245,10 @@ def __parse_index_spec(spec: Union[Dict, ServerlessSpec, PodSpec, ByocSpec]) ->
if "schema" in spec["serverless"]:
schema_dict = spec["serverless"]["schema"]
if isinstance(schema_dict, dict):
# Process fields if present, otherwise pass through as-is
schema_kwargs = {}
if "fields" in schema_dict:
fields = {}
for field_name, field_config in schema_dict["fields"].items():
if isinstance(field_config, dict):
# Pass through the entire field_config dict to allow future API fields
fields[field_name] = BackupModelSchemaFields(**field_config)
else:
# If not a dict, create with default filterable=True
fields[field_name] = BackupModelSchemaFields(filterable=True)
schema_kwargs["fields"] = fields

# Pass through any other fields in schema_dict to allow future API fields
for key, value in schema_dict.items():
if key != "fields":
schema_kwargs[key] = value

spec["serverless"]["schema"] = BackupModelSchema(**schema_kwargs)
# Use the helper method to handle both formats correctly
spec["serverless"]["schema"] = (
PineconeDBControlRequestFactory.__parse_schema(schema_dict)
)

index_spec = IndexSpec(serverless=ServerlessSpecModel(**spec["serverless"]))
elif "pod" in spec:
Expand Down
87 changes: 85 additions & 2 deletions pinecone/db_data/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -514,25 +514,108 @@ def query_namespaces(
@validate_and_convert_errors
def update(
self,
id: str,
id: Optional[str] = None,
values: Optional[List[float]] = None,
set_metadata: Optional[VectorMetadataTypedDict] = None,
namespace: Optional[str] = None,
sparse_values: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
filter: Optional[FilterTypedDict] = None,
dry_run: Optional[bool] = None,
**kwargs,
) -> Dict[str, Any]:
return self._vector_api.update_vector(
"""Update vector(s) in a namespace by ID or metadata filter.

The update can be performed by vector ID or by metadata filter. When updating by ID,
a single vector is updated. When updating by metadata filter, all vectors matching
the filter are updated.

If values is included, it will overwrite the previous values.
If set_metadata is included, the fields specified in it will be added to the
metadata, overwriting the previous value of any field that already exists.

Examples:

Update by ID:

.. code-block:: python

>>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace')
>>> index.update(id='id1', set_metadata={'key': 'value'}, namespace='my_namespace')
>>> index.update(id='id1', values=[1, 2, 3], sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]},
...              namespace='my_namespace')
>>> index.update(id='id1', values=[1, 2, 3], sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4]),
...              namespace='my_namespace')

Update by metadata filter:

.. code-block:: python

>>> # Update metadata for all vectors matching a filter
>>> index.update(
... filter={'genre': {'$eq': 'comedy'}},
... set_metadata={'status': 'active'},
... namespace='my_namespace'
... )
>>> # Preview how many vectors would be updated (dry run)
>>> result = index.update(
... filter={'year': {'$gte': 2020}},
... set_metadata={'updated': True},
... dry_run=True,
... namespace='my_namespace'
... )
>>> print(f"Would update {result.get('matched_records', 0)} vectors")

Args:
id (str): Vector's unique id. Required when updating by ID. Must be None when filter is provided. [optional]
values (List[float]): Vector values to set. [optional]
set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]):
Metadata to set for vector(s). [optional]
namespace (str): Namespace name where to update the vector(s). [optional]
sparse_values (Dict[str, Union[List[float], List[int]]]): Sparse values to update for the vector.
Expected to be either a SparseValues object or a dict of the form:
{'indices': List[int], 'values': List[float]} where the lists each have the same length. [optional]
filter (Dict[str, Union[str, float, int, bool, List, dict]]): A metadata filter expression.
When provided, the update is applied to all records that match the filter. Mutually exclusive with id.
See `metadata filtering <https://www.pinecone.io/docs/metadata-filtering/>`_ [optional]
dry_run (bool): If True, return the number of records that match the filter without executing the update.
Only meaningful when filter is provided. Defaults to False. [optional]

Returns:
Dict[str, Any]: An empty dictionary if the update was successful when updating by ID.
When updating by filter, the dictionary may contain a 'matched_records' key indicating
how many records matched the filter (even when dry_run is False).

Raises:
ValueError: If both id and filter are provided, or if neither is provided.
"""
if id is not None and filter is not None:
raise ValueError(
"Cannot provide both 'id' and 'filter'. Use 'id' to update a single vector or 'filter' to update multiple vectors."
)
if id is None and filter is None:
raise ValueError("Either 'id' or 'filter' must be provided.")

response = self._vector_api.update_vector(
IndexRequestFactory.update_request(
id=id,
values=values,
set_metadata=set_metadata,
namespace=namespace,
sparse_values=sparse_values,
filter=filter,
dry_run=dry_run,
**kwargs,
),
**self._openapi_kwargs(kwargs),
)

# Convert UpdateResponse to dict, including matched_records if present
result = {}
if hasattr(response, "matched_records") and response.matched_records is not None:
result["matched_records"] = response.matched_records

return result

@validate_and_convert_errors
def describe_index_stats(
self, filter: Optional[FilterTypedDict] = None, **kwargs
Expand Down
125 changes: 123 additions & 2 deletions pinecone/db_data/index_asyncio.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,25 +554,146 @@ async def query_namespaces(
@validate_and_convert_errors
async def update(
self,
id: str,
id: Optional[str] = None,
values: Optional[List[float]] = None,
set_metadata: Optional[VectorMetadataTypedDict] = None,
namespace: Optional[str] = None,
sparse_values: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
filter: Optional[FilterTypedDict] = None,
dry_run: Optional[bool] = None,
**kwargs,
) -> Dict[str, Any]:
return await self._vector_api.update_vector(
"""Update vector(s) in a namespace by ID or metadata filter.

The update can be performed by vector ID or by metadata filter. When updating by ID,
a single vector is updated. When updating by metadata filter, all vectors matching
the filter are updated.

If values is included, it will overwrite the previous values.
If set_metadata is included, the fields specified in it will be added to the
metadata, overwriting the previous value of any field that already exists.

Examples:

Update by ID:

.. code-block:: python

import asyncio
from pinecone import Pinecone, Vector, SparseValues

async def main():
pc = Pinecone()
async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
# Update vector values
await idx.update(
id='id1',
values=[0.1, 0.2, 0.3, ...],
namespace='my_namespace'
)

# Update metadata
await idx.update(
id='id1',
set_metadata={'key': 'value'},
namespace='my_namespace'
)

# Update sparse values
await idx.update(
id='id1',
sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]},
namespace='my_namespace'
)

# Update sparse values with SparseValues object
await idx.update(
id='id1',
sparse_values=SparseValues(indices=[234781, 5432], values=[0.2, 0.4]),
namespace='my_namespace'
)

asyncio.run(main())

Update by metadata filter:

.. code-block:: python

import asyncio
from pinecone import Pinecone

async def main():
pc = Pinecone()
async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
# Update metadata for all vectors matching a filter
await idx.update(
filter={'genre': {'$eq': 'comedy'}},
set_metadata={'status': 'active'},
namespace='my_namespace'
)

# Preview how many vectors would be updated (dry run)
result = await idx.update(
filter={'year': {'$gte': 2020}},
set_metadata={'updated': True},
dry_run=True,
namespace='my_namespace'
)
print(f"Would update {result.get('matched_records', 0)} vectors")

asyncio.run(main())

Args:
id (str): Vector's unique id. Required when updating by ID. Must be None when filter is provided. [optional]
values (List[float]): Vector values to set. [optional]
set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]):
Metadata to set for vector(s). [optional]
namespace (str): Namespace name where to update the vector(s). [optional]
sparse_values (Dict[str, Union[List[float], List[int]]]): Sparse values to update for the vector.
Expected to be either a SparseValues object or a dict of the form:
{'indices': List[int], 'values': List[float]} where the lists each have the same length. [optional]
filter (Dict[str, Union[str, float, int, bool, List, dict]]): A metadata filter expression.
When provided, the update is applied to all records that match the filter. Mutually exclusive with id.
See `metadata filtering <https://www.pinecone.io/docs/metadata-filtering/>`_ [optional]
dry_run (bool): If True, return the number of records that match the filter without executing the update.
Only meaningful when filter is provided. Defaults to False. [optional]

Returns:
Dict[str, Any]: An empty dictionary if the update was successful when updating by ID.
When updating by filter, the dictionary may contain a 'matched_records' key indicating
how many records matched the filter (even when dry_run is False).

Raises:
ValueError: If both id and filter are provided, or if neither is provided.
"""
if id is not None and filter is not None:
raise ValueError(
"Cannot provide both 'id' and 'filter'. Use 'id' to update a single vector or 'filter' to update multiple vectors."
)
if id is None and filter is None:
raise ValueError("Either 'id' or 'filter' must be provided.")

response = await self._vector_api.update_vector(
IndexRequestFactory.update_request(
id=id,
values=values,
set_metadata=set_metadata,
namespace=namespace,
sparse_values=sparse_values,
filter=filter,
dry_run=dry_run,
**kwargs,
),
**self._openapi_kwargs(kwargs),
)

# Convert UpdateResponse to dict, including matched_records if present
result = {}
if hasattr(response, "matched_records") and response.matched_records is not None:
result["matched_records"] = response.matched_records

return result

@validate_and_convert_errors
async def describe_index_stats(
self, filter: Optional[FilterTypedDict] = None, **kwargs
Expand Down
76 changes: 61 additions & 15 deletions pinecone/db_data/index_asyncio_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,33 +520,29 @@ async def main():
@abstractmethod
async def update(
self,
id: str,
id: Optional[str] = None,
values: Optional[List[float]] = None,
set_metadata: Optional[VectorMetadataTypedDict] = None,
namespace: Optional[str] = None,
sparse_values: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
filter: Optional[FilterTypedDict] = None,
dry_run: Optional[bool] = None,
**kwargs,
) -> Dict[str, Any]:
"""
The Update operation updates vector in a namespace.
"""The Update operation updates vector(s) in a namespace.

Args:
id (str): Vector's unique id.
values (List[float]): vector values to set. [optional]
set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]):
metadata to set for vector. [optional]
namespace (str): Namespace name where to update the vector.. [optional]
sparse_values: (Dict[str, Union[List[float], List[int]]]): sparse values to update for the vector.
Expected to be either a SparseValues object or a dict of the form:
{'indices': List[int], 'values': List[float]} where the lists each have the same length.
The update can be performed by vector ID or by metadata filter. When updating by ID,
a single vector is updated. When updating by metadata filter, all vectors matching
the filter are updated.

If a value is included, it will overwrite the previous value.
If a set_metadata is included,
the values of the fields specified in it will be added or overwrite the previous value.

If set_metadata is included, the fields specified in it will be added to the
metadata, overwriting the previous value of any field that already exists.

Examples:

Update by ID:

.. code-block:: python

import asyncio
Expand Down Expand Up @@ -585,6 +581,56 @@ async def main():

asyncio.run(main())

Update by metadata filter:

.. code-block:: python

import asyncio
from pinecone import Pinecone

async def main():
pc = Pinecone()
async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
# Update metadata for all vectors matching a filter
await idx.update(
filter={'genre': {'$eq': 'comedy'}},
set_metadata={'status': 'active'},
namespace='my_namespace'
)

# Preview how many vectors would be updated (dry run)
result = await idx.update(
filter={'year': {'$gte': 2020}},
set_metadata={'updated': True},
dry_run=True,
namespace='my_namespace'
)
print(f"Would update {result.get('matched_records', 0)} vectors")

asyncio.run(main())

Args:
id (str): Vector's unique id. Required when updating by ID. Must be None when filter is provided. [optional]
values (List[float]): Vector values to set. [optional]
set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]):
Metadata to set for vector(s). [optional]
namespace (str): Namespace name where to update the vector(s). [optional]
sparse_values (Dict[str, Union[List[float], List[int]]]): Sparse values to update for the vector.
Expected to be either a SparseValues object or a dict of the form:
{'indices': List[int], 'values': List[float]} where the lists each have the same length. [optional]
filter (Dict[str, Union[str, float, int, bool, List, dict]]): A metadata filter expression.
When provided, the update is applied to all records that match the filter. Mutually exclusive with id.
See `metadata filtering <https://www.pinecone.io/docs/metadata-filtering/>`_ [optional]
dry_run (bool): If True, return the number of records that match the filter without executing the update.
Only meaningful when filter is provided. Defaults to False. [optional]

Returns:
Dict[str, Any]: An empty dictionary if the update was successful when updating by ID.
When updating by filter, the dictionary may contain a 'matched_records' key indicating
how many records matched the filter (even when dry_run is False).

Raises:
ValueError: If both id and filter are provided, or if neither is provided.
"""
pass

Expand Down
Loading
Loading