From b1c8ce729444fe6c53e144bfc6e39ec255ec7d4b Mon Sep 17 00:00:00 2001
From: Miguel Grinberg
Date: Tue, 29 Jul 2025 17:04:46 +0100
Subject: [PATCH 01/21] first generator run for 9.1 (#3011)
---
elasticsearch/_async/client/__init__.py | 10 --
elasticsearch/_async/client/cluster.py | 17 ++-
elasticsearch/_async/client/indices.py | 134 +----------------------
elasticsearch/_async/client/inference.py | 111 ++++++++++++++++++-
elasticsearch/_async/client/ml.py | 2 +-
elasticsearch/_async/client/snapshot.py | 5 -
elasticsearch/_sync/client/__init__.py | 10 --
elasticsearch/_sync/client/cluster.py | 17 ++-
elasticsearch/_sync/client/indices.py | 134 +----------------------
elasticsearch/_sync/client/inference.py | 111 ++++++++++++++++++-
elasticsearch/_sync/client/ml.py | 2 +-
elasticsearch/_sync/client/snapshot.py | 5 -
elasticsearch/dsl/field.py | 8 --
elasticsearch/dsl/types.py | 50 +--------
14 files changed, 244 insertions(+), 372 deletions(-)
diff --git a/elasticsearch/_async/client/__init__.py b/elasticsearch/_async/client/__init__.py
index 0874e120f..902834328 100644
--- a/elasticsearch/_async/client/__init__.py
+++ b/elasticsearch/_async/client/__init__.py
@@ -2234,7 +2234,6 @@ async def field_caps(
@_rewrite_parameters(
parameter_aliases={
"_source": "source",
- "_source_exclude_vectors": "source_exclude_vectors",
"_source_excludes": "source_excludes",
"_source_includes": "source_includes",
},
@@ -2254,7 +2253,6 @@ async def get(
refresh: t.Optional[bool] = None,
routing: t.Optional[str] = None,
source: t.Optional[t.Union[bool, t.Union[str, t.Sequence[str]]]] = None,
- source_exclude_vectors: t.Optional[bool] = None,
source_excludes: t.Optional[t.Union[str, t.Sequence[str]]] = None,
source_includes: t.Optional[t.Union[str, t.Sequence[str]]] = None,
stored_fields: t.Optional[t.Union[str, t.Sequence[str]]] = None,
@@ -2328,7 +2326,6 @@ async def get(
:param routing: A custom value used to route operations to a specific shard.
:param source: Indicates whether to return the `_source` field (`true` or `false`)
or lists the fields to return.
- :param source_exclude_vectors: Whether vectors should be excluded from _source
:param source_excludes: A comma-separated list of source fields to exclude from
the response. You can also use this parameter to exclude fields from the
subset specified in `_source_includes` query parameter. If the `_source`
@@ -2374,8 +2371,6 @@ async def get(
__query["routing"] = routing
if source is not None:
__query["_source"] = source
- if source_exclude_vectors is not None:
- __query["_source_exclude_vectors"] = source_exclude_vectors
if source_excludes is not None:
__query["_source_excludes"] = source_excludes
if source_includes is not None:
@@ -4309,7 +4304,6 @@ async def scroll(
),
parameter_aliases={
"_source": "source",
- "_source_exclude_vectors": "source_exclude_vectors",
"_source_excludes": "source_excludes",
"_source_includes": "source_includes",
"from": "from_",
@@ -4393,7 +4387,6 @@ async def search(
]
] = None,
source: t.Optional[t.Union[bool, t.Mapping[str, t.Any]]] = None,
- source_exclude_vectors: t.Optional[bool] = None,
source_excludes: t.Optional[t.Union[str, t.Sequence[str]]] = None,
source_includes: t.Optional[t.Union[str, t.Sequence[str]]] = None,
stats: t.Optional[t.Sequence[str]] = None,
@@ -4588,7 +4581,6 @@ async def search(
fields are returned in the `hits._source` property of the search response.
If the `stored_fields` property is specified, the `_source` property defaults
to `false`. Otherwise, it defaults to `true`.
- :param source_exclude_vectors: Whether vectors should be excluded from _source
:param source_excludes: A comma-separated list of source fields to exclude from
the response. You can also use this parameter to exclude fields from the
subset specified in `_source_includes` query parameter. If the `_source`
@@ -4713,8 +4705,6 @@ async def search(
__query["scroll"] = scroll
if search_type is not None:
__query["search_type"] = search_type
- if source_exclude_vectors is not None:
- __query["_source_exclude_vectors"] = source_exclude_vectors
if source_excludes is not None:
__query["_source_excludes"] = source_excludes
if source_includes is not None:
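Callers that previously passed `source_exclude_vectors` to `get()` or `search()` should now get a `TypeError`, since the generated methods no longer accept the parameter. A minimal sketch of one adjustment, assuming a hypothetical `title_vector` field, is to name the fields to exclude explicitly:

```python
# Sketch: exclude vector fields from _source by name instead of using
# the removed source_exclude_vectors flag (field name is hypothetical).
resp = client.search(
    index="my-index",
    query={"match_all": {}},
    source_excludes=["title_vector"],
)
```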
diff --git a/elasticsearch/_async/client/cluster.py b/elasticsearch/_async/client/cluster.py
index 91956f7c4..9ae420766 100644
--- a/elasticsearch/_async/client/cluster.py
+++ b/elasticsearch/_async/client/cluster.py
@@ -49,7 +49,6 @@ async def allocation_explain(
Explain the shard allocations.
Get explanations for shard allocations in the cluster.
- This API accepts the current_node, index, primary and shard parameters in the request body or in query parameters, but not in both at the same time.
For unassigned shards, it provides an explanation for why the shard is unassigned.
For assigned shards, it provides an explanation for why the shard is remaining on its current node and has not moved or rebalanced to another node.
This API can be very useful when attempting to diagnose why a shard is unassigned or why a shard continues to remain on its current node when you might expect otherwise.
@@ -58,16 +57,17 @@ async def allocation_explain(
``_
- :param current_node: Explain a shard only if it is currently located on the specified
- node name or node ID.
+ :param current_node: Specifies the node ID or the name of the node to only explain
+ a shard that is currently located on the specified node.
:param include_disk_info: If true, returns information about disk usage and shard
sizes.
:param include_yes_decisions: If true, returns YES decisions in explanation.
- :param index: The name of the index that you would like an explanation for.
+ :param index: Specifies the name of the index that you would like an explanation
+ for.
:param master_timeout: Period to wait for a connection to the master node.
- :param primary: If true, returns an explanation for the primary shard for the
- specified shard ID.
- :param shard: An identifier for the shard that you would like an explanation
+ :param primary: If true, returns explanation for the primary shard for the given
+ shard ID.
+ :param shard: Specifies the ID of the shard that you would like an explanation
for.
"""
__path_parts: t.Dict[str, str] = {}
@@ -1119,8 +1119,7 @@ async def state(
when unavailable (missing or closed)
:param local: Return local information, do not retrieve the state from master
node (default: false)
- :param master_timeout: Timeout for waiting for new cluster state in case it is
- blocked
+ :param master_timeout: Specify timeout for connection to master
:param wait_for_metadata_version: Wait for the metadata version to be equal or
greater than the specified metadata version
:param wait_for_timeout: The maximum time to wait for wait_for_metadata_version
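For orientation, the reworded parameters map onto a call like the following sketch (the index name and shard number are placeholders):

```python
async def explain_primary(client):
    # Sketch: ask the cluster why the primary of shard 0 of a hypothetical
    # index is on its current node, or why it is unassigned.
    return await client.cluster.allocation_explain(
        index="my-index",
        shard=0,
        primary=True,
        include_disk_info=True,  # also report disk usage and shard sizes
    )
```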
diff --git a/elasticsearch/_async/client/indices.py b/elasticsearch/_async/client/indices.py
index 2a40027c2..0f627227c 100644
--- a/elasticsearch/_async/client/indices.py
+++ b/elasticsearch/_async/client/indices.py
@@ -1208,7 +1208,7 @@ async def delete_data_stream_options(
Removes the data stream options from a data stream.
- ``_
+ ``_
:param name: A comma-separated list of data streams of which the data stream
options will be deleted; use `*` to get all data streams
@@ -2542,57 +2542,6 @@ async def get_data_stream(
path_parts=__path_parts,
)
- @_rewrite_parameters()
- async def get_data_stream_mappings(
- self,
- *,
- name: t.Union[str, t.Sequence[str]],
- error_trace: t.Optional[bool] = None,
- filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
- human: t.Optional[bool] = None,
- master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
- pretty: t.Optional[bool] = None,
- ) -> ObjectApiResponse[t.Any]:
- """
- .. raw:: html
-
- Get data stream mappings.
- Get mapping information for one or more data streams.
-
-
- ``_
-
- :param name: A comma-separated list of data streams or data stream patterns.
- Supports wildcards (`*`).
- :param master_timeout: The period to wait for a connection to the master node.
- If no response is received before the timeout expires, the request fails
- and returns an error.
- """
- if name in SKIP_IN_PATH:
- raise ValueError("Empty value passed for parameter 'name'")
- __path_parts: t.Dict[str, str] = {"name": _quote(name)}
- __path = f'/_data_stream/{__path_parts["name"]}/_mappings'
- __query: t.Dict[str, t.Any] = {}
- if error_trace is not None:
- __query["error_trace"] = error_trace
- if filter_path is not None:
- __query["filter_path"] = filter_path
- if human is not None:
- __query["human"] = human
- if master_timeout is not None:
- __query["master_timeout"] = master_timeout
- if pretty is not None:
- __query["pretty"] = pretty
- __headers = {"accept": "application/json"}
- return await self.perform_request( # type: ignore[return-value]
- "GET",
- __path,
- params=__query,
- headers=__headers,
- endpoint_id="indices.get_data_stream_mappings",
- path_parts=__path_parts,
- )
-
@_rewrite_parameters()
async def get_data_stream_options(
self,
@@ -2619,7 +2568,7 @@ async def get_data_stream_options(
Get the data stream options configuration of one or more data streams.
- ``_
+ ``_
:param name: Comma-separated list of data streams to limit the request. Supports
wildcards (`*`). To target all data streams, omit this parameter or use `*`
@@ -3704,83 +3653,6 @@ async def put_data_lifecycle(
path_parts=__path_parts,
)
- @_rewrite_parameters(
- body_name="mappings",
- )
- async def put_data_stream_mappings(
- self,
- *,
- name: t.Union[str, t.Sequence[str]],
- mappings: t.Optional[t.Mapping[str, t.Any]] = None,
- body: t.Optional[t.Mapping[str, t.Any]] = None,
- dry_run: t.Optional[bool] = None,
- error_trace: t.Optional[bool] = None,
- filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
- human: t.Optional[bool] = None,
- master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
- pretty: t.Optional[bool] = None,
- timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
- ) -> ObjectApiResponse[t.Any]:
- """
- .. raw:: html
-
- Update data stream mappings.
- This API can be used to override mappings on specific data streams. These overrides will take precedence over what
- is specified in the template that the data stream matches. The mapping change is only applied to new write indices
- that are created during rollover after this API is called. No indices are changed by this API.
-
-
- ``_
-
- :param name: A comma-separated list of data streams or data stream patterns.
- :param mappings:
- :param dry_run: If `true`, the request does not actually change the mappings
- on any data streams. Instead, it simulates changing the settings and reports
- back to the user what would have happened had these settings actually been
- applied.
- :param master_timeout: The period to wait for a connection to the master node.
- If no response is received before the timeout expires, the request fails
- and returns an error.
- :param timeout: The period to wait for a response. If no response is received
- before the timeout expires, the request fails and returns an error.
- """
- if name in SKIP_IN_PATH:
- raise ValueError("Empty value passed for parameter 'name'")
- if mappings is None and body is None:
- raise ValueError(
- "Empty value passed for parameters 'mappings' and 'body', one of them should be set."
- )
- elif mappings is not None and body is not None:
- raise ValueError("Cannot set both 'mappings' and 'body'")
- __path_parts: t.Dict[str, str] = {"name": _quote(name)}
- __path = f'/_data_stream/{__path_parts["name"]}/_mappings'
- __query: t.Dict[str, t.Any] = {}
- if dry_run is not None:
- __query["dry_run"] = dry_run
- if error_trace is not None:
- __query["error_trace"] = error_trace
- if filter_path is not None:
- __query["filter_path"] = filter_path
- if human is not None:
- __query["human"] = human
- if master_timeout is not None:
- __query["master_timeout"] = master_timeout
- if pretty is not None:
- __query["pretty"] = pretty
- if timeout is not None:
- __query["timeout"] = timeout
- __body = mappings if mappings is not None else body
- __headers = {"accept": "application/json", "content-type": "application/json"}
- return await self.perform_request( # type: ignore[return-value]
- "PUT",
- __path,
- params=__query,
- headers=__headers,
- body=__body,
- endpoint_id="indices.put_data_stream_mappings",
- path_parts=__path_parts,
- )
-
@_rewrite_parameters(
body_fields=("failure_store",),
)
@@ -3812,7 +3684,7 @@ async def put_data_stream_options(
Update the data stream options of the specified data streams.
- ``_
+ ``_
:param name: Comma-separated list of data streams used to limit the request.
Supports wildcards (`*`). To target all data streams use `*` or `_all`.
diff --git a/elasticsearch/_async/client/inference.py b/elasticsearch/_async/client/inference.py
index 58b51a72e..422e51522 100644
--- a/elasticsearch/_async/client/inference.py
+++ b/elasticsearch/_async/client/inference.py
@@ -391,8 +391,9 @@ async def put(
- AlibabaCloud AI Search (completion, rerank, sparse_embedding, text_embedding)
- Amazon Bedrock (completion, text_embedding)
+ - Amazon SageMaker (chat_completion, completion, rerank, sparse_embedding, text_embedding)
- Anthropic (completion)
- - Azure AI Studio (completion, 'rerank', text_embedding)
+ - Azure AI Studio (completion, text_embedding)
- Azure OpenAI (completion, text_embedding)
- Cohere (completion, rerank, text_embedding)
- DeepSeek (completion, chat_completion)
@@ -659,6 +660,112 @@ async def put_amazonbedrock(
path_parts=__path_parts,
)
+ @_rewrite_parameters(
+ body_fields=(
+ "service",
+ "service_settings",
+ "chunking_settings",
+ "task_settings",
+ ),
+ )
+ async def put_amazonsagemaker(
+ self,
+ *,
+ task_type: t.Union[
+ str,
+ t.Literal[
+ "chat_completion",
+ "completion",
+ "rerank",
+ "sparse_embedding",
+ "text_embedding",
+ ],
+ ],
+ amazonsagemaker_inference_id: str,
+ service: t.Optional[t.Union[str, t.Literal["amazon_sagemaker"]]] = None,
+ service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ pretty: t.Optional[bool] = None,
+ task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+ body: t.Optional[t.Dict[str, t.Any]] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ Create an Amazon SageMaker inference endpoint.
+ Create an inference endpoint to perform an inference task with the amazon_sagemaker service.
+
+
+ ``_
+
+ :param task_type: The type of the inference task that the model will perform.
+ :param amazonsagemaker_inference_id: The unique identifier of the inference endpoint.
+ :param service: The type of service supported for the specified task type. In
+ this case, `amazon_sagemaker`.
+ :param service_settings: Settings used to install the inference model. These
+ settings are specific to the `amazon_sagemaker` service and `service_settings.api`
+ you specified.
+ :param chunking_settings: The chunking configuration object.
+ :param task_settings: Settings to configure the inference task. These settings
+ are specific to the task type and `service_settings.api` you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
+ """
+ if task_type in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'task_type'")
+ if amazonsagemaker_inference_id in SKIP_IN_PATH:
+ raise ValueError(
+ "Empty value passed for parameter 'amazonsagemaker_inference_id'"
+ )
+ if service is None and body is None:
+ raise ValueError("Empty value passed for parameter 'service'")
+ if service_settings is None and body is None:
+ raise ValueError("Empty value passed for parameter 'service_settings'")
+ __path_parts: t.Dict[str, str] = {
+ "task_type": _quote(task_type),
+ "amazonsagemaker_inference_id": _quote(amazonsagemaker_inference_id),
+ }
+ __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["amazonsagemaker_inference_id"]}'
+ __query: t.Dict[str, t.Any] = {}
+ __body: t.Dict[str, t.Any] = body if body is not None else {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if pretty is not None:
+ __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
+ if not __body:
+ if service is not None:
+ __body["service"] = service
+ if service_settings is not None:
+ __body["service_settings"] = service_settings
+ if chunking_settings is not None:
+ __body["chunking_settings"] = chunking_settings
+ if task_settings is not None:
+ __body["task_settings"] = task_settings
+ if not __body:
+ __body = None # type: ignore[assignment]
+ __headers = {"accept": "application/json"}
+ if __body is not None:
+ __headers["content-type"] = "application/json"
+ return await self.perform_request( # type: ignore[return-value]
+ "PUT",
+ __path,
+ params=__query,
+ headers=__headers,
+ body=__body,
+ endpoint_id="inference.put_amazonsagemaker",
+ path_parts=__path_parts,
+ )
+
@_rewrite_parameters(
body_fields=(
"service",
@@ -767,7 +874,7 @@ async def put_anthropic(
async def put_azureaistudio(
self,
*,
- task_type: t.Union[str, t.Literal["completion", "rerank", "text_embedding"]],
+ task_type: t.Union[str, t.Literal["completion", "text_embedding"]],
azureaistudio_inference_id: str,
service: t.Optional[t.Union[str, t.Literal["azureaistudio"]]] = None,
service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
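The new method follows the same shape as the other inference `put_*` endpoints; a hedged sketch follows (the `service_settings` content is a placeholder, since the docstring above only tells us that a `service_settings.api` key is meaningful for this service):

```python
async def create_sagemaker_endpoint(client):
    # Sketch: register a text_embedding endpoint backed by Amazon SageMaker.
    # The service_settings values are placeholders, not a verified schema.
    return await client.inference.put_amazonsagemaker(
        task_type="text_embedding",
        amazonsagemaker_inference_id="my-sagemaker-endpoint",
        service="amazon_sagemaker",
        service_settings={"api": "elastic"},  # placeholder settings
    )
```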
diff --git a/elasticsearch/_async/client/ml.py b/elasticsearch/_async/client/ml.py
index fb5af673f..6876e45bd 100644
--- a/elasticsearch/_async/client/ml.py
+++ b/elasticsearch/_async/client/ml.py
@@ -5716,7 +5716,7 @@ async def validate(
Validate an anomaly detection job.
- ``_
+ ``_
:param analysis_config:
:param analysis_limits:
diff --git a/elasticsearch/_async/client/snapshot.py b/elasticsearch/_async/client/snapshot.py
index c4ece9be9..31a3dff57 100644
--- a/elasticsearch/_async/client/snapshot.py
+++ b/elasticsearch/_async/client/snapshot.py
@@ -1266,11 +1266,6 @@ async def status(
If you omit the <snapshot> request path parameter, the request retrieves information only for currently running snapshots.
This usage is preferred.
If needed, you can specify <repository> and <snapshot> to retrieve information for specific snapshots, even if they're not currently running.
- Note that the stats will not be available for any shard snapshots in an ongoing snapshot completed by a node that (even momentarily) left the cluster.
- Loading the stats from the repository is an expensive operation (see the WARNING below).
- Therefore the stats values for such shards will be -1 even though the "stage" value will be "DONE", in order to minimize latency.
- A "description" field will be present for a shard snapshot completed by a departed node explaining why the shard snapshot's stats results are invalid.
- Consequently, the total stats for the index will be less than expected due to the missing values from these shards.
WARNING: Using the API to return the status of any snapshots other than currently running snapshots can be expensive.
The API requires a read from the repository for each shard in each snapshot.
For example, if you have 100 snapshots with 1,000 shards each, an API request that includes all snapshots will require 100,000 reads (100 snapshots x 1,000 shards).
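The deleted paragraph aside, the docstring's preferred usage still holds; a minimal sketch:

```python
async def running_snapshot_status(client):
    # Omitting <repository> and <snapshot> limits the report to currently
    # running snapshots, which avoids the expensive per-shard repository
    # reads described in the WARNING above.
    return await client.snapshot.status()
```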
diff --git a/elasticsearch/_sync/client/__init__.py b/elasticsearch/_sync/client/__init__.py
index 5f7a4313d..40f4cbed6 100644
--- a/elasticsearch/_sync/client/__init__.py
+++ b/elasticsearch/_sync/client/__init__.py
@@ -2232,7 +2232,6 @@ def field_caps(
@_rewrite_parameters(
parameter_aliases={
"_source": "source",
- "_source_exclude_vectors": "source_exclude_vectors",
"_source_excludes": "source_excludes",
"_source_includes": "source_includes",
},
@@ -2252,7 +2251,6 @@ def get(
refresh: t.Optional[bool] = None,
routing: t.Optional[str] = None,
source: t.Optional[t.Union[bool, t.Union[str, t.Sequence[str]]]] = None,
- source_exclude_vectors: t.Optional[bool] = None,
source_excludes: t.Optional[t.Union[str, t.Sequence[str]]] = None,
source_includes: t.Optional[t.Union[str, t.Sequence[str]]] = None,
stored_fields: t.Optional[t.Union[str, t.Sequence[str]]] = None,
@@ -2326,7 +2324,6 @@ def get(
:param routing: A custom value used to route operations to a specific shard.
:param source: Indicates whether to return the `_source` field (`true` or `false`)
or lists the fields to return.
- :param source_exclude_vectors: Whether vectors should be excluded from _source
:param source_excludes: A comma-separated list of source fields to exclude from
the response. You can also use this parameter to exclude fields from the
subset specified in `_source_includes` query parameter. If the `_source`
@@ -2372,8 +2369,6 @@ def get(
__query["routing"] = routing
if source is not None:
__query["_source"] = source
- if source_exclude_vectors is not None:
- __query["_source_exclude_vectors"] = source_exclude_vectors
if source_excludes is not None:
__query["_source_excludes"] = source_excludes
if source_includes is not None:
@@ -4307,7 +4302,6 @@ def scroll(
),
parameter_aliases={
"_source": "source",
- "_source_exclude_vectors": "source_exclude_vectors",
"_source_excludes": "source_excludes",
"_source_includes": "source_includes",
"from": "from_",
@@ -4391,7 +4385,6 @@ def search(
]
] = None,
source: t.Optional[t.Union[bool, t.Mapping[str, t.Any]]] = None,
- source_exclude_vectors: t.Optional[bool] = None,
source_excludes: t.Optional[t.Union[str, t.Sequence[str]]] = None,
source_includes: t.Optional[t.Union[str, t.Sequence[str]]] = None,
stats: t.Optional[t.Sequence[str]] = None,
@@ -4586,7 +4579,6 @@ def search(
fields are returned in the `hits._source` property of the search response.
If the `stored_fields` property is specified, the `_source` property defaults
to `false`. Otherwise, it defaults to `true`.
- :param source_exclude_vectors: Whether vectors should be excluded from _source
:param source_excludes: A comma-separated list of source fields to exclude from
the response. You can also use this parameter to exclude fields from the
subset specified in `_source_includes` query parameter. If the `_source`
@@ -4711,8 +4703,6 @@ def search(
__query["scroll"] = scroll
if search_type is not None:
__query["search_type"] = search_type
- if source_exclude_vectors is not None:
- __query["_source_exclude_vectors"] = source_exclude_vectors
if source_excludes is not None:
__query["_source_excludes"] = source_excludes
if source_includes is not None:
diff --git a/elasticsearch/_sync/client/cluster.py b/elasticsearch/_sync/client/cluster.py
index a56892d54..2d4eebc54 100644
--- a/elasticsearch/_sync/client/cluster.py
+++ b/elasticsearch/_sync/client/cluster.py
@@ -49,7 +49,6 @@ def allocation_explain(
Explain the shard allocations.
Get explanations for shard allocations in the cluster.
- This API accepts the current_node, index, primary and shard parameters in the request body or in query parameters, but not in both at the same time.
For unassigned shards, it provides an explanation for why the shard is unassigned.
For assigned shards, it provides an explanation for why the shard is remaining on its current node and has not moved or rebalanced to another node.
This API can be very useful when attempting to diagnose why a shard is unassigned or why a shard continues to remain on its current node when you might expect otherwise.
@@ -58,16 +57,17 @@ def allocation_explain(
``_
- :param current_node: Explain a shard only if it is currently located on the specified
- node name or node ID.
+ :param current_node: Specifies the node ID or the name of the node to only explain
+ a shard that is currently located on the specified node.
:param include_disk_info: If true, returns information about disk usage and shard
sizes.
:param include_yes_decisions: If true, returns YES decisions in explanation.
- :param index: The name of the index that you would like an explanation for.
+ :param index: Specifies the name of the index that you would like an explanation
+ for.
:param master_timeout: Period to wait for a connection to the master node.
- :param primary: If true, returns an explanation for the primary shard for the
- specified shard ID.
- :param shard: An identifier for the shard that you would like an explanation
+ :param primary: If true, returns explanation for the primary shard for the given
+ shard ID.
+ :param shard: Specifies the ID of the shard that you would like an explanation
for.
"""
__path_parts: t.Dict[str, str] = {}
@@ -1119,8 +1119,7 @@ def state(
when unavailable (missing or closed)
:param local: Return local information, do not retrieve the state from master
node (default: false)
- :param master_timeout: Timeout for waiting for new cluster state in case it is
- blocked
+ :param master_timeout: Specify timeout for connection to master
:param wait_for_metadata_version: Wait for the metadata version to be equal or
greater than the specified metadata version
:param wait_for_timeout: The maximum time to wait for wait_for_metadata_version
diff --git a/elasticsearch/_sync/client/indices.py b/elasticsearch/_sync/client/indices.py
index 8697d971b..ba1830708 100644
--- a/elasticsearch/_sync/client/indices.py
+++ b/elasticsearch/_sync/client/indices.py
@@ -1208,7 +1208,7 @@ def delete_data_stream_options(
Removes the data stream options from a data stream.
- ``_
+ ``_
:param name: A comma-separated list of data streams of which the data stream
options will be deleted; use `*` to get all data streams
@@ -2542,57 +2542,6 @@ def get_data_stream(
path_parts=__path_parts,
)
- @_rewrite_parameters()
- def get_data_stream_mappings(
- self,
- *,
- name: t.Union[str, t.Sequence[str]],
- error_trace: t.Optional[bool] = None,
- filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
- human: t.Optional[bool] = None,
- master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
- pretty: t.Optional[bool] = None,
- ) -> ObjectApiResponse[t.Any]:
- """
- .. raw:: html
-
- Get data stream mappings.
- Get mapping information for one or more data streams.
-
-
- ``_
-
- :param name: A comma-separated list of data streams or data stream patterns.
- Supports wildcards (`*`).
- :param master_timeout: The period to wait for a connection to the master node.
- If no response is received before the timeout expires, the request fails
- and returns an error.
- """
- if name in SKIP_IN_PATH:
- raise ValueError("Empty value passed for parameter 'name'")
- __path_parts: t.Dict[str, str] = {"name": _quote(name)}
- __path = f'/_data_stream/{__path_parts["name"]}/_mappings'
- __query: t.Dict[str, t.Any] = {}
- if error_trace is not None:
- __query["error_trace"] = error_trace
- if filter_path is not None:
- __query["filter_path"] = filter_path
- if human is not None:
- __query["human"] = human
- if master_timeout is not None:
- __query["master_timeout"] = master_timeout
- if pretty is not None:
- __query["pretty"] = pretty
- __headers = {"accept": "application/json"}
- return self.perform_request( # type: ignore[return-value]
- "GET",
- __path,
- params=__query,
- headers=__headers,
- endpoint_id="indices.get_data_stream_mappings",
- path_parts=__path_parts,
- )
-
@_rewrite_parameters()
def get_data_stream_options(
self,
@@ -2619,7 +2568,7 @@ def get_data_stream_options(
Get the data stream options configuration of one or more data streams.
- ``_
+ ``_
:param name: Comma-separated list of data streams to limit the request. Supports
wildcards (`*`). To target all data streams, omit this parameter or use `*`
@@ -3704,83 +3653,6 @@ def put_data_lifecycle(
path_parts=__path_parts,
)
- @_rewrite_parameters(
- body_name="mappings",
- )
- def put_data_stream_mappings(
- self,
- *,
- name: t.Union[str, t.Sequence[str]],
- mappings: t.Optional[t.Mapping[str, t.Any]] = None,
- body: t.Optional[t.Mapping[str, t.Any]] = None,
- dry_run: t.Optional[bool] = None,
- error_trace: t.Optional[bool] = None,
- filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
- human: t.Optional[bool] = None,
- master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
- pretty: t.Optional[bool] = None,
- timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
- ) -> ObjectApiResponse[t.Any]:
- """
- .. raw:: html
-
- Update data stream mappings.
- This API can be used to override mappings on specific data streams. These overrides will take precedence over what
- is specified in the template that the data stream matches. The mapping change is only applied to new write indices
- that are created during rollover after this API is called. No indices are changed by this API.
-
-
- ``_
-
- :param name: A comma-separated list of data streams or data stream patterns.
- :param mappings:
- :param dry_run: If `true`, the request does not actually change the mappings
- on any data streams. Instead, it simulates changing the settings and reports
- back to the user what would have happened had these settings actually been
- applied.
- :param master_timeout: The period to wait for a connection to the master node.
- If no response is received before the timeout expires, the request fails
- and returns an error.
- :param timeout: The period to wait for a response. If no response is received
- before the timeout expires, the request fails and returns an error.
- """
- if name in SKIP_IN_PATH:
- raise ValueError("Empty value passed for parameter 'name'")
- if mappings is None and body is None:
- raise ValueError(
- "Empty value passed for parameters 'mappings' and 'body', one of them should be set."
- )
- elif mappings is not None and body is not None:
- raise ValueError("Cannot set both 'mappings' and 'body'")
- __path_parts: t.Dict[str, str] = {"name": _quote(name)}
- __path = f'/_data_stream/{__path_parts["name"]}/_mappings'
- __query: t.Dict[str, t.Any] = {}
- if dry_run is not None:
- __query["dry_run"] = dry_run
- if error_trace is not None:
- __query["error_trace"] = error_trace
- if filter_path is not None:
- __query["filter_path"] = filter_path
- if human is not None:
- __query["human"] = human
- if master_timeout is not None:
- __query["master_timeout"] = master_timeout
- if pretty is not None:
- __query["pretty"] = pretty
- if timeout is not None:
- __query["timeout"] = timeout
- __body = mappings if mappings is not None else body
- __headers = {"accept": "application/json", "content-type": "application/json"}
- return self.perform_request( # type: ignore[return-value]
- "PUT",
- __path,
- params=__query,
- headers=__headers,
- body=__body,
- endpoint_id="indices.put_data_stream_mappings",
- path_parts=__path_parts,
- )
-
@_rewrite_parameters(
body_fields=("failure_store",),
)
@@ -3812,7 +3684,7 @@ def put_data_stream_options(
Update the data stream options of the specified data streams.
- ``_
+ ``_
:param name: Comma-separated list of data streams used to limit the request.
Supports wildcards (`*`). To target all data streams use `*` or `_all`.
diff --git a/elasticsearch/_sync/client/inference.py b/elasticsearch/_sync/client/inference.py
index b7bab7f1c..8505f25e5 100644
--- a/elasticsearch/_sync/client/inference.py
+++ b/elasticsearch/_sync/client/inference.py
@@ -391,8 +391,9 @@ def put(
- AlibabaCloud AI Search (completion, rerank, sparse_embedding, text_embedding)
- Amazon Bedrock (completion, text_embedding)
+ - Amazon SageMaker (chat_completion, completion, rerank, sparse_embedding, text_embedding)
- Anthropic (completion)
- - Azure AI Studio (completion, 'rerank', text_embedding)
+ - Azure AI Studio (completion, text_embedding)
- Azure OpenAI (completion, text_embedding)
- Cohere (completion, rerank, text_embedding)
- DeepSeek (completion, chat_completion)
@@ -659,6 +660,112 @@ def put_amazonbedrock(
path_parts=__path_parts,
)
+ @_rewrite_parameters(
+ body_fields=(
+ "service",
+ "service_settings",
+ "chunking_settings",
+ "task_settings",
+ ),
+ )
+ def put_amazonsagemaker(
+ self,
+ *,
+ task_type: t.Union[
+ str,
+ t.Literal[
+ "chat_completion",
+ "completion",
+ "rerank",
+ "sparse_embedding",
+ "text_embedding",
+ ],
+ ],
+ amazonsagemaker_inference_id: str,
+ service: t.Optional[t.Union[str, t.Literal["amazon_sagemaker"]]] = None,
+ service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ pretty: t.Optional[bool] = None,
+ task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+ body: t.Optional[t.Dict[str, t.Any]] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ Create an Amazon SageMaker inference endpoint.
+ Create an inference endpoint to perform an inference task with the amazon_sagemaker service.
+
+
+ ``_
+
+ :param task_type: The type of the inference task that the model will perform.
+ :param amazonsagemaker_inference_id: The unique identifier of the inference endpoint.
+ :param service: The type of service supported for the specified task type. In
+ this case, `amazon_sagemaker`.
+ :param service_settings: Settings used to install the inference model. These
+ settings are specific to the `amazon_sagemaker` service and `service_settings.api`
+ you specified.
+ :param chunking_settings: The chunking configuration object.
+ :param task_settings: Settings to configure the inference task. These settings
+ are specific to the task type and `service_settings.api` you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
+ """
+ if task_type in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'task_type'")
+ if amazonsagemaker_inference_id in SKIP_IN_PATH:
+ raise ValueError(
+ "Empty value passed for parameter 'amazonsagemaker_inference_id'"
+ )
+ if service is None and body is None:
+ raise ValueError("Empty value passed for parameter 'service'")
+ if service_settings is None and body is None:
+ raise ValueError("Empty value passed for parameter 'service_settings'")
+ __path_parts: t.Dict[str, str] = {
+ "task_type": _quote(task_type),
+ "amazonsagemaker_inference_id": _quote(amazonsagemaker_inference_id),
+ }
+ __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["amazonsagemaker_inference_id"]}'
+ __query: t.Dict[str, t.Any] = {}
+ __body: t.Dict[str, t.Any] = body if body is not None else {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if pretty is not None:
+ __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
+ if not __body:
+ if service is not None:
+ __body["service"] = service
+ if service_settings is not None:
+ __body["service_settings"] = service_settings
+ if chunking_settings is not None:
+ __body["chunking_settings"] = chunking_settings
+ if task_settings is not None:
+ __body["task_settings"] = task_settings
+ if not __body:
+ __body = None # type: ignore[assignment]
+ __headers = {"accept": "application/json"}
+ if __body is not None:
+ __headers["content-type"] = "application/json"
+ return self.perform_request( # type: ignore[return-value]
+ "PUT",
+ __path,
+ params=__query,
+ headers=__headers,
+ body=__body,
+ endpoint_id="inference.put_amazonsagemaker",
+ path_parts=__path_parts,
+ )
+
@_rewrite_parameters(
body_fields=(
"service",
@@ -767,7 +874,7 @@ def put_anthropic(
def put_azureaistudio(
self,
*,
- task_type: t.Union[str, t.Literal["completion", "rerank", "text_embedding"]],
+ task_type: t.Union[str, t.Literal["completion", "text_embedding"]],
azureaistudio_inference_id: str,
service: t.Optional[t.Union[str, t.Literal["azureaistudio"]]] = None,
service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
diff --git a/elasticsearch/_sync/client/ml.py b/elasticsearch/_sync/client/ml.py
index 690197642..a5e98142a 100644
--- a/elasticsearch/_sync/client/ml.py
+++ b/elasticsearch/_sync/client/ml.py
@@ -5716,7 +5716,7 @@ def validate(
Validate an anomaly detection job.
- ``_
+ ``_
:param analysis_config:
:param analysis_limits:
diff --git a/elasticsearch/_sync/client/snapshot.py b/elasticsearch/_sync/client/snapshot.py
index 09691ce8e..5a3a542c0 100644
--- a/elasticsearch/_sync/client/snapshot.py
+++ b/elasticsearch/_sync/client/snapshot.py
@@ -1266,11 +1266,6 @@ def status(
If you omit the <snapshot> request path parameter, the request retrieves information only for currently running snapshots.
This usage is preferred.
If needed, you can specify <repository> and <snapshot> to retrieve information for specific snapshots, even if they're not currently running.
- Note that the stats will not be available for any shard snapshots in an ongoing snapshot completed by a node that (even momentarily) left the cluster.
- Loading the stats from the repository is an expensive operation (see the WARNING below).
- Therefore the stats values for such shards will be -1 even though the "stage" value will be "DONE", in order to minimize latency.
- A "description" field will be present for a shard snapshot completed by a departed node explaining why the shard snapshot's stats results are invalid.
- Consequently, the total stats for the index will be less than expected due to the missing values from these shards.
WARNING: Using the API to return the status of any snapshots other than currently running snapshots can be expensive.
The API requires a read from the repository for each shard in each snapshot.
For example, if you have 100 snapshots with 1,000 shards each, an API request that includes all snapshots will require 100,000 reads (100 snapshots x 1,000 shards).
diff --git a/elasticsearch/dsl/field.py b/elasticsearch/dsl/field.py
index c33261458..03c712e0a 100644
--- a/elasticsearch/dsl/field.py
+++ b/elasticsearch/dsl/field.py
@@ -3849,9 +3849,6 @@ class SemanticText(Field):
by using the Update mapping API. Use the Create inference API to
create the endpoint. If not specified, the inference endpoint
defined by inference_id will be used at both index and query time.
- :arg index_options: Settings for index_options that override any
- defaults used by semantic_text, for example specific quantization
- settings.
:arg chunking_settings: Settings for chunking text into smaller
passages. If specified, these will override the chunking settings
sent in the inference endpoint associated with inference_id. If
@@ -3867,9 +3864,6 @@ def __init__(
meta: Union[Mapping[str, str], "DefaultType"] = DEFAULT,
inference_id: Union[str, "DefaultType"] = DEFAULT,
search_inference_id: Union[str, "DefaultType"] = DEFAULT,
- index_options: Union[
- "types.SemanticTextIndexOptions", Dict[str, Any], "DefaultType"
- ] = DEFAULT,
chunking_settings: Union[
"types.ChunkingSettings", Dict[str, Any], "DefaultType"
] = DEFAULT,
@@ -3881,8 +3875,6 @@ def __init__(
kwargs["inference_id"] = inference_id
if search_inference_id is not DEFAULT:
kwargs["search_inference_id"] = search_inference_id
- if index_options is not DEFAULT:
- kwargs["index_options"] = index_options
if chunking_settings is not DEFAULT:
kwargs["chunking_settings"] = chunking_settings
super().__init__(*args, **kwargs)
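With `index_options` removed, a `SemanticText` declaration now carries only the inference-related arguments; a sketch, assuming `SemanticText` is imported from `elasticsearch.dsl` and the endpoint ID is a placeholder:

```python
from elasticsearch.dsl import Document, SemanticText

class Article(Document):
    # index_options is no longer accepted here after this change; the
    # inference_id value is a placeholder for an existing endpoint.
    summary = SemanticText(inference_id="my-inference-endpoint")

    class Index:
        name = "articles"
```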
diff --git a/elasticsearch/dsl/types.py b/elasticsearch/dsl/types.py
index 383a69d83..452a945dd 100644
--- a/elasticsearch/dsl/types.py
+++ b/elasticsearch/dsl/types.py
@@ -144,26 +144,8 @@ def __init__(
class ChunkingSettings(AttrDict[Any]):
"""
- :arg strategy: (required) The chunking strategy: `sentence`, `word`,
- `none` or `recursive`. * If `strategy` is set to `recursive`,
- you must also specify: - `max_chunk_size` - either `separators`
- or`separator_group` Learn more about different chunking
- strategies in the linked documentation. Defaults to `sentence` if
- omitted.
- :arg separator_group: (required) This parameter is only applicable
- when using the `recursive` chunking strategy. Sets a predefined
- list of separators in the saved chunking settings based on the
- selected text type. Values can be `markdown` or `plaintext`.
- Using this parameter is an alternative to manually specifying a
- custom `separators` list.
- :arg separators: (required) A list of strings used as possible split
- points when chunking text with the `recursive` strategy. Each
- string can be a plain string or a regular expression (regex)
- pattern. The system tries each separator in order to split the
- text, starting from the first item in the list. After splitting,
- it attempts to recombine smaller pieces into larger chunks that
- stay within the `max_chunk_size` limit, to reduce the total number
- of chunks generated.
+ :arg strategy: (required) The chunking strategy: `sentence` or `word`.
+ Defaults to `sentence` if omitted.
:arg max_chunk_size: (required) The maximum size of a chunk in words.
This value cannot be higher than `300` or lower than `20` (for
`sentence` strategy) or `10` (for `word` strategy). Defaults to
@@ -178,8 +160,6 @@ class ChunkingSettings(AttrDict[Any]):
"""
strategy: Union[str, DefaultType]
- separator_group: Union[str, DefaultType]
- separators: Union[Sequence[str], DefaultType]
max_chunk_size: Union[int, DefaultType]
overlap: Union[int, DefaultType]
sentence_overlap: Union[int, DefaultType]
@@ -188,8 +168,6 @@ def __init__(
self,
*,
strategy: Union[str, DefaultType] = DEFAULT,
- separator_group: Union[str, DefaultType] = DEFAULT,
- separators: Union[Sequence[str], DefaultType] = DEFAULT,
max_chunk_size: Union[int, DefaultType] = DEFAULT,
overlap: Union[int, DefaultType] = DEFAULT,
sentence_overlap: Union[int, DefaultType] = DEFAULT,
@@ -197,10 +175,6 @@ def __init__(
):
if strategy is not DEFAULT:
kwargs["strategy"] = strategy
- if separator_group is not DEFAULT:
- kwargs["separator_group"] = separator_group
- if separators is not DEFAULT:
- kwargs["separators"] = separators
if max_chunk_size is not DEFAULT:
kwargs["max_chunk_size"] = max_chunk_size
if overlap is not DEFAULT:
@@ -3165,26 +3139,6 @@ def __init__(
super().__init__(kwargs)
-class SemanticTextIndexOptions(AttrDict[Any]):
- """
- :arg dense_vector:
- """
-
- dense_vector: Union["DenseVectorIndexOptions", Dict[str, Any], DefaultType]
-
- def __init__(
- self,
- *,
- dense_vector: Union[
- "DenseVectorIndexOptions", Dict[str, Any], DefaultType
- ] = DEFAULT,
- **kwargs: Any,
- ):
- if dense_vector is not DEFAULT:
- kwargs["dense_vector"] = dense_vector
- super().__init__(kwargs)
-
-
class ShapeFieldQuery(AttrDict[Any]):
"""
:arg indexed_shape: Queries using a pre-indexed shape.
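After the removals above, `ChunkingSettings` is back to the `sentence` and `word` strategies; an illustrative construction that stays within the documented word limits:

```python
from elasticsearch.dsl.types import ChunkingSettings

# Only the sentence and word strategies remain after this change; the
# values below are illustrative and within the documented 20-300 range.
settings = ChunkingSettings(
    strategy="sentence",
    max_chunk_size=250,
    sentence_overlap=1,
)
```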
From 7f79d0ea604477c07cb49b5af5e423b2858fa586 Mon Sep 17 00:00:00 2001
From: Miguel Grinberg
Date: Wed, 30 Jul 2025 09:51:18 +0100
Subject: [PATCH 02/21] Release 9.1.0 (#3012)
---
docs/release-notes/index.md | 50 +++++++++++++++++++++++++++++++++++++
elasticsearch/_version.py | 2 +-
2 files changed, 51 insertions(+), 1 deletion(-)
diff --git a/docs/release-notes/index.md b/docs/release-notes/index.md
index 314030cdd..76a1c610b 100644
--- a/docs/release-notes/index.md
+++ b/docs/release-notes/index.md
@@ -18,6 +18,56 @@ To check for security updates, go to [Security announcements for the Elastic sta
% *
% ### Fixes [elasticsearch-python-client-next-fixes]
+## 9.1.0 (2025-07-30)
+
+Enhancements
+
+* ES|QL query builder (technical preview) ([#2997](https://github.com/elastic/elasticsearch-py/pull/2997))
+* Update OpenTelemetry conventions ([#2999](https://github.com/elastic/elasticsearch-py/pull/2999))
+* Add option to disable accurate reporting of file and line location in warnings (Fixes #3003) ([#3006](https://github.com/elastic/elasticsearch-py/pull/3006))
+
+APIs
+
+* Remove `if_primary_term`, `if_seq_no` and `op_type` from Create API
+* Remove `master_timeout` from Ingest Get IP Location Database API
+* Remove `application`, `priviledge` and `username` from the Security Get User API
+* Rename `type_query_string` to `type` in License Post Start Trial API
+* Add `require_data_stream` to Index API
+* Add `settings_filter` to Cluster Get Component Template API
+* Add `cause` to Cluster Put Component Template API
+* Add `master_timeout` to Cluster State API
+* Add `ccs_minimize_roundtrips` to EQL Search API
+* Add `keep_alive` and `keep_on_completion` to ES|QL Async Query API
+* Add `format` to ES|QL Async Query Get API
+* Add ES|QL Get Query and List Queries APIs
+* Add Indices Delete Data Stream Options API
+* Add Indices Get Data Stream Options and Put Data Stream Options APIs
+* Add Indices Get Data Stream Settings and Put Data Stream Settings APIs
+* Add `allow_no_indices`, `expand_wildcards` and `ignore_available` to Indices Recovery API
+* Add Indices Remove Block API
+* Add Amazon SageMaker to Inference API
+* Add `input_type` to Inference API
+* Add `timeout` to all Inference Put APIs
+* Add Inference Put Custom API
+* Add Inference Put DeepSeek API
+* Add `task_settings` to Put HuggingFace API
+* Add `refresh` to Security Grant API Key API
+* Add `wait_for_completion` to the Snapshot Delete API
+* Add `state` to Snapshot Get API
+* Add `refresh` to Synonyms Put Synonym, Put Synonym Rule and Delete Synonym Rule APIs
+
+DSL
+
+* Handle lists in `copy_to` option in DSL field declarations correctly (Fixes #2992) ([#2993](https://github.com/elastic/elasticsearch-py/pull/2993))
+* Add `index_options` to SparseVector type
+* Add SparseVectorIndexOptions type
+* Add `key` to FiltersBucket type
+
+Other changes
+
+* Drop support for Python 3.8 ([#3001](https://github.com/elastic/elasticsearch-py/pull/3001))
+
+
## 9.0.2 (2025-06-05) [elasticsearch-python-client-902-release-notes]
diff --git a/elasticsearch/_version.py b/elasticsearch/_version.py
index 0624a7ff1..7b6c8994d 100644
--- a/elasticsearch/_version.py
+++ b/elasticsearch/_version.py
@@ -15,4 +15,4 @@
# specific language governing permissions and limitations
# under the License.
-__versionstr__ = "9.0.2"
+__versionstr__ = "9.1.0"
From 41b2064cb885278a791aa96131ecbcfd077c590e Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Fri, 1 Aug 2025 17:01:35 +0100
Subject: [PATCH 03/21] Address integration test failures in Python 3.8 (#3018)
(#3023)
(cherry picked from commit 67c444c8143bcbaf7c9d80516538575c989e6cd6)
Co-authored-by: Miguel Grinberg
---
test_elasticsearch/test_server/test_rest_api_spec.py | 9 ++++++++-
test_elasticsearch/utils.py | 2 +-
2 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/test_elasticsearch/test_server/test_rest_api_spec.py b/test_elasticsearch/test_server/test_rest_api_spec.py
index a84f0822a..f12db87aa 100644
--- a/test_elasticsearch/test_server/test_rest_api_spec.py
+++ b/test_elasticsearch/test_server/test_rest_api_spec.py
@@ -78,6 +78,7 @@
"cluster/voting_config_exclusions",
"entsearch/10_basic",
"indices/clone",
+ "indices/data_stream_mappings[0]",
"indices/resolve_cluster",
"indices/settings",
"indices/split",
@@ -501,7 +502,13 @@ def remove_implicit_resolver(cls, tag_to_remove):
)
# Download the zip and start reading YAML from the files in memory
- package_zip = zipfile.ZipFile(io.BytesIO(http.request("GET", yaml_tests_url).data))
+ package_zip = zipfile.ZipFile(
+ io.BytesIO(
+ http.request(
+ "GET", yaml_tests_url, retries=urllib3.Retry(3, redirect=10)
+ ).data
+ )
+ )
for yaml_file in package_zip.namelist():
if not re.match(r"^.*\/tests\/.*\.ya?ml$", yaml_file):
diff --git a/test_elasticsearch/utils.py b/test_elasticsearch/utils.py
index 021deb76e..cfcb5259c 100644
--- a/test_elasticsearch/utils.py
+++ b/test_elasticsearch/utils.py
@@ -179,7 +179,7 @@ def wipe_data_streams(client):
def wipe_indices(client):
indices = client.cat.indices().strip().splitlines()
if len(indices) > 0:
- index_names = [i.split(" ")[2] for i in indices]
+ index_names = [i.split()[2] for i in indices]
client.options(ignore_status=404).indices.delete(
index=",".join(index_names),
expand_wildcards="all",
From 58ceb258f9c42f83a1ef37e8dadfad67918f0e82 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Fri, 1 Aug 2025 17:29:07 +0100
Subject: [PATCH 04/21] ES|QL query builder robustness fixes (#3017) (#3025)
* Add note on how to prevent ES|QL injection attacks
* Various additional query builder fixes
* linter fixes
(cherry picked from commit e3e85ed38c303b792cd44f892c961c3c45804c55)
Co-authored-by: Miguel Grinberg
---
docs/reference/esql-query-builder.md | 22 +++-
elasticsearch/esql/__init__.py | 1 +
elasticsearch/esql/esql.py | 119 +++++++++++++-----
elasticsearch/esql/functions.py | 56 +++++----
.../_async/test_esql.py | 12 +-
.../{ => test_integration}/_sync/test_esql.py | 12 +-
test_elasticsearch/test_esql.py | 16 ++-
7 files changed, 175 insertions(+), 63 deletions(-)
rename test_elasticsearch/test_dsl/{ => test_integration}/_async/test_esql.py (88%)
rename test_elasticsearch/test_dsl/{ => test_integration}/_sync/test_esql.py (88%)
diff --git a/docs/reference/esql-query-builder.md b/docs/reference/esql-query-builder.md
index 1cdc0c5b3..8390ea983 100644
--- a/docs/reference/esql-query-builder.md
+++ b/docs/reference/esql-query-builder.md
@@ -203,6 +203,26 @@ query = (
)
```
+### Preventing injection attacks
+
+ES|QL, like most query languages, is vulnerable to [code injection attacks](https://en.wikipedia.org/wiki/Code_injection) if untrusted data provided by users is added to a query. To eliminate this risk, ES|QL allows untrusted data to be given separately from the query as parameters.
+
+Continuing with the example above, let's assume that the application needs a `find_employee_by_name()` function that searches for the name given as an argument. If this argument is received by the application from users, then it is considered untrusted and should not be added to the query directly. Here is how to code the function in a secure manner:
+
+```python
+def find_employee_by_name(name):
+ query = (
+ ESQL.from_("employees")
+ .keep("first_name", "last_name", "height")
+ .where(E("first_name") == E("?"))
+ )
+ return client.esql.query(query=str(query), params=[name])
+```
+
+Here the part of the query where the untrusted data needs to be inserted is replaced with a parameter, which ES|QL denotes with a question mark. When using Python expressions, the parameter must be given as `E("?")` so that it is treated as an expression and not as a literal string.
+
+The values given in the `params` argument to the query endpoint are assigned, in order, to the parameters defined in the query.
+
## Using ES|QL functions
The ES|QL language includes a rich set of functions that can be used in expressions and conditionals. These can be included in expressions given as strings, as shown in the example below:
@@ -235,6 +255,6 @@ query = (
)
```
-Note that arguments passed to functions are assumed to be literals. When passing field names, it is necessary to wrap them with the `E()` helper function so that they are interpreted correctly.
+Note that arguments passed to functions are assumed to be literals. When passing field names, parameters or other ES|QL expressions, it is necessary to wrap them with the `E()` helper function so that they are interpreted correctly.
You can find the complete list of available functions in the Python client's [ES|QL API reference documentation](https://elasticsearch-py.readthedocs.io/en/stable/esql.html#module-elasticsearch.esql.functions).
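As a follow-on sketch to the new injection section, multiple `?` placeholders bind in order, so a second untrusted value extends the pattern naturally (the `height` comparison is illustrative):

```python
def find_employee(name, min_height):
    query = (
        ESQL.from_("employees")
        .keep("first_name", "last_name", "height")
        .where(E("first_name") == E("?"))
        .where(E("height") > E("?"))
    )
    # params are bound, in order, to the two ? placeholders above
    return client.esql.query(query=str(query), params=[name, min_height])
```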
diff --git a/elasticsearch/esql/__init__.py b/elasticsearch/esql/__init__.py
index d872c329a..8da8f852a 100644
--- a/elasticsearch/esql/__init__.py
+++ b/elasticsearch/esql/__init__.py
@@ -15,4 +15,5 @@
# specific language governing permissions and limitations
# under the License.
+from ..dsl import E # noqa: F401
from .esql import ESQL, and_, not_, or_ # noqa: F401
diff --git a/elasticsearch/esql/esql.py b/elasticsearch/esql/esql.py
index 07ccdf839..05f4e3e3e 100644
--- a/elasticsearch/esql/esql.py
+++ b/elasticsearch/esql/esql.py
@@ -16,6 +16,7 @@
# under the License.
import json
+import re
from abc import ABC, abstractmethod
from typing import Any, Dict, Optional, Tuple, Type, Union
@@ -111,6 +112,29 @@ def render(self) -> str:
def _render_internal(self) -> str:
pass
+ @staticmethod
+ def _format_index(index: IndexType) -> str:
+ return index._index._name if hasattr(index, "_index") else str(index)
+
+ @staticmethod
+ def _format_id(id: FieldType, allow_patterns: bool = False) -> str:
+ s = str(id) # in case it is an InstrumentedField
+ if allow_patterns and "*" in s:
+ return s # patterns cannot be escaped
+ if re.fullmatch(r"[a-zA-Z_@][a-zA-Z0-9_\.]*", s):
+ return s
+ # this identifier needs to be escaped
+ s.replace("`", "``")
+ return f"`{s}`"
+
+ @staticmethod
+ def _format_expr(expr: ExpressionType) -> str:
+ return (
+ json.dumps(expr)
+ if not isinstance(expr, (str, InstrumentedExpression))
+ else str(expr)
+ )
+
def _is_forked(self) -> bool:
if self.__class__.__name__ == "Fork":
return True
@@ -427,7 +451,7 @@ def sample(self, probability: float) -> "Sample":
"""
return Sample(self, probability)
- def sort(self, *columns: FieldType) -> "Sort":
+ def sort(self, *columns: ExpressionType) -> "Sort":
"""The ``SORT`` processing command sorts a table on one or more columns.
:param columns: The columns to sort on.
@@ -570,15 +594,12 @@ def metadata(self, *fields: FieldType) -> "From":
return self
def _render_internal(self) -> str:
- indices = [
- index if isinstance(index, str) else index._index._name
- for index in self._indices
- ]
+ indices = [self._format_index(index) for index in self._indices]
s = f'{self.__class__.__name__.upper()} {", ".join(indices)}'
if self._metadata_fields:
s = (
s
- + f' METADATA {", ".join([str(field) for field in self._metadata_fields])}'
+ + f' METADATA {", ".join([self._format_id(field) for field in self._metadata_fields])}'
)
return s
@@ -594,7 +615,11 @@ class Row(ESQLBase):
def __init__(self, **params: ExpressionType):
super().__init__()
self._params = {
- k: json.dumps(v) if not isinstance(v, InstrumentedExpression) else v
+ self._format_id(k): (
+ json.dumps(v)
+ if not isinstance(v, InstrumentedExpression)
+ else self._format_expr(v)
+ )
for k, v in params.items()
}
@@ -615,7 +640,7 @@ def __init__(self, item: str):
self._item = item
def _render_internal(self) -> str:
- return f"SHOW {self._item}"
+ return f"SHOW {self._format_id(self._item)}"
class Branch(ESQLBase):
@@ -667,11 +692,11 @@ def as_(self, type_name: str, pvalue_name: str) -> "ChangePoint":
return self
def _render_internal(self) -> str:
- key = "" if not self._key else f" ON {self._key}"
+ key = "" if not self._key else f" ON {self._format_id(self._key)}"
names = (
""
if not self._type_name and not self._pvalue_name
- else f' AS {self._type_name or "type"}, {self._pvalue_name or "pvalue"}'
+ else f' AS {self._format_id(self._type_name or "type")}, {self._format_id(self._pvalue_name or "pvalue")}'
)
return f"CHANGE_POINT {self._value}{key}{names}"
@@ -709,12 +734,13 @@ def with_(self, inference_id: str) -> "Completion":
def _render_internal(self) -> str:
if self._inference_id is None:
raise ValueError("The completion command requires an inference ID")
+ with_ = {"inference_id": self._inference_id}
if self._named_prompt:
column = list(self._named_prompt.keys())[0]
prompt = list(self._named_prompt.values())[0]
- return f"COMPLETION {column} = {prompt} WITH {self._inference_id}"
+ return f"COMPLETION {self._format_id(column)} = {self._format_id(prompt)} WITH {json.dumps(with_)}"
else:
- return f"COMPLETION {self._prompt[0]} WITH {self._inference_id}"
+ return f"COMPLETION {self._format_id(self._prompt[0])} WITH {json.dumps(with_)}"
class Dissect(ESQLBase):
@@ -742,9 +768,13 @@ def append_separator(self, separator: str) -> "Dissect":
def _render_internal(self) -> str:
sep = (
- "" if self._separator is None else f' APPEND_SEPARATOR="{self._separator}"'
+ ""
+ if self._separator is None
+ else f" APPEND_SEPARATOR={json.dumps(self._separator)}"
+ )
+ return (
+ f"DISSECT {self._format_id(self._input)} {json.dumps(self._pattern)}{sep}"
)
- return f"DISSECT {self._input} {json.dumps(self._pattern)}{sep}"
class Drop(ESQLBase):
@@ -760,7 +790,7 @@ def __init__(self, parent: ESQLBase, *columns: FieldType):
self._columns = columns
def _render_internal(self) -> str:
- return f'DROP {", ".join([str(col) for col in self._columns])}'
+ return f'DROP {", ".join([self._format_id(col, allow_patterns=True) for col in self._columns])}'
class Enrich(ESQLBase):
@@ -814,12 +844,18 @@ def with_(self, *fields: FieldType, **named_fields: FieldType) -> "Enrich":
return self
def _render_internal(self) -> str:
- on = "" if self._match_field is None else f" ON {self._match_field}"
+ on = (
+ ""
+ if self._match_field is None
+ else f" ON {self._format_id(self._match_field)}"
+ )
with_ = ""
if self._named_fields:
- with_ = f' WITH {", ".join([f"{name} = {field}" for name, field in self._named_fields.items()])}'
+ with_ = f' WITH {", ".join([f"{self._format_id(name)} = {self._format_id(field)}" for name, field in self._named_fields.items()])}'
elif self._fields is not None:
- with_ = f' WITH {", ".join([str(field) for field in self._fields])}'
+ with_ = (
+ f' WITH {", ".join([self._format_id(field) for field in self._fields])}'
+ )
return f"ENRICH {self._policy}{on}{with_}"
@@ -832,7 +868,10 @@ class Eval(ESQLBase):
"""
def __init__(
- self, parent: ESQLBase, *columns: FieldType, **named_columns: FieldType
+ self,
+ parent: ESQLBase,
+ *columns: ExpressionType,
+ **named_columns: ExpressionType,
):
if columns and named_columns:
raise ValueError(
@@ -844,10 +883,13 @@ def __init__(
def _render_internal(self) -> str:
if isinstance(self._columns, dict):
cols = ", ".join(
- [f"{name} = {value}" for name, value in self._columns.items()]
+ [
+ f"{self._format_id(name)} = {self._format_expr(value)}"
+ for name, value in self._columns.items()
+ ]
)
else:
- cols = ", ".join([f"{col}" for col in self._columns])
+ cols = ", ".join([f"{self._format_expr(col)}" for col in self._columns])
return f"EVAL {cols}"
@@ -900,7 +942,7 @@ def __init__(self, parent: ESQLBase, input: FieldType, pattern: str):
self._pattern = pattern
def _render_internal(self) -> str:
- return f"GROK {self._input} {json.dumps(self._pattern)}"
+ return f"GROK {self._format_id(self._input)} {json.dumps(self._pattern)}"
class Keep(ESQLBase):
@@ -916,7 +958,7 @@ def __init__(self, parent: ESQLBase, *columns: FieldType):
self._columns = columns
def _render_internal(self) -> str:
- return f'KEEP {", ".join([f"{col}" for col in self._columns])}'
+ return f'KEEP {", ".join([f"{self._format_id(col, allow_patterns=True)}" for col in self._columns])}'
class Limit(ESQLBase):
@@ -932,7 +974,7 @@ def __init__(self, parent: ESQLBase, max_number_of_rows: int):
self._max_number_of_rows = max_number_of_rows
def _render_internal(self) -> str:
- return f"LIMIT {self._max_number_of_rows}"
+ return f"LIMIT {json.dumps(self._max_number_of_rows)}"
class LookupJoin(ESQLBase):
@@ -967,7 +1009,9 @@ def _render_internal(self) -> str:
if isinstance(self._lookup_index, str)
else self._lookup_index._index._name
)
- return f"LOOKUP JOIN {index} ON {self._field}"
+ return (
+ f"LOOKUP JOIN {self._format_index(index)} ON {self._format_id(self._field)}"
+ )
class MvExpand(ESQLBase):
@@ -983,7 +1027,7 @@ def __init__(self, parent: ESQLBase, column: FieldType):
self._column = column
def _render_internal(self) -> str:
- return f"MV_EXPAND {self._column}"
+ return f"MV_EXPAND {self._format_id(self._column)}"
class Rename(ESQLBase):
@@ -999,7 +1043,7 @@ def __init__(self, parent: ESQLBase, **columns: FieldType):
self._columns = columns
def _render_internal(self) -> str:
- return f'RENAME {", ".join([f"{old_name} AS {new_name}" for old_name, new_name in self._columns.items()])}'
+ return f'RENAME {", ".join([f"{self._format_id(old_name)} AS {self._format_id(new_name)}" for old_name, new_name in self._columns.items()])}'
class Sample(ESQLBase):
@@ -1015,7 +1059,7 @@ def __init__(self, parent: ESQLBase, probability: float):
self._probability = probability
def _render_internal(self) -> str:
- return f"SAMPLE {self._probability}"
+ return f"SAMPLE {json.dumps(self._probability)}"
class Sort(ESQLBase):
@@ -1026,12 +1070,16 @@ class Sort(ESQLBase):
in a single expression.
"""
- def __init__(self, parent: ESQLBase, *columns: FieldType):
+ def __init__(self, parent: ESQLBase, *columns: ExpressionType):
super().__init__(parent)
self._columns = columns
def _render_internal(self) -> str:
- return f'SORT {", ".join([f"{col}" for col in self._columns])}'
+ sorts = [
+ " ".join([self._format_id(term) for term in str(col).split(" ")])
+ for col in self._columns
+ ]
+ return f'SORT {", ".join([f"{sort}" for sort in sorts])}'
class Stats(ESQLBase):
@@ -1062,14 +1110,17 @@ def by(self, *grouping_expressions: ExpressionType) -> "Stats":
def _render_internal(self) -> str:
if isinstance(self._expressions, dict):
- exprs = [f"{key} = {value}" for key, value in self._expressions.items()]
+ exprs = [
+ f"{self._format_id(key)} = {self._format_expr(value)}"
+ for key, value in self._expressions.items()
+ ]
else:
- exprs = [f"{expr}" for expr in self._expressions]
+ exprs = [f"{self._format_expr(expr)}" for expr in self._expressions]
expression_separator = ",\n "
by = (
""
if self._grouping_expressions is None
- else f'\n BY {", ".join([f"{expr}" for expr in self._grouping_expressions])}'
+ else f'\n BY {", ".join([f"{self._format_expr(expr)}" for expr in self._grouping_expressions])}'
)
return f'STATS {expression_separator.join([f"{expr}" for expr in exprs])}{by}'
@@ -1087,7 +1138,7 @@ def __init__(self, parent: ESQLBase, *expressions: ExpressionType):
self._expressions = expressions
def _render_internal(self) -> str:
- return f'WHERE {" AND ".join([f"{expr}" for expr in self._expressions])}'
+ return f'WHERE {" AND ".join([f"{self._format_expr(expr)}" for expr in self._expressions])}'
def and_(*expressions: InstrumentedExpression) -> "InstrumentedExpression":
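A quick illustration of the net effect of the rendering changes above: numeric literals now go through `json.dumps()` and identifiers through `_format_id()`, so values are escaped consistently. A minimal sketch, assuming an `employees` index (output format per the tests later in this patch):

```python
from elasticsearch.esql import ESQL

query = ESQL.from_("employees").sample(0.5).limit(10)
print(query.render())
# FROM employees
# | SAMPLE 0.5
# | LIMIT 10
```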
diff --git a/elasticsearch/esql/functions.py b/elasticsearch/esql/functions.py
index 515e3ddfc..91f18d2d8 100644
--- a/elasticsearch/esql/functions.py
+++ b/elasticsearch/esql/functions.py
@@ -19,11 +19,15 @@
from typing import Any
from elasticsearch.dsl.document_base import InstrumentedExpression
-from elasticsearch.esql.esql import ExpressionType
+from elasticsearch.esql.esql import ESQLBase, ExpressionType
def _render(v: Any) -> str:
- return json.dumps(v) if not isinstance(v, InstrumentedExpression) else str(v)
+ return (
+ json.dumps(v)
+ if not isinstance(v, InstrumentedExpression)
+ else ESQLBase._format_expr(v)
+ )
def abs(number: ExpressionType) -> InstrumentedExpression:
@@ -69,7 +73,9 @@ def atan2(
:param y_coordinate: y coordinate. If `null`, the function returns `null`.
:param x_coordinate: x coordinate. If `null`, the function returns `null`.
"""
- return InstrumentedExpression(f"ATAN2({y_coordinate}, {x_coordinate})")
+ return InstrumentedExpression(
+ f"ATAN2({_render(y_coordinate)}, {_render(x_coordinate)})"
+ )
def avg(number: ExpressionType) -> InstrumentedExpression:
@@ -114,7 +120,7 @@ def bucket(
:param to: End of the range. Can be a number, a date or a date expressed as a string.
"""
return InstrumentedExpression(
- f"BUCKET({_render(field)}, {_render(buckets)}, {from_}, {_render(to)})"
+ f"BUCKET({_render(field)}, {_render(buckets)}, {_render(from_)}, {_render(to)})"
)
@@ -169,7 +175,7 @@ def cidr_match(ip: ExpressionType, block_x: ExpressionType) -> InstrumentedExpre
:param ip: IP address of type `ip` (both IPv4 and IPv6 are supported).
:param block_x: CIDR block to test the IP against.
"""
- return InstrumentedExpression(f"CIDR_MATCH({_render(ip)}, {block_x})")
+ return InstrumentedExpression(f"CIDR_MATCH({_render(ip)}, {_render(block_x)})")
def coalesce(first: ExpressionType, rest: ExpressionType) -> InstrumentedExpression:
@@ -264,7 +270,7 @@ def date_diff(
:param end_timestamp: A string representing an end timestamp
"""
return InstrumentedExpression(
- f"DATE_DIFF({_render(unit)}, {start_timestamp}, {end_timestamp})"
+ f"DATE_DIFF({_render(unit)}, {_render(start_timestamp)}, {_render(end_timestamp)})"
)
@@ -285,7 +291,9 @@ def date_extract(
the function returns `null`.
:param date: Date expression. If `null`, the function returns `null`.
"""
- return InstrumentedExpression(f"DATE_EXTRACT({date_part}, {_render(date)})")
+ return InstrumentedExpression(
+ f"DATE_EXTRACT({_render(date_part)}, {_render(date)})"
+ )
def date_format(
@@ -301,7 +309,7 @@ def date_format(
"""
if date_format is not None:
return InstrumentedExpression(
- f"DATE_FORMAT({json.dumps(date_format)}, {_render(date)})"
+ f"DATE_FORMAT({_render(date_format)}, {_render(date)})"
)
else:
return InstrumentedExpression(f"DATE_FORMAT({_render(date)})")
@@ -317,7 +325,9 @@ def date_parse(
:param date_string: Date expression as a string. If `null` or an empty
string, the function returns `null`.
"""
- return InstrumentedExpression(f"DATE_PARSE({date_pattern}, {date_string})")
+ return InstrumentedExpression(
+ f"DATE_PARSE({_render(date_pattern)}, {_render(date_string)})"
+ )
def date_trunc(
@@ -929,7 +939,7 @@ def replace(
:param new_string: Replacement string.
"""
return InstrumentedExpression(
- f"REPLACE({_render(string)}, {_render(regex)}, {new_string})"
+ f"REPLACE({_render(string)}, {_render(regex)}, {_render(new_string)})"
)
@@ -1004,7 +1014,7 @@ def scalb(d: ExpressionType, scale_factor: ExpressionType) -> InstrumentedExpres
:param scale_factor: Numeric expression for the scale factor. If `null`, the
function returns `null`.
"""
- return InstrumentedExpression(f"SCALB({_render(d)}, {scale_factor})")
+ return InstrumentedExpression(f"SCALB({_render(d)}, {_render(scale_factor)})")
def sha1(input: ExpressionType) -> InstrumentedExpression:
@@ -1116,7 +1126,7 @@ def st_contains(
first. This means it is not possible to combine `geo_*` and
`cartesian_*` parameters.
"""
- return InstrumentedExpression(f"ST_CONTAINS({geom_a}, {geom_b})")
+ return InstrumentedExpression(f"ST_CONTAINS({_render(geom_a)}, {_render(geom_b)})")
def st_disjoint(
@@ -1135,7 +1145,7 @@ def st_disjoint(
first. This means it is not possible to combine `geo_*` and
`cartesian_*` parameters.
"""
- return InstrumentedExpression(f"ST_DISJOINT({geom_a}, {geom_b})")
+ return InstrumentedExpression(f"ST_DISJOINT({_render(geom_a)}, {_render(geom_b)})")
def st_distance(
@@ -1153,7 +1163,7 @@ def st_distance(
also have the same coordinate system as the first. This means it
is not possible to combine `geo_point` and `cartesian_point` parameters.
"""
- return InstrumentedExpression(f"ST_DISTANCE({geom_a}, {geom_b})")
+ return InstrumentedExpression(f"ST_DISTANCE({_render(geom_a)}, {_render(geom_b)})")
def st_envelope(geometry: ExpressionType) -> InstrumentedExpression:
@@ -1208,7 +1218,7 @@ def st_geohash_to_long(grid_id: ExpressionType) -> InstrumentedExpression:
:param grid_id: Input geohash grid-id. The input can be a single- or
multi-valued column or an expression.
"""
- return InstrumentedExpression(f"ST_GEOHASH_TO_LONG({grid_id})")
+ return InstrumentedExpression(f"ST_GEOHASH_TO_LONG({_render(grid_id)})")
def st_geohash_to_string(grid_id: ExpressionType) -> InstrumentedExpression:
@@ -1218,7 +1228,7 @@ def st_geohash_to_string(grid_id: ExpressionType) -> InstrumentedExpression:
:param grid_id: Input geohash grid-id. The input can be a single- or
multi-valued column or an expression.
"""
- return InstrumentedExpression(f"ST_GEOHASH_TO_STRING({grid_id})")
+ return InstrumentedExpression(f"ST_GEOHASH_TO_STRING({_render(grid_id)})")
def st_geohex(
@@ -1254,7 +1264,7 @@ def st_geohex_to_long(grid_id: ExpressionType) -> InstrumentedExpression:
:param grid_id: Input geohex grid-id. The input can be a single- or
multi-valued column or an expression.
"""
- return InstrumentedExpression(f"ST_GEOHEX_TO_LONG({grid_id})")
+ return InstrumentedExpression(f"ST_GEOHEX_TO_LONG({_render(grid_id)})")
def st_geohex_to_string(grid_id: ExpressionType) -> InstrumentedExpression:
@@ -1264,7 +1274,7 @@ def st_geohex_to_string(grid_id: ExpressionType) -> InstrumentedExpression:
:param grid_id: Input Geohex grid-id. The input can be a single- or
multi-valued column or an expression.
"""
- return InstrumentedExpression(f"ST_GEOHEX_TO_STRING({grid_id})")
+ return InstrumentedExpression(f"ST_GEOHEX_TO_STRING({_render(grid_id)})")
def st_geotile(
@@ -1300,7 +1310,7 @@ def st_geotile_to_long(grid_id: ExpressionType) -> InstrumentedExpression:
:param grid_id: Input geotile grid-id. The input can be a single- or
multi-valued column or an expression.
"""
- return InstrumentedExpression(f"ST_GEOTILE_TO_LONG({grid_id})")
+ return InstrumentedExpression(f"ST_GEOTILE_TO_LONG({_render(grid_id)})")
def st_geotile_to_string(grid_id: ExpressionType) -> InstrumentedExpression:
@@ -1310,7 +1320,7 @@ def st_geotile_to_string(grid_id: ExpressionType) -> InstrumentedExpression:
:param grid_id: Input geotile grid-id. The input can be a single- or
multi-valued column or an expression.
"""
- return InstrumentedExpression(f"ST_GEOTILE_TO_STRING({grid_id})")
+ return InstrumentedExpression(f"ST_GEOTILE_TO_STRING({_render(grid_id)})")
def st_intersects(
@@ -1330,7 +1340,9 @@ def st_intersects(
first. This means it is not possible to combine `geo_*` and
`cartesian_*` parameters.
"""
- return InstrumentedExpression(f"ST_INTERSECTS({geom_a}, {geom_b})")
+ return InstrumentedExpression(
+ f"ST_INTERSECTS({_render(geom_a)}, {_render(geom_b)})"
+ )
def st_within(geom_a: ExpressionType, geom_b: ExpressionType) -> InstrumentedExpression:
@@ -1346,7 +1358,7 @@ def st_within(geom_a: ExpressionType, geom_b: ExpressionType) -> InstrumentedExp
first. This means it is not possible to combine `geo_*` and
`cartesian_*` parameters.
"""
- return InstrumentedExpression(f"ST_WITHIN({geom_a}, {geom_b})")
+ return InstrumentedExpression(f"ST_WITHIN({_render(geom_a)}, {_render(geom_b)})")
def st_x(point: ExpressionType) -> InstrumentedExpression:
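With every argument now routed through `_render()`, string literals are JSON-quoted while `InstrumentedExpression` values are emitted as expressions. A hedged sketch of the resulting output:

```python
from elasticsearch.esql import E
from elasticsearch.esql.functions import atan2, replace

print(atan2(E("y"), E("x")))      # ATAN2(y, x)
print(replace(E("s"), "a", "b"))  # REPLACE(s, "a", "b")
```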
diff --git a/test_elasticsearch/test_dsl/_async/test_esql.py b/test_elasticsearch/test_dsl/test_integration/_async/test_esql.py
similarity index 88%
rename from test_elasticsearch/test_dsl/_async/test_esql.py
rename to test_elasticsearch/test_dsl/test_integration/_async/test_esql.py
index 7aacb833c..27d26ca99 100644
--- a/test_elasticsearch/test_dsl/_async/test_esql.py
+++ b/test_elasticsearch/test_dsl/test_integration/_async/test_esql.py
@@ -17,7 +17,7 @@
import pytest
-from elasticsearch.dsl import AsyncDocument, M
+from elasticsearch.dsl import AsyncDocument, E, M
from elasticsearch.esql import ESQL, functions
@@ -91,3 +91,13 @@ async def test_esql(async_client):
)
r = await async_client.esql.query(query=str(query))
assert r.body["values"] == [[1.95]]
+
+ # find employees by name using a parameter
+ query = (
+ ESQL.from_(Employee)
+ .where(Employee.first_name == E("?"))
+ .keep(Employee.last_name)
+ .sort(Employee.last_name.desc())
+ )
+ r = await async_client.esql.query(query=str(query), params=["Maria"])
+ assert r.body["values"] == [["Luna"], ["Cannon"]]
diff --git a/test_elasticsearch/test_dsl/_sync/test_esql.py b/test_elasticsearch/test_dsl/test_integration/_sync/test_esql.py
similarity index 88%
rename from test_elasticsearch/test_dsl/_sync/test_esql.py
rename to test_elasticsearch/test_dsl/test_integration/_sync/test_esql.py
index 1c4084fc7..85ceee5ae 100644
--- a/test_elasticsearch/test_dsl/_sync/test_esql.py
+++ b/test_elasticsearch/test_dsl/test_integration/_sync/test_esql.py
@@ -17,7 +17,7 @@
import pytest
-from elasticsearch.dsl import Document, M
+from elasticsearch.dsl import Document, E, M
from elasticsearch.esql import ESQL, functions
@@ -91,3 +91,13 @@ def test_esql(client):
)
r = client.esql.query(query=str(query))
assert r.body["values"] == [[1.95]]
+
+ # find employees by name using a parameter
+ query = (
+ ESQL.from_(Employee)
+ .where(Employee.first_name == E("?"))
+ .keep(Employee.last_name)
+ .sort(Employee.last_name.desc())
+ )
+ r = client.esql.query(query=str(query), params=["Maria"])
+ assert r.body["values"] == [["Luna"], ["Cannon"]]
diff --git a/test_elasticsearch/test_esql.py b/test_elasticsearch/test_esql.py
index 70c9ec679..35b026fb5 100644
--- a/test_elasticsearch/test_esql.py
+++ b/test_elasticsearch/test_esql.py
@@ -84,7 +84,7 @@ def test_completion():
assert (
query.render()
== """ROW question = "What is Elasticsearch?"
-| COMPLETION question WITH test_completion_model
+| COMPLETION question WITH {"inference_id": "test_completion_model"}
| KEEP question, completion"""
)
@@ -97,7 +97,7 @@ def test_completion():
assert (
query.render()
== """ROW question = "What is Elasticsearch?"
-| COMPLETION answer = question WITH test_completion_model
+| COMPLETION answer = question WITH {"inference_id": "test_completion_model"}
| KEEP question, answer"""
)
@@ -128,7 +128,7 @@ def test_completion():
"Synopsis: ", synopsis, "\\n",
"Actors: ", MV_CONCAT(actors, ", "), "\\n",
)
-| COMPLETION summary = prompt WITH test_completion_model
+| COMPLETION summary = prompt WITH {"inference_id": "test_completion_model"}
| KEEP title, summary, rating"""
)
@@ -160,7 +160,7 @@ def test_completion():
| SORT rating DESC
| LIMIT 10
| EVAL prompt = CONCAT("Summarize this movie using the following information: \\n", "Title: ", title, "\\n", "Synopsis: ", synopsis, "\\n", "Actors: ", MV_CONCAT(actors, ", "), "\\n")
-| COMPLETION summary = prompt WITH test_completion_model
+| COMPLETION summary = prompt WITH {"inference_id": "test_completion_model"}
| KEEP title, summary, rating"""
)
@@ -713,3 +713,11 @@ def test_match_operator():
== """FROM books
| WHERE author:"Faulkner\""""
)
+
+
+def test_parameters():
+ query = ESQL.from_("employees").where("name == ?")
+ assert query.render() == "FROM employees\n| WHERE name == ?"
+
+ query = ESQL.from_("employees").where(E("name") == E("?"))
+ assert query.render() == "FROM employees\n| WHERE name == ?"
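Taken together, these tests show the two equivalent ways to bind ES|QL parameters with the query builder. A minimal sketch, assuming an `employees` index:

```python
from elasticsearch.esql import ESQL, E

# both forms render the same parameterized query
q1 = ESQL.from_("employees").where("name == ?")
q2 = ESQL.from_("employees").where(E("name") == E("?"))
assert q1.render() == q2.render() == "FROM employees\n| WHERE name == ?"

# the parameter value is then supplied at execution time, e.g.:
# client.esql.query(query=q1.render(), params=["Maria"])
```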
From 5fca28c482846fbf23993946a4e26b1b70189238 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Tue, 5 Aug 2025 15:17:13 +0100
Subject: [PATCH 05/21] Minor improvement to fix in #3018 (#3031) (#3036)
(cherry picked from commit a67c2eef139b1f55fca8ffa79f8c19d6a03dc6cc)
Co-authored-by: Miguel Grinberg
---
test_elasticsearch/test_server/test_rest_api_spec.py | 10 ++--------
1 file changed, 2 insertions(+), 8 deletions(-)
diff --git a/test_elasticsearch/test_server/test_rest_api_spec.py b/test_elasticsearch/test_server/test_rest_api_spec.py
index f12db87aa..768453c10 100644
--- a/test_elasticsearch/test_server/test_rest_api_spec.py
+++ b/test_elasticsearch/test_server/test_rest_api_spec.py
@@ -495,20 +495,14 @@ def remove_implicit_resolver(cls, tag_to_remove):
# Try loading the REST API test specs from the Elastic Artifacts API
try:
# Construct the HTTP and Elasticsearch client
- http = urllib3.PoolManager(retries=10)
+ http = urllib3.PoolManager(retries=urllib3.Retry(total=10))
yaml_tests_url = (
"https://api.github.com/repos/elastic/elasticsearch-clients-tests/zipball/main"
)
# Download the zip and start reading YAML from the files in memory
- package_zip = zipfile.ZipFile(
- io.BytesIO(
- http.request(
- "GET", yaml_tests_url, retries=urllib3.Retry(3, redirect=10)
- ).data
- )
- )
+ package_zip = zipfile.ZipFile(io.BytesIO(http.request("GET", yaml_tests_url).data))
for yaml_file in package_zip.namelist():
if not re.match(r"^.*\/tests\/.*\.ya?ml$", yaml_file):
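For reference, a minimal sketch of the pattern this patch adopts: configuring retries once on the `PoolManager` so they apply to every request made through it (the URL below is illustrative):

```python
import urllib3

# a Retry policy set on the pool is inherited by all requests
http = urllib3.PoolManager(retries=urllib3.Retry(total=10))
data = http.request("GET", "https://api.github.com/").data
```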
From 11c0f4806e0ba4ac09e13073c1faa8bba4ded840 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Mon, 11 Aug 2025 13:37:49 +0400
Subject: [PATCH 06/21] Fix new parameter name in breaking changes docs (#3038)
(#3040)
(cherry picked from commit 6bfbdaf031186202fcc2250ee6703362878f1342)
Co-authored-by: Quentin Pradet
---
docs/release-notes/breaking-changes.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/docs/release-notes/breaking-changes.md b/docs/release-notes/breaking-changes.md
index 640a57036..0a354b9ce 100644
--- a/docs/release-notes/breaking-changes.md
+++ b/docs/release-notes/breaking-changes.md
@@ -28,7 +28,7 @@ For more information, check [PR #2840](https://github.com/elastic/elasticsearch-
* `host_info_callback` is now `sniffed_node_callback`
* `sniffer_timeout` is now `min_delay_between_sniffing`
* `sniff_on_connection_fail` is now `sniff_on_node_failure`
- * `maxsize` is now `connection_per_node`
+ * `maxsize` is now `connections_per_node`
::::
::::{dropdown} Remove deprecated url_prefix and use_ssl host keys
@@ -50,4 +50,4 @@ Elasticsearch 9 removed the kNN search and Unfreeze index APIs.
**Action**
* The kNN search API has been replaced by the `knn` option in the search API since Elasticsearch 8.4.
* The Unfreeze index API was deprecated in Elasticsearch 7.14 and has been removed in Elasticsearch 9.
- ::::
\ No newline at end of file
+ ::::
From cf6f5c5144bf0c7e72778cc5561229bc6451cd4e Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 20 Aug 2025 16:21:21 +0100
Subject: [PATCH 07/21] DSL: preserve the `skip_empty` setting in `to_dict()`
recursive serializations (#3041) (#3047)
* Try reproducing DSL issue 1577
* better attempt to reproduce
* preserve skip_empty setting in recursive serializations
---------
(cherry picked from commit 4761d56405437929e67080ec7890204175cc2513)
Co-authored-by: Quentin Pradet
Co-authored-by: Miguel Grinberg
---
elasticsearch/dsl/field.py | 33 +++++++++++++------
elasticsearch/dsl/utils.py | 2 +-
.../test_integration/_async/test_document.py | 11 +++++--
.../test_integration/_sync/test_document.py | 11 +++++--
utils/templates/field.py.tpl | 27 +++++++++------
5 files changed, 59 insertions(+), 25 deletions(-)
diff --git a/elasticsearch/dsl/field.py b/elasticsearch/dsl/field.py
index 03c712e0a..895765e66 100644
--- a/elasticsearch/dsl/field.py
+++ b/elasticsearch/dsl/field.py
@@ -119,9 +119,16 @@ def __init__(
def __getitem__(self, subfield: str) -> "Field":
return cast(Field, self._params.get("fields", {})[subfield])
- def _serialize(self, data: Any) -> Any:
+ def _serialize(self, data: Any, skip_empty: bool) -> Any:
return data
+ def _safe_serialize(self, data: Any, skip_empty: bool) -> Any:
+ try:
+ return self._serialize(data, skip_empty)
+ except TypeError:
+ # older method signature, without skip_empty
+ return self._serialize(data) # type: ignore[call-arg]
+
def _deserialize(self, data: Any) -> Any:
return data
@@ -133,10 +140,16 @@ def empty(self) -> Optional[Any]:
return AttrList([])
return self._empty()
- def serialize(self, data: Any) -> Any:
+ def serialize(self, data: Any, skip_empty: bool = True) -> Any:
if isinstance(data, (list, AttrList, tuple)):
- return list(map(self._serialize, cast(Iterable[Any], data)))
- return self._serialize(data)
+ return list(
+ map(
+ self._safe_serialize,
+ cast(Iterable[Any], data),
+ [skip_empty] * len(data),
+ )
+ )
+ return self._safe_serialize(data, skip_empty)
def deserialize(self, data: Any) -> Any:
if isinstance(data, (list, AttrList, tuple)):
@@ -186,7 +199,7 @@ def _deserialize(self, data: Any) -> Range["_SupportsComparison"]:
data = {k: self._core_field.deserialize(v) for k, v in data.items()} # type: ignore[union-attr]
return Range(data)
- def _serialize(self, data: Any) -> Optional[Dict[str, Any]]:
+ def _serialize(self, data: Any, skip_empty: bool) -> Optional[Dict[str, Any]]:
if data is None:
return None
if not isinstance(data, collections.abc.Mapping):
@@ -550,7 +563,7 @@ def _deserialize(self, data: Any) -> "InnerDoc":
return self._wrap(data)
def _serialize(
- self, data: Optional[Union[Dict[str, Any], "InnerDoc"]]
+ self, data: Optional[Union[Dict[str, Any], "InnerDoc"]], skip_empty: bool
) -> Optional[Dict[str, Any]]:
if data is None:
return None
@@ -559,7 +572,7 @@ def _serialize(
if isinstance(data, collections.abc.Mapping):
return data
- return data.to_dict()
+ return data.to_dict(skip_empty=skip_empty)
def clean(self, data: Any) -> Any:
data = super().clean(data)
@@ -768,7 +781,7 @@ def clean(self, data: str) -> str:
def _deserialize(self, data: Any) -> bytes:
return base64.b64decode(data)
- def _serialize(self, data: Any) -> Optional[str]:
+ def _serialize(self, data: Any, skip_empty: bool) -> Optional[str]:
if data is None:
return None
return base64.b64encode(data).decode()
@@ -2619,7 +2632,7 @@ def _deserialize(self, data: Any) -> Union["IPv4Address", "IPv6Address"]:
# the ipaddress library for pypy only accepts unicode.
return ipaddress.ip_address(unicode(data))
- def _serialize(self, data: Any) -> Optional[str]:
+ def _serialize(self, data: Any, skip_empty: bool) -> Optional[str]:
if data is None:
return None
return str(data)
@@ -3367,7 +3380,7 @@ def __init__(
def _deserialize(self, data: Any) -> "Query":
return Q(data) # type: ignore[no-any-return]
- def _serialize(self, data: Any) -> Optional[Dict[str, Any]]:
+ def _serialize(self, data: Any, skip_empty: bool) -> Optional[Dict[str, Any]]:
if data is None:
return None
return data.to_dict() # type: ignore[no-any-return]
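The `_safe_serialize()` fallback keeps user-defined `Field` subclasses that still implement the old one-argument `_serialize()` working. A standalone sketch of the dispatch, with illustrative class names that are not part of the library:

```python
from typing import Any


class LegacyField:
    # a custom field still using the old single-argument signature
    def _serialize(self, data: Any) -> Any:
        return data


class NewField:
    # the new signature threads skip_empty through
    def _serialize(self, data: Any, skip_empty: bool) -> Any:
        return data


def safe_serialize(field: Any, data: Any, skip_empty: bool) -> Any:
    try:
        return field._serialize(data, skip_empty)
    except TypeError:
        # older method signature, without skip_empty
        return field._serialize(data)


assert safe_serialize(LegacyField(), "x", skip_empty=False) == "x"
assert safe_serialize(NewField(), "x", skip_empty=False) == "x"
```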
diff --git a/elasticsearch/dsl/utils.py b/elasticsearch/dsl/utils.py
index 127a48cc2..cce3c052c 100644
--- a/elasticsearch/dsl/utils.py
+++ b/elasticsearch/dsl/utils.py
@@ -603,7 +603,7 @@ def to_dict(self, skip_empty: bool = True) -> Dict[str, Any]:
# if this is a mapped field,
f = self.__get_field(k)
if f and f._coerce:
- v = f.serialize(v)
+ v = f.serialize(v, skip_empty=skip_empty)
# if someone assigned AttrList, unwrap it
if isinstance(v, AttrList):
diff --git a/test_elasticsearch/test_dsl/test_integration/_async/test_document.py b/test_elasticsearch/test_dsl/test_integration/_async/test_document.py
index 99f475cf1..3d769c606 100644
--- a/test_elasticsearch/test_dsl/test_integration/_async/test_document.py
+++ b/test_elasticsearch/test_dsl/test_integration/_async/test_document.py
@@ -630,7 +630,9 @@ async def test_can_save_to_different_index(
async def test_save_without_skip_empty_will_include_empty_fields(
async_write_client: AsyncElasticsearch,
) -> None:
- test_repo = Repository(field_1=[], field_2=None, field_3={}, meta={"id": 42})
+ test_repo = Repository(
+ field_1=[], field_2=None, field_3={}, owner={"name": None}, meta={"id": 42}
+ )
assert await test_repo.save(index="test-document", skip_empty=False)
assert_doc_equals(
@@ -638,7 +640,12 @@ async def test_save_without_skip_empty_will_include_empty_fields(
"found": True,
"_index": "test-document",
"_id": "42",
- "_source": {"field_1": [], "field_2": None, "field_3": {}},
+ "_source": {
+ "field_1": [],
+ "field_2": None,
+ "field_3": {},
+ "owner": {"name": None},
+ },
},
await async_write_client.get(index="test-document", id=42),
)
diff --git a/test_elasticsearch/test_dsl/test_integration/_sync/test_document.py b/test_elasticsearch/test_dsl/test_integration/_sync/test_document.py
index 05dd05fd9..a005d45bf 100644
--- a/test_elasticsearch/test_dsl/test_integration/_sync/test_document.py
+++ b/test_elasticsearch/test_dsl/test_integration/_sync/test_document.py
@@ -624,7 +624,9 @@ def test_can_save_to_different_index(
def test_save_without_skip_empty_will_include_empty_fields(
write_client: Elasticsearch,
) -> None:
- test_repo = Repository(field_1=[], field_2=None, field_3={}, meta={"id": 42})
+ test_repo = Repository(
+ field_1=[], field_2=None, field_3={}, owner={"name": None}, meta={"id": 42}
+ )
assert test_repo.save(index="test-document", skip_empty=False)
assert_doc_equals(
@@ -632,7 +634,12 @@ def test_save_without_skip_empty_will_include_empty_fields(
"found": True,
"_index": "test-document",
"_id": "42",
- "_source": {"field_1": [], "field_2": None, "field_3": {}},
+ "_source": {
+ "field_1": [],
+ "field_2": None,
+ "field_3": {},
+ "owner": {"name": None},
+ },
},
write_client.get(index="test-document", id=42),
)
diff --git a/utils/templates/field.py.tpl b/utils/templates/field.py.tpl
index 8a4c73f33..8699d852e 100644
--- a/utils/templates/field.py.tpl
+++ b/utils/templates/field.py.tpl
@@ -119,9 +119,16 @@ class Field(DslBase):
def __getitem__(self, subfield: str) -> "Field":
return cast(Field, self._params.get("fields", {})[subfield])
- def _serialize(self, data: Any) -> Any:
+ def _serialize(self, data: Any, skip_empty: bool) -> Any:
return data
+ def _safe_serialize(self, data: Any, skip_empty: bool) -> Any:
+ try:
+ return self._serialize(data, skip_empty)
+ except TypeError:
+ # older method signature, without skip_empty
+ return self._serialize(data) # type: ignore[call-arg]
+
def _deserialize(self, data: Any) -> Any:
return data
@@ -133,10 +140,10 @@ class Field(DslBase):
return AttrList([])
return self._empty()
- def serialize(self, data: Any) -> Any:
+ def serialize(self, data: Any, skip_empty: bool = True) -> Any:
if isinstance(data, (list, AttrList, tuple)):
- return list(map(self._serialize, cast(Iterable[Any], data)))
- return self._serialize(data)
+ return list(map(self._safe_serialize, cast(Iterable[Any], data), [skip_empty] * len(data)))
+ return self._safe_serialize(data, skip_empty)
def deserialize(self, data: Any) -> Any:
if isinstance(data, (list, AttrList, tuple)):
@@ -186,7 +193,7 @@ class RangeField(Field):
data = {k: self._core_field.deserialize(v) for k, v in data.items()} # type: ignore[union-attr]
return Range(data)
- def _serialize(self, data: Any) -> Optional[Dict[str, Any]]:
+ def _serialize(self, data: Any, skip_empty: bool) -> Optional[Dict[str, Any]]:
if data is None:
return None
if not isinstance(data, collections.abc.Mapping):
@@ -318,7 +325,7 @@ class {{ k.name }}({{ k.parent }}):
return self._wrap(data)
def _serialize(
- self, data: Optional[Union[Dict[str, Any], "InnerDoc"]]
+ self, data: Optional[Union[Dict[str, Any], "InnerDoc"]], skip_empty: bool
) -> Optional[Dict[str, Any]]:
if data is None:
return None
@@ -327,7 +334,7 @@ class {{ k.name }}({{ k.parent }}):
if isinstance(data, collections.abc.Mapping):
return data
- return data.to_dict()
+ return data.to_dict(skip_empty=skip_empty)
def clean(self, data: Any) -> Any:
data = super().clean(data)
@@ -433,7 +440,7 @@ class {{ k.name }}({{ k.parent }}):
# the ipaddress library for pypy only accepts unicode.
return ipaddress.ip_address(unicode(data))
- def _serialize(self, data: Any) -> Optional[str]:
+ def _serialize(self, data: Any, skip_empty: bool) -> Optional[str]:
if data is None:
return None
return str(data)
@@ -448,7 +455,7 @@ class {{ k.name }}({{ k.parent }}):
def _deserialize(self, data: Any) -> bytes:
return base64.b64decode(data)
- def _serialize(self, data: Any) -> Optional[str]:
+ def _serialize(self, data: Any, skip_empty: bool) -> Optional[str]:
if data is None:
return None
return base64.b64encode(data).decode()
@@ -458,7 +465,7 @@ class {{ k.name }}({{ k.parent }}):
def _deserialize(self, data: Any) -> "Query":
return Q(data) # type: ignore[no-any-return]
- def _serialize(self, data: Any) -> Optional[Dict[str, Any]]:
+ def _serialize(self, data: Any, skip_empty: bool) -> Optional[Dict[str, Any]]:
if data is None:
return None
return data.to_dict() # type: ignore[no-any-return]
From a5621946ad57f42fd1b406224aa862d0bd131e43 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 3 Sep 2025 15:50:21 +0000
Subject: [PATCH 08/21] Fix ES|QL multi_match() signature (#3052) (#3055)
This change was reviewed when it was part of #3048.
(cherry picked from commit cece9a7b9b5833336486b6c65292173929d2288e)
Co-authored-by: Miguel Grinberg
---
elasticsearch/esql/functions.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/elasticsearch/esql/functions.py b/elasticsearch/esql/functions.py
index 91f18d2d8..6f47b2c79 100644
--- a/elasticsearch/esql/functions.py
+++ b/elasticsearch/esql/functions.py
@@ -649,7 +649,7 @@ def min_over_time(field: ExpressionType) -> InstrumentedExpression:
def multi_match(
- query: ExpressionType, fields: ExpressionType, options: ExpressionType = None
+ query: ExpressionType, *fields: ExpressionType, options: ExpressionType = None
) -> InstrumentedExpression:
"""Use `MULTI_MATCH` to perform a multi-match query on the specified field.
The multi_match query builds on the match query to allow multi-field queries.
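With the corrected signature, each field is passed as its own positional argument. A hedged usage sketch (field names are illustrative):

```python
from elasticsearch.esql import E
from elasticsearch.esql.functions import multi_match

# renders as: MULTI_MATCH("Faulkner", author, title)
expr = multi_match("Faulkner", E("author"), E("title"))
```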
From 2aa045993b097e6dd60691953041f707cf696932 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 3 Sep 2025 17:28:50 +0100
Subject: [PATCH 09/21] ES|QL query builder integration with the DSL module
(#3048) (#3057)
* ES|QL query builder integration with the DSL module
* esql DSL tests
* more esql DSL tests
* documentation
* add esql+dsl example
* review feedback
(cherry picked from commit 228e66c702f06e2d8b4e33d77131c0f21c6b34d2)
Co-authored-by: Miguel Grinberg
---
docs/reference/dsl_how_to_guides.md | 121 ++++++++++++
docs/reference/dsl_tutorials.md | 18 +-
docs/reference/esql-query-builder.md | 8 +-
elasticsearch/dsl/_async/document.py | 84 ++++++++
elasticsearch/dsl/_sync/document.py | 84 ++++++++
elasticsearch/dsl/document_base.py | 42 ++++
elasticsearch/esql/__init__.py | 2 +-
elasticsearch/esql/functions.py | 4 +-
examples/dsl/async/esql_employees.py | 170 ++++++++++++++++
examples/dsl/esql_employees.py | 169 ++++++++++++++++
examples/dsl/semantic_text.py | 2 +-
examples/dsl/sparse_vectors.py | 2 +-
examples/dsl/vectors.py | 2 +-
.../test_integration/_async/test_esql.py | 183 ++++++++++++++++--
.../test_integration/_sync/test_esql.py | 183 ++++++++++++++++--
utils/run-unasync-dsl.py | 2 +-
16 files changed, 1031 insertions(+), 45 deletions(-)
create mode 100644 examples/dsl/async/esql_employees.py
create mode 100644 examples/dsl/esql_employees.py
diff --git a/docs/reference/dsl_how_to_guides.md b/docs/reference/dsl_how_to_guides.md
index ce128528a..5f0884c3c 100644
--- a/docs/reference/dsl_how_to_guides.md
+++ b/docs/reference/dsl_how_to_guides.md
@@ -1425,6 +1425,127 @@ print(response.took)
If you want to inspect the contents of the `response` objects, just use its `to_dict` method to get access to the raw data for pretty printing.
+## ES|QL Queries
+
+When working with `Document` classes, you can use the ES|QL query language to retrieve documents. For this you can use the `esql_from()` and `esql_execute()` methods available to all sub-classes of `Document`.
+
+Consider the following `Employee` document definition:
+
+```python
+from elasticsearch.dsl import Document, InnerDoc, M
+
+class Address(InnerDoc):
+ address: M[str]
+ city: M[str]
+ zip_code: M[str]
+
+class Employee(Document):
+ emp_no: M[int]
+ first_name: M[str]
+ last_name: M[str]
+ height: M[float]
+ still_hired: M[bool]
+ address: M[Address]
+
+ class Index:
+ name = 'employees'
+```
+
+The `esql_from()` method creates a base ES|QL query for the index associated with the document class. The following example creates a base query for the `Employee` class:
+
+```python
+query = Employee.esql_from()
+```
+
+This query includes a `FROM` command with the index name, and a `KEEP` command that retrieves all the document attributes.
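+
+Printing the query shows the rendered commands. For the `Employee` class above, the output looks roughly like this (sketch, exact field order may vary):
+
+```python
+>>> print(query)
+FROM employees METADATA _id
+| KEEP _id, emp_no, first_name, last_name, height, still_hired, address.address, address.city, address.zip_code
+```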
+
+To execute this query and receive the results, you can pass the query to the `esql_execute()` method:
+
+```python
+for emp in Employee.esql_execute(query):
+ print(f"{emp.name} from {emp.address.city} is {emp.height:.2f}m tall")
+```
+
+In this example, the `esql_execute()` class method runs the query and returns all the documents in the index, up to the maximum of 1000 results allowed by ES|QL. Here is a possible output from this example:
+
+```
+Kevin Macias from North Robert is 1.60m tall
+Drew Harris from Boltonshire is 1.68m tall
+Julie Williams from Maddoxshire is 1.99m tall
+Christopher Jones from Stevenbury is 1.98m tall
+Anthony Lopez from Port Sarahtown is 2.42m tall
+Tricia Stone from North Sueshire is 2.39m tall
+Katherine Ramirez from Kimberlyton is 1.83m tall
+...
+```
+
+To search for specific documents, you can extend the base query with additional ES|QL commands that narrow the search criteria. The next example returns only employees who are taller than 2 meters, sorted by last name, and limits the results to 4 people:
+
+```python
+query = (
+ Employee.esql_from()
+ .where(Employee.height > 2)
+ .sort(Employee.last_name)
+ .limit(4)
+)
+```
+
+When running this query with the same for-loop shown above, possible results would be:
+
+```
+Michael Adkins from North Stacey is 2.48m tall
+Kimberly Allen from Toddside is 2.24m tall
+Crystal Austin from East Michaelchester is 2.30m tall
+Rebecca Berger from Lake Adrianside is 2.40m tall
+```
+
+### Additional fields
+
+ES|QL provides a few ways to add new fields to a query, for example through the `EVAL` command. The following example shows a query that adds an evaluated field:
+
+```python
+from elasticsearch.esql import E, functions
+
+query = (
+ Employee.esql_from()
+ .eval(height_cm=functions.round(Employee.height * 100))
+ .where(E("height_cm") >= 200)
+ .sort(Employee.last_name)
+ .limit(10)
+)
+```
+
+In this example we are adding the height in centimeters to the query, calculated from the `height` document field, which is in meters. The `height_cm` calculated field is available to use in other query clauses, and in particular is referenced in `where()` in this example. Note how the new field is given as `E("height_cm")` in this clause. The `E()` wrapper tells the query builder that the argument is an ES|QL field name and not a string literal. This is done automatically for document fields that are given as class attributes, such as `Employee.height` in the `eval()`. The `E()` wrapper is only needed for fields that are not in the document.
+
+By default, the `esql_execute()` method returns only document instances. To also receive any additional fields that are not part of the document, pass the `return_additional=True` argument; the results are then returned as tuples, with the document as the first element and a dictionary of the additional fields as the second:
+
+```python
+for emp, additional in Employee.esql_execute(query, return_additional=True):
+ print(f"{emp.first_name} {emp.last_name}", additional)
+```
+
+Example output from the query given above:
+
+```
+Michael Adkins {'height_cm': 248.0}
+Kimberly Allen {'height_cm': 224.0}
+Crystal Austin {'height_cm': 230.0}
+Rebecca Berger {'height_cm': 240.0}
+Katherine Blake {'height_cm': 214.0}
+Edward Butler {'height_cm': 246.0}
+Steven Carlson {'height_cm': 242.0}
+Mark Carter {'height_cm': 240.0}
+Joseph Castillo {'height_cm': 229.0}
+Alexander Cohen {'height_cm': 245.0}
+```
+
+### Missing fields
+
+The base query returned by the `esql_from()` method includes a `KEEP` command with the complete list of fields that are part of the document. If any subsequent clauses remove some of these fields from the query, the `esql_execute()` method will raise an exception, because it will not be able to construct complete document instances to return as results.
+
+To prevent errors, it is recommended that the `keep()` and `drop()` clauses are not used when working with `Document` instances.
+
+If a query has missing fields, it can be forced to execute without errors by passing the `ignore_missing_fields=True` argument to `esql_execute()`. When this option is used, returned documents will have any missing fields set to `None`.
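+
+A minimal sketch, assuming the `Employee` document above and a query that deliberately drops one of its fields:
+
+```python
+query = Employee.esql_from().drop(Employee.still_hired)
+for emp in Employee.esql_execute(query, ignore_missing_fields=True):
+    assert emp.still_hired is None  # the dropped field comes back as None
+```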
## Using asyncio with Elasticsearch Python DSL [asyncio]
diff --git a/docs/reference/dsl_tutorials.md b/docs/reference/dsl_tutorials.md
index 77992587b..16224a13f 100644
--- a/docs/reference/dsl_tutorials.md
+++ b/docs/reference/dsl_tutorials.md
@@ -83,7 +83,7 @@ Let’s have a simple Python class representing an article in a blogging system:
```python
from datetime import datetime
-from elasticsearch.dsl import Document, Date, Integer, Keyword, Text, connections
+from elasticsearch.dsl import Document, Date, Integer, Keyword, Text, connections, mapped_field
# Define a default Elasticsearch client
connections.create_connection(hosts="https://localhost:9200/")
@@ -91,7 +91,7 @@ connections.create_connection(hosts="https://localhost:9200/")
class Article(Document):
title: str = mapped_field(Text(analyzer='snowball', fields={'raw': Keyword()}))
body: str = mapped_field(Text(analyzer='snowball'))
- tags: str = mapped_field(Keyword())
+ tags: list[str] = mapped_field(Keyword())
published_from: datetime
lines: int
@@ -216,6 +216,20 @@ response = ubq.execute()
As you can see, the `Update By Query` object provides many of the savings offered by the `Search` object, and additionally allows one to update the results of the search based on a script assigned in the same manner.
+## ES|QL Queries
+
+The DSL module features an integration with the ES|QL query builder, consisting of two methods available in all `Document` sub-classes: `esql_from()` and `esql_execute()`. Using the `Article` document from above, we can search for up to ten articles that include `"world"` in their titles with the following ES|QL query:
+
+```python
+from elasticsearch.esql import functions
+
+query = Article.esql_from().where(functions.match(Article.title, 'world')).limit(10)
+for a in Article.esql_execute(query):
+ print(a.title)
+```
+
+Review the [ES|QL Query Builder section](esql-query-builder.md) to learn more about building ES|QL queries in Python.
+
## Migration from the standard client [_migration_from_the_standard_client]
You don’t have to port your entire application to get the benefits of the DSL module, you can start gradually by creating a `Search` object from your existing `dict`, modifying it using the API and serializing it back to a `dict`:
diff --git a/docs/reference/esql-query-builder.md b/docs/reference/esql-query-builder.md
index 8390ea983..7ef9a0960 100644
--- a/docs/reference/esql-query-builder.md
+++ b/docs/reference/esql-query-builder.md
@@ -20,7 +20,7 @@ The ES|QL Query Builder allows you to construct ES|QL queries using Python synta
You can then see the assembled ES|QL query by printing the resulting query object:
```python
->>> query
+>>> print(query)
FROM employees
| SORT emp_no
| KEEP first_name, last_name, height
@@ -28,12 +28,12 @@ FROM employees
| LIMIT 3
```
-To execute this query, you can cast it to a string and pass the string to the `client.esql.query()` endpoint:
+To execute this query, you can pass it to the `client.esql.query()` endpoint:
```python
>>> from elasticsearch import Elasticsearch
>>> client = Elasticsearch(hosts=[os.environ['ELASTICSEARCH_URL']])
->>> response = client.esql.query(query=str(query))
+>>> response = client.esql.query(query=query)
```
The response body contains a `columns` attribute with the list of columns included in the results, and a `values` attribute with the list of results for the query, each given as a list of column values. Here is a possible response body returned by the example query given above:
@@ -216,7 +216,7 @@ def find_employee_by_name(name):
.keep("first_name", "last_name", "height")
.where(E("first_name") == E("?"))
)
- return client.esql.query(query=str(query), params=[name])
+ return client.esql.query(query=query, params=[name])
```
Here the part of the query in which the untrusted data needs to be inserted is replaced with a parameter, which in ES|QL is defined by the question mark. When using Python expressions, the parameter must be given as `E("?")` so that it is treated as an expression and not as a literal string.
diff --git a/elasticsearch/dsl/_async/document.py b/elasticsearch/dsl/_async/document.py
index de6e9eecc..53b4f12c3 100644
--- a/elasticsearch/dsl/_async/document.py
+++ b/elasticsearch/dsl/_async/document.py
@@ -20,6 +20,7 @@
TYPE_CHECKING,
Any,
AsyncIterable,
+ AsyncIterator,
Dict,
List,
Optional,
@@ -42,6 +43,7 @@
if TYPE_CHECKING:
from elasticsearch import AsyncElasticsearch
+ from elasticsearch.esql.esql import ESQLBase
class AsyncIndexMeta(DocumentMeta):
@@ -520,3 +522,85 @@ async def __anext__(self) -> Dict[str, Any]:
return action
return await async_bulk(es, Generate(actions), **kwargs)
+
+ @classmethod
+ async def esql_execute(
+ cls,
+ query: "ESQLBase",
+ return_additional: bool = False,
+ ignore_missing_fields: bool = False,
+ using: Optional[AsyncUsingType] = None,
+ **kwargs: Any,
+ ) -> AsyncIterator[Union[Self, Tuple[Self, Dict[str, Any]]]]:
+ """
+ Execute the given ES|QL query and return an iterator of instances of this
+ ``Document``. When ``return_additional=True``, 2-element tuples are returned
+ instead, where the first element is the document and the second a dictionary
+ with any remaining columns requested in the query.
+
+ :arg query: an ES|QL query object created with the ``esql_from()`` method.
+ :arg return_additional: if ``False`` (the default), this method returns
+ document objects. If set to ``True``, the method returns tuples with
+ a document in the first element and a dictionary with any additional
+ columns returned by the query in the second element.
+ :arg ignore_missing_fields: if ``False`` (the default), all the fields of
+ the document must be present in the query, or else an exception is
+ raised. Set to ``True`` to allow missing fields, which will result in
+ partially initialized document objects.
+ :arg using: connection alias to use, defaults to ``'default'``
+ :arg kwargs: additional options for the ``client.esql.query()`` function.
+ """
+ es = cls._get_connection(using)
+ response = await es.esql.query(query=str(query), **kwargs)
+ query_columns = [col["name"] for col in response.body.get("columns", [])]
+
+ # Here we get the list of columns defined in the document, which are the
+ # columns that we will take from each result to assemble the document
+ # object.
+ # When `for_esql=False` is passed below by default, the list will include
+ # nested fields, which ES|QL does not return, causing an error. When passing
+ # `ignore_missing_fields=True` the list will be generated with
+ # `for_esql=True`, so the error will not occur, but the documents will
+ # not have any Nested objects in them.
+ doc_fields = set(cls._get_field_names(for_esql=ignore_missing_fields))
+ if not ignore_missing_fields and not doc_fields.issubset(set(query_columns)):
+ raise ValueError(
+ f"Not all fields of {cls.__name__} were returned by the query. "
+ "Make sure your document does not use Nested fields, which are "
+ "currently not supported in ES|QL. To force the query to be "
+ "evaluated in spite of the missing fields, pass set the "
+ "ignore_missing_fields=True option in the esql_execute() call."
+ )
+ non_doc_fields: set[str] = set(query_columns) - doc_fields - {"_id"}
+ index_id = query_columns.index("_id")
+
+ results = response.body.get("values", [])
+ for column_values in results:
+ # create a dictionary with all the document fields, expanding the
+ # dot notation returned by ES|QL into the recursive dictionaries
+ # used by Document.from_dict()
+ doc_dict: Dict[str, Any] = {}
+ for col, val in zip(query_columns, column_values):
+ if col in doc_fields:
+ cols = col.split(".")
+ d = doc_dict
+ for c in cols[:-1]:
+ if c not in d:
+ d[c] = {}
+ d = d[c]
+ d[cols[-1]] = val
+
+ # create the document instance
+ obj = cls(meta={"_id": column_values[index_id]})
+ obj._from_dict(doc_dict)
+
+ if return_additional:
+ # build a dict with any other values included in the response
+ other = {
+ col: val
+ for col, val in zip(query_columns, column_values)
+ if col in non_doc_fields
+ }
+
+ yield obj, other
+ else:
+ yield obj
diff --git a/elasticsearch/dsl/_sync/document.py b/elasticsearch/dsl/_sync/document.py
index f68be4aae..07bda6ec1 100644
--- a/elasticsearch/dsl/_sync/document.py
+++ b/elasticsearch/dsl/_sync/document.py
@@ -21,6 +21,7 @@
Any,
Dict,
Iterable,
+ Iterator,
List,
Optional,
Tuple,
@@ -42,6 +43,7 @@
if TYPE_CHECKING:
from elasticsearch import Elasticsearch
+ from elasticsearch.esql.esql import ESQLBase
class IndexMeta(DocumentMeta):
@@ -512,3 +514,85 @@ def __next__(self) -> Dict[str, Any]:
return action
return bulk(es, Generate(actions), **kwargs)
+
+ @classmethod
+ def esql_execute(
+ cls,
+ query: "ESQLBase",
+ return_additional: bool = False,
+ ignore_missing_fields: bool = False,
+ using: Optional[UsingType] = None,
+ **kwargs: Any,
+ ) -> Iterator[Union[Self, Tuple[Self, Dict[str, Any]]]]:
+ """
+ Execute the given ES|QL query and return an iterator of instances of this
+ ``Document``. When ``return_additional=True``, 2-element tuples are returned
+ instead, where the first element is the document and the second a dictionary
+ with any remaining columns requested in the query.
+
+ :arg query: an ES|QL query object created with the ``esql_from()`` method.
+ :arg return_additional: if ``False`` (the default), this method returns
+ document objects. If set to ``True``, the method returns tuples with
+ a document in the first element and a dictionary with any additional
+ columns returned by the query in the second element.
+ :arg ignore_missing_fields: if ``False`` (the default), all the fields of
+ the document must be present in the query, or else an exception is
+ raised. Set to ``True`` to allow missing fields, which will result in
+ partially initialized document objects.
+ :arg using: connection alias to use, defaults to ``'default'``
+ :arg kwargs: additional options for the ``client.esql.query()`` function.
+ """
+ es = cls._get_connection(using)
+ response = es.esql.query(query=str(query), **kwargs)
+ query_columns = [col["name"] for col in response.body.get("columns", [])]
+
+ # Here we get the list of columns defined in the document, which are the
+ # columns that we will take from each result to assemble the document
+ # object.
+ # When `for_esql=False` is passed below by default, the list will include
+ # nested fields, which ES|QL does not return, causing an error. When passing
+ # `ignore_missing_fields=True` the list will be generated with
+ # `for_esql=True`, so the error will not occur, but the documents will
+ # not have any Nested objects in them.
+ doc_fields = set(cls._get_field_names(for_esql=ignore_missing_fields))
+ if not ignore_missing_fields and not doc_fields.issubset(set(query_columns)):
+ raise ValueError(
+ f"Not all fields of {cls.__name__} were returned by the query. "
+ "Make sure your document does not use Nested fields, which are "
+ "currently not supported in ES|QL. To force the query to be "
+ "evaluated in spite of the missing fields, pass set the "
+ "ignore_missing_fields=True option in the esql_execute() call."
+ )
+ non_doc_fields: set[str] = set(query_columns) - doc_fields - {"_id"}
+ index_id = query_columns.index("_id")
+
+ results = response.body.get("values", [])
+ for column_values in results:
+ # create a dictionary with all the document fields, expanding the
+ # dot notation returned by ES|QL into the recursive dictionaries
+ # used by Document.from_dict()
+ doc_dict: Dict[str, Any] = {}
+ for col, val in zip(query_columns, column_values):
+ if col in doc_fields:
+ cols = col.split(".")
+ d = doc_dict
+ for c in cols[:-1]:
+ if c not in d:
+ d[c] = {}
+ d = d[c]
+ d[cols[-1]] = val
+
+ # create the document instance
+ obj = cls(meta={"_id": column_values[index_id]})
+ obj._from_dict(doc_dict)
+
+ if return_additional:
+ # build a dict with any other values included in the response
+ other = {
+ col: val
+ for col, val in zip(query_columns, column_values)
+ if col in non_doc_fields
+ }
+
+ yield obj, other
+ else:
+ yield obj
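The loop above expands the dot-notation column names returned by ES|QL into the nested dictionaries expected by `Document._from_dict()`. An isolated sketch of that expansion, as a hypothetical standalone helper:

```python
from typing import Any, Dict, List


def expand_columns(columns: List[str], values: List[Any]) -> Dict[str, Any]:
    doc: Dict[str, Any] = {}
    for col, val in zip(columns, values):
        parts = col.split(".")
        d = doc
        for part in parts[:-1]:
            # descend into (or create) the nested dictionary
            d = d.setdefault(part, {})
        d[parts[-1]] = val
    return doc


assert expand_columns(["emp_no", "address.city"], [1, "Boltonshire"]) == {
    "emp_no": 1,
    "address": {"city": "Boltonshire"},
}
```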
diff --git a/elasticsearch/dsl/document_base.py b/elasticsearch/dsl/document_base.py
index 09da7d459..4df900a39 100644
--- a/elasticsearch/dsl/document_base.py
+++ b/elasticsearch/dsl/document_base.py
@@ -49,6 +49,7 @@
if TYPE_CHECKING:
from elastic_transport import ObjectApiResponse
+ from ..esql.esql import ESQLBase
from .index_base import IndexBase
@@ -602,3 +603,44 @@ def to_dict(self, include_meta: bool = False, skip_empty: bool = True) -> Dict[s
meta["_source"] = d
return meta
+
+ @classmethod
+ def _get_field_names(
+ cls, for_esql: bool = False, nested_class: Optional[type[InnerDoc]] = None
+ ) -> List[str]:
+ """Return the list of field names used by this document.
+ If the document has nested objects, their fields are reported using dot
+ notation. If the ``for_esql`` argument is set to ``True``, the list omits
+ nested fields, which are currently unsupported in ES|QL.
+ """
+ fields = []
+ class_ = nested_class or cls
+ for field_name in class_._doc_type.mapping:
+ field = class_._doc_type.mapping[field_name]
+ if isinstance(field, Object):
+ if for_esql and isinstance(field, Nested):
+ # ES|QL does not recognize Nested fields at this time
+ continue
+ sub_fields = cls._get_field_names(
+ for_esql=for_esql, nested_class=field._doc_class
+ )
+ for sub_field in sub_fields:
+ fields.append(f"{field_name}.{sub_field}")
+ else:
+ fields.append(field_name)
+ return fields
+
+ @classmethod
+ def esql_from(cls) -> "ESQLBase":
+ """Return a base ES|QL query for instances of this document class.
+
+ The returned query is initialized with ``FROM`` and ``KEEP`` statements,
+ and can be completed as desired.
+ """
+ from ..esql import ESQL # here to avoid circular imports
+
+ return (
+ ESQL.from_(cls)
+ .metadata("_id")
+ .keep("_id", *tuple(cls._get_field_names(for_esql=True)))
+ )
diff --git a/elasticsearch/esql/__init__.py b/elasticsearch/esql/__init__.py
index 8da8f852a..4a843ad59 100644
--- a/elasticsearch/esql/__init__.py
+++ b/elasticsearch/esql/__init__.py
@@ -16,4 +16,4 @@
# under the License.
from ..dsl import E # noqa: F401
-from .esql import ESQL, and_, not_, or_ # noqa: F401
+from .esql import ESQL, ESQLBase, and_, not_, or_ # noqa: F401
diff --git a/elasticsearch/esql/functions.py b/elasticsearch/esql/functions.py
index 6f47b2c79..162d7b95e 100644
--- a/elasticsearch/esql/functions.py
+++ b/elasticsearch/esql/functions.py
@@ -661,11 +661,11 @@ def multi_match(
"""
if options is not None:
return InstrumentedExpression(
- f"MULTI_MATCH({_render(query)}, {_render(fields)}, {_render(options)})"
+ f'MULTI_MATCH({_render(query)}, {", ".join([_render(c) for c in fields])}, {_render(options)})'
)
else:
return InstrumentedExpression(
- f"MULTI_MATCH({_render(query)}, {_render(fields)})"
+ f'MULTI_MATCH({_render(query)}, {", ".join([_render(c) for c in fields])})'
)
diff --git a/examples/dsl/async/esql_employees.py b/examples/dsl/async/esql_employees.py
new file mode 100644
index 000000000..986c84235
--- /dev/null
+++ b/examples/dsl/async/esql_employees.py
@@ -0,0 +1,170 @@
+# Licensed to Elasticsearch B.V. under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Elasticsearch B.V. licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+# ES|QL query builder example
+
+Requirements:
+
+$ pip install "elasticsearch[async]" faker
+
+To run the example:
+
+$ python esql_employees.py "name to search"
+
+The index will be created automatically with a list of 1000 randomly generated
+employees if it does not exist. Add `--recreate-index` or `-r` to the command
+to regenerate it.
+
+Examples:
+
+$ python esql_employees.py "Mark" # employees named Mark (first or last names)
+$ python esql_employees.py "Sarah" --limit 10 # up to 10 employees named Sarah
+$ python esql_employees.py "Sam" --sort height # sort results by height
+$ python esql_employees.py "Sam" --sort name # sort results by last name
+"""
+
+import argparse
+import asyncio
+import os
+import random
+
+from faker import Faker
+
+from elasticsearch.dsl import AsyncDocument, InnerDoc, M, async_connections
+from elasticsearch.esql import ESQLBase
+from elasticsearch.esql.functions import concat, multi_match
+
+fake = Faker()
+
+
+class Address(InnerDoc):
+ address: M[str]
+ city: M[str]
+ zip_code: M[str]
+
+
+class Employee(AsyncDocument):
+ emp_no: M[int]
+ first_name: M[str]
+ last_name: M[str]
+ height: M[float]
+ still_hired: M[bool]
+ address: M[Address]
+
+ class Index:
+ name = "employees"
+
+ @property
+ def name(self) -> str:
+ return f"{self.first_name} {self.last_name}"
+
+ def __repr__(self) -> str:
+ return f""
+
+
+async def create(num_employees: int = 1000) -> None:
+ print("Creating a new employee index...")
+ if await Employee._index.exists():
+ await Employee._index.delete()
+ await Employee.init()
+
+ for i in range(num_employees):
+ address = Address(
+ address=fake.address(), city=fake.city(), zip_code=fake.zipcode()
+ )
+ emp = Employee(
+ emp_no=10000 + i,
+ first_name=fake.first_name(),
+ last_name=fake.last_name(),
+ height=int((random.random() * 0.8 + 1.5) * 1000) / 1000,
+ still_hired=random.random() >= 0.5,
+ address=address,
+ )
+ await emp.save()
+ await Employee._index.refresh()
+
+
+async def search(query: str, limit: int, sort: str) -> None:
+ q: ESQLBase = (
+ Employee.esql_from()
+ .where(multi_match(query, Employee.first_name, Employee.last_name))
+ .eval(full_name=concat(Employee.first_name, " ", Employee.last_name))
+ )
+ if sort == "height":
+ q = q.sort(Employee.height.desc())
+ elif sort == "name":
+ q = q.sort(Employee.last_name.asc())
+ q = q.limit(limit)
+ async for result in Employee.esql_execute(q, return_additional=True):
+ assert type(result) == tuple
+ employee = result[0]
+ full_name = result[1]["full_name"]
+ print(
+ f"{full_name:<20}",
+ f"{'Hired' if employee.still_hired else 'Not hired':<10}",
+ f"{employee.height:5.2f}m",
+ f"{employee.address.city:<20}",
+ )
+
+
+def parse_args() -> argparse.Namespace:
+ parser = argparse.ArgumentParser(description="Employee ES|QL example")
+ parser.add_argument(
+ "--recreate-index",
+ "-r",
+ action="https://github.com/store_true",
+ help="Recreate and populate the index",
+ )
+ parser.add_argument(
+ "--limit",
+ action="https://github.com/store",
+ type=int,
+ default=100,
+ help="Maximum number or employees to return (default: 100)",
+ )
+ parser.add_argument(
+ "--sort",
+ action="https://github.com/store",
+ type=str,
+ default=None,
+ help='Sort by "name" (ascending) or by "height" (descending) (default: no sorting)',
+ )
+ parser.add_argument(
+ "query", action="https://github.com/store", help="The name or partial name to search for"
+ )
+ return parser.parse_args()
+
+
+async def main() -> None:
+ args = parse_args()
+
+ # initiate the default connection to elasticsearch
+ async_connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]])
+
+ if args.recreate_index or not await Employee._index.exists():
+ await create()
+ await Employee.init()
+
+ await search(args.query, args.limit, args.sort)
+
+ # close the connection
+ await async_connections.get_connection().close()
+
+
+if __name__ == "__main__":
+ asyncio.run(main())
diff --git a/examples/dsl/esql_employees.py b/examples/dsl/esql_employees.py
new file mode 100644
index 000000000..364a2c2d5
--- /dev/null
+++ b/examples/dsl/esql_employees.py
@@ -0,0 +1,169 @@
+# Licensed to Elasticsearch B.V. under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Elasticsearch B.V. licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+# ES|QL query builder example
+
+Requirements:
+
+$ pip install elasticsearch faker
+
+To run the example:
+
+$ python esql_employees.py "name to search"
+
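+The Elasticsearch connection is taken from the ELASTICSEARCH_URL environment
+variable, which must be set before the script is run.
+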
+If the index does not exist, it is created automatically and populated with
+1000 randomly generated employees. Add `--recreate-index` or `-r` to the
+command to regenerate it.
+
+Examples:
+
+$ python esql_employees.py "Mark" # employees named Mark (first or last names)
+$ python esql_employees.py "Sarah" --limit 10 # up to 10 employees named Sarah
+$ python esql_employees.py "Sam" --sort height # sort results by height
+$ python esql_employees.py "Sam" --sort name # sort results by last name
+"""
+
+import argparse
+import os
+import random
+
+from faker import Faker
+
+from elasticsearch.dsl import Document, InnerDoc, M, connections
+from elasticsearch.esql import ESQLBase
+from elasticsearch.esql.functions import concat, multi_match
+
+fake = Faker()
+
+
+class Address(InnerDoc):
+ address: M[str]
+ city: M[str]
+ zip_code: M[str]
+
+
+class Employee(Document):
+ emp_no: M[int]
+ first_name: M[str]
+ last_name: M[str]
+ height: M[float]
+ still_hired: M[bool]
+ address: M[Address]
+
+ class Index:
+ name = "employees"
+
+ @property
+ def name(self) -> str:
+ return f"{self.first_name} {self.last_name}"
+
+ def __repr__(self) -> str:
+ return f""
+
+
+def create(num_employees: int = 1000) -> None:
+ print("Creating a new employee index...")
+ if Employee._index.exists():
+ Employee._index.delete()
+ Employee.init()
+
+ for i in range(num_employees):
+ address = Address(
+ address=fake.address(), city=fake.city(), zip_code=fake.zipcode()
+ )
+ emp = Employee(
+ emp_no=10000 + i,
+ first_name=fake.first_name(),
+ last_name=fake.last_name(),
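+ # random height between 1.5 and 2.3 meters, truncated to three decimals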
+ height=int((random.random() * 0.8 + 1.5) * 1000) / 1000,
+ still_hired=random.random() >= 0.5,
+ address=address,
+ )
+ emp.save()
+ Employee._index.refresh()
+
+
+def search(query: str, limit: int, sort: str) -> None:
+ q: ESQLBase = (
+ Employee.esql_from()
+ .where(multi_match(query, Employee.first_name, Employee.last_name))
+ .eval(full_name=concat(Employee.first_name, " ", Employee.last_name))
+ )
+ if sort == "height":
+ q = q.sort(Employee.height.desc())
+ elif sort == "name":
+ q = q.sort(Employee.last_name.asc())
+ q = q.limit(limit)
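+ # with return_additional=True, esql_execute() yields (document, extras) tuples,
+ # where extras is a dict of the columns computed in eval(), such as full_name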
+ for result in Employee.esql_execute(q, return_additional=True):
+ assert isinstance(result, tuple)
+ employee = result[0]
+ full_name = result[1]["full_name"]
+ print(
+ f"{full_name:<20}",
+ f"{'Hired' if employee.still_hired else 'Not hired':<10}",
+ f"{employee.height:5.2f}m",
+ f"{employee.address.city:<20}",
+ )
+
+
+def parse_args() -> argparse.Namespace:
+ parser = argparse.ArgumentParser(description="Employee ES|QL example")
+ parser.add_argument(
+ "--recreate-index",
+ "-r",
+ action="https://github.com/store_true",
+ help="Recreate and populate the index",
+ )
+ parser.add_argument(
+ "--limit",
+ action="https://github.com/store",
+ type=int,
+ default=100,
+ help="Maximum number or employees to return (default: 100)",
+ )
+ parser.add_argument(
+ "--sort",
+ action="https://github.com/store",
+ type=str,
+ default=None,
+ help='Sort by "name" (ascending) or by "height" (descending) (default: no sorting)',
+ )
+ parser.add_argument(
+ "query", action="https://github.com/store", help="The name or partial name to search for"
+ )
+ return parser.parse_args()
+
+
+def main() -> None:
+ args = parse_args()
+
+ # initiate the default connection to elasticsearch
+ connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]])
+
+ if args.recreate_index or not Employee._index.exists():
+ create()
+ Employee.init()
+
+ search(args.query, args.limit, args.sort)
+
+ # close the connection
+ connections.get_connection().close()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/examples/dsl/semantic_text.py b/examples/dsl/semantic_text.py
index 8d552a2aa..6f4bb8f7d 100644
--- a/examples/dsl/semantic_text.py
+++ b/examples/dsl/semantic_text.py
@@ -21,7 +21,7 @@
Requirements:
-$ pip install "elasticsearch" tqdm
+$ pip install elasticsearch tqdm
Before running this example, an ELSER inference endpoint must be created in the
Elasticsearch cluster. This can be done manually from Kibana, or with the
diff --git a/examples/dsl/sparse_vectors.py b/examples/dsl/sparse_vectors.py
index a92e82026..c328769eb 100644
--- a/examples/dsl/sparse_vectors.py
+++ b/examples/dsl/sparse_vectors.py
@@ -20,7 +20,7 @@
Requirements:
-$ pip install nltk tqdm "elasticsearch"
+$ pip install nltk tqdm elasticsearch
Before running this example, the ELSER v2 model must be downloaded and deployed
to the Elasticsearch cluster, and an ingest pipeline must be defined. This can
diff --git a/examples/dsl/vectors.py b/examples/dsl/vectors.py
index 3afd76991..b4c700b71 100644
--- a/examples/dsl/vectors.py
+++ b/examples/dsl/vectors.py
@@ -20,7 +20,7 @@
Requirements:
-$ pip install nltk sentence_transformers tqdm "elasticsearch"
+$ pip install nltk sentence_transformers tqdm elasticsearch
To run the example:
diff --git a/test_elasticsearch/test_dsl/test_integration/_async/test_esql.py b/test_elasticsearch/test_dsl/test_integration/_async/test_esql.py
index 27d26ca99..ae99873f8 100644
--- a/test_elasticsearch/test_dsl/test_integration/_async/test_esql.py
+++ b/test_elasticsearch/test_dsl/test_integration/_async/test_esql.py
@@ -17,8 +17,13 @@
import pytest
-from elasticsearch.dsl import AsyncDocument, E, M
-from elasticsearch.esql import ESQL, functions
+from elasticsearch.dsl import AsyncDocument, InnerDoc, M
+from elasticsearch.esql import ESQL, E, functions
+
+
+class Address(InnerDoc):
+ address: M[str]
+ city: M[str]
class Employee(AsyncDocument):
@@ -27,6 +32,7 @@ class Employee(AsyncDocument):
last_name: M[str]
height: M[float]
still_hired: M[bool]
+ address: M[Address]
class Index:
name = "employees"
@@ -34,16 +40,86 @@ class Index:
async def load_db():
data = [
- [10000, "Joseph", "Wall", 2.2, True],
- [10001, "Stephanie", "Ward", 1.749, True],
- [10002, "David", "Keller", 1.872, True],
- [10003, "Roger", "Hinton", 1.694, False],
- [10004, "Joshua", "Garcia", 1.661, False],
- [10005, "Matthew", "Richards", 1.633, False],
- [10006, "Maria", "Luna", 1.893, True],
- [10007, "Angela", "Navarro", 1.604, False],
- [10008, "Maria", "Cannon", 2.079, False],
- [10009, "Joseph", "Sutton", 2.025, True],
+ [
+ 10000,
+ "Joseph",
+ "Wall",
+ 2.2,
+ True,
+ Address(address="8875 Long Shoals Suite 441", city="Marcville, TX"),
+ ],
+ [
+ 10001,
+ "Stephanie",
+ "Ward",
+ 1.749,
+ True,
+ Address(address="90162 Carter Harbor Suite 099", city="Davisborough, DE"),
+ ],
+ [
+ 10002,
+ "David",
+ "Keller",
+ 1.872,
+ True,
+ Address(address="6697 Patrick Union Suite 797", city="Fuentesmouth, SD"),
+ ],
+ [
+ 10003,
+ "Roger",
+ "Hinton",
+ 1.694,
+ False,
+ Address(address="809 Kelly Mountains", city="South Megan, DE"),
+ ],
+ [
+ 10004,
+ "Joshua",
+ "Garcia",
+ 1.661,
+ False,
+ Address(address="718 Angela Forks", city="Port Erinland, MA"),
+ ],
+ [
+ 10005,
+ "Matthew",
+ "Richards",
+ 1.633,
+ False,
+ Address(address="2869 Brown Mountains", city="New Debra, NH"),
+ ],
+ [
+ 10006,
+ "Maria",
+ "Luna",
+ 1.893,
+ True,
+ Address(address="5861 Morgan Springs", city="Lake Daniel, WI"),
+ ],
+ [
+ 10007,
+ "Angela",
+ "Navarro",
+ 1.604,
+ False,
+ Address(address="2848 Allen Station", city="Saint Joseph, OR"),
+ ],
+ [
+ 10008,
+ "Maria",
+ "Cannon",
+ 2.079,
+ False,
+ Address(address="322 NW Johnston", city="Bakerburgh, MP"),
+ ],
+ [
+ 10009,
+ "Joseph",
+ "Sutton",
+ 2.025,
+ True,
+ Address(address="77 Cardinal E", city="Lakestown, IL"),
+ ],
]
if await Employee._index.exists():
await Employee._index.delete()
@@ -51,7 +127,12 @@ async def load_db():
for e in data:
employee = Employee(
- emp_no=e[0], first_name=e[1], last_name=e[2], height=e[3], still_hired=e[4]
+ emp_no=e[0],
+ first_name=e[1],
+ last_name=e[2],
+ height=e[3],
+ still_hired=e[4],
+ address=e[5],
)
await employee.save()
await Employee._index.refresh()
@@ -64,9 +145,9 @@ async def test_esql(async_client):
# get the full names of the employees
query = (
ESQL.from_(Employee)
- .eval(name=functions.concat(Employee.first_name, " ", Employee.last_name))
- .keep("name")
- .sort("name")
+ .eval(full_name=functions.concat(Employee.first_name, " ", Employee.last_name))
+ .keep("full_name")
+ .sort("full_name")
.limit(10)
)
r = await async_client.esql.query(query=str(query))
@@ -101,3 +182,73 @@ async def test_esql(async_client):
)
r = await async_client.esql.query(query=str(query), params=["Maria"])
assert r.body["values"] == [["Luna"], ["Cannon"]]
+
+
+@pytest.mark.asyncio
+async def test_esql_dsl(async_client):
+ await load_db()
+
+ # get employees with first name "Maria"
+ query = (
+ Employee.esql_from()
+ .where(Employee.first_name == "Maria")
+ .sort("last_name")
+ .limit(10)
+ )
+ marias = []
+ async for emp in Employee.esql_execute(query):
+ marias.append(emp)
+ assert len(marias) == 2
+ assert marias[0].last_name == "Cannon"
+ assert marias[0].address.address == "322 NW Johnston"
+ assert marias[0].address.city == "Bakerburgh, MP"
+ assert marias[1].last_name == "Luna"
+ assert marias[1].address.address == "5861 Morgan Springs"
+ assert marias[1].address.city == "Lake Daniel, WI"
+
+ # run a query with a missing field
+ query = (
+ Employee.esql_from()
+ .where(Employee.first_name == "Maria")
+ .drop(Employee.address.city)
+ .sort("last_name")
+ .limit(10)
+ )
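+ # the dropped column makes document instantiation fail, unless missing fields are ignored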
+ with pytest.raises(ValueError):
+ await Employee.esql_execute(query).__anext__()
+ marias = []
+ async for emp in Employee.esql_execute(query, ignore_missing_fields=True):
+ marias.append(emp)
+ assert marias[0].last_name == "Cannon"
+ assert marias[0].address.address == "322 NW Johnston"
+ assert marias[0].address.city is None
+ assert marias[1].last_name == "Luna"
+ assert marias[1].address.address == "5861 Morgan Springs"
+ assert marias[1].address.city is None
+
+ # run a query with additional calculated fields
+ query = (
+ Employee.esql_from()
+ .where(Employee.first_name == "Maria")
+ .eval(
+ full_name=functions.concat(Employee.first_name, " ", Employee.last_name),
+ height_cm=functions.to_integer(Employee.height * 100),
+ )
+ .sort("last_name")
+ .limit(10)
+ )
+ assert isinstance(await Employee.esql_execute(query).__anext__(), Employee)
+ assert isinstance(
+ await Employee.esql_execute(query, return_additional=True).__anext__(), tuple
+ )
+ marias = []
+ async for emp, extra in Employee.esql_execute(query, return_additional=True):
+ marias.append([emp, extra])
+ assert marias[0][0].last_name == "Cannon"
+ assert marias[0][0].address.address == "322 NW Johnston"
+ assert marias[0][0].address.city == "Bakerburgh, MP"
+ assert marias[0][1] == {"full_name": "Maria Cannon", "height_cm": 208}
+ assert marias[1][0].last_name == "Luna"
+ assert marias[1][0].address.address == "5861 Morgan Springs"
+ assert marias[1][0].address.city == "Lake Daniel, WI"
+ assert marias[1][1] == {"full_name": "Maria Luna", "height_cm": 189}
diff --git a/test_elasticsearch/test_dsl/test_integration/_sync/test_esql.py b/test_elasticsearch/test_dsl/test_integration/_sync/test_esql.py
index 85ceee5ae..d02484013 100644
--- a/test_elasticsearch/test_dsl/test_integration/_sync/test_esql.py
+++ b/test_elasticsearch/test_dsl/test_integration/_sync/test_esql.py
@@ -17,8 +17,13 @@
import pytest
-from elasticsearch.dsl import Document, E, M
-from elasticsearch.esql import ESQL, functions
+from elasticsearch.dsl import Document, InnerDoc, M
+from elasticsearch.esql import ESQL, E, functions
+
+
+class Address(InnerDoc):
+ address: M[str]
+ city: M[str]
class Employee(Document):
@@ -27,6 +32,7 @@ class Employee(Document):
last_name: M[str]
height: M[float]
still_hired: M[bool]
+ address: M[Address]
class Index:
name = "employees"
@@ -34,16 +40,86 @@ class Index:
def load_db():
data = [
- [10000, "Joseph", "Wall", 2.2, True],
- [10001, "Stephanie", "Ward", 1.749, True],
- [10002, "David", "Keller", 1.872, True],
- [10003, "Roger", "Hinton", 1.694, False],
- [10004, "Joshua", "Garcia", 1.661, False],
- [10005, "Matthew", "Richards", 1.633, False],
- [10006, "Maria", "Luna", 1.893, True],
- [10007, "Angela", "Navarro", 1.604, False],
- [10008, "Maria", "Cannon", 2.079, False],
- [10009, "Joseph", "Sutton", 2.025, True],
+ [
+ 10000,
+ "Joseph",
+ "Wall",
+ 2.2,
+ True,
+ Address(address="8875 Long Shoals Suite 441", city="Marcville, TX"),
+ ],
+ [
+ 10001,
+ "Stephanie",
+ "Ward",
+ 1.749,
+ True,
+ Address(address="90162 Carter Harbor Suite 099", city="Davisborough, DE"),
+ ],
+ [
+ 10002,
+ "David",
+ "Keller",
+ 1.872,
+ True,
+ Address(address="6697 Patrick Union Suite 797", city="Fuentesmouth, SD"),
+ ],
+ [
+ 10003,
+ "Roger",
+ "Hinton",
+ 1.694,
+ False,
+ Address(address="809 Kelly Mountains", city="South Megan, DE"),
+ ],
+ [
+ 10004,
+ "Joshua",
+ "Garcia",
+ 1.661,
+ False,
+ Address(address="718 Angela Forks", city="Port Erinland, MA"),
+ ],
+ [
+ 10005,
+ "Matthew",
+ "Richards",
+ 1.633,
+ False,
+ Address(address="2869 Brown Mountains", city="New Debra, NH"),
+ ],
+ [
+ 10006,
+ "Maria",
+ "Luna",
+ 1.893,
+ True,
+ Address(address="5861 Morgan Springs", city="Lake Daniel, WI"),
+ ],
+ [
+ 10007,
+ "Angela",
+ "Navarro",
+ 1.604,
+ False,
+ Address(address="2848 Allen Station", city="Saint Joseph, OR"),
+ ],
+ [
+ 10008,
+ "Maria",
+ "Cannon",
+ 2.079,
+ False,
+ Address(address="322 NW Johnston", city="Bakerburgh, MP"),
+ ],
+ [
+ 10009,
+ "Joseph",
+ "Sutton",
+ 2.025,
+ True,
+ Address(address="77 Cardinal E", city="Lakestown, IL"),
+ ],
]
if Employee._index.exists():
Employee._index.delete()
@@ -51,7 +127,12 @@ def load_db():
for e in data:
employee = Employee(
- emp_no=e[0], first_name=e[1], last_name=e[2], height=e[3], still_hired=e[4]
+ emp_no=e[0],
+ first_name=e[1],
+ last_name=e[2],
+ height=e[3],
+ still_hired=e[4],
+ address=e[5],
)
employee.save()
Employee._index.refresh()
@@ -64,9 +145,9 @@ def test_esql(client):
# get the full names of the employees
query = (
ESQL.from_(Employee)
- .eval(name=functions.concat(Employee.first_name, " ", Employee.last_name))
- .keep("name")
- .sort("name")
+ .eval(full_name=functions.concat(Employee.first_name, " ", Employee.last_name))
+ .keep("full_name")
+ .sort("full_name")
.limit(10)
)
r = client.esql.query(query=str(query))
@@ -101,3 +182,73 @@ def test_esql(client):
)
r = client.esql.query(query=str(query), params=["Maria"])
assert r.body["values"] == [["Luna"], ["Cannon"]]
+
+
+@pytest.mark.sync
+def test_esql_dsl(client):
+ load_db()
+
+ # get employees with first name "Maria"
+ query = (
+ Employee.esql_from()
+ .where(Employee.first_name == "Maria")
+ .sort("last_name")
+ .limit(10)
+ )
+ marias = []
+ for emp in Employee.esql_execute(query):
+ marias.append(emp)
+ assert len(marias) == 2
+ assert marias[0].last_name == "Cannon"
+ assert marias[0].address.address == "322 NW Johnston"
+ assert marias[0].address.city == "Bakerburgh, MP"
+ assert marias[1].last_name == "Luna"
+ assert marias[1].address.address == "5861 Morgan Springs"
+ assert marias[1].address.city == "Lake Daniel, WI"
+
+ # run a query with a missing field
+ query = (
+ Employee.esql_from()
+ .where(Employee.first_name == "Maria")
+ .drop(Employee.address.city)
+ .sort("last_name")
+ .limit(10)
+ )
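+ # the dropped column makes document instantiation fail, unless missing fields are ignored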
+ with pytest.raises(ValueError):
+ Employee.esql_execute(query).__next__()
+ marias = []
+ for emp in Employee.esql_execute(query, ignore_missing_fields=True):
+ marias.append(emp)
+ assert marias[0].last_name == "Cannon"
+ assert marias[0].address.address == "322 NW Johnston"
+ assert marias[0].address.city is None
+ assert marias[1].last_name == "Luna"
+ assert marias[1].address.address == "5861 Morgan Springs"
+ assert marias[1].address.city is None
+
+ # run a query with additional calculated fields
+ query = (
+ Employee.esql_from()
+ .where(Employee.first_name == "Maria")
+ .eval(
+ full_name=functions.concat(Employee.first_name, " ", Employee.last_name),
+ height_cm=functions.to_integer(Employee.height * 100),
+ )
+ .sort("last_name")
+ .limit(10)
+ )
+ assert isinstance(Employee.esql_execute(query).__next__(), Employee)
+ assert isinstance(
+ Employee.esql_execute(query, return_additional=True).__next__(), tuple
+ )
+ marias = []
+ for emp, extra in Employee.esql_execute(query, return_additional=True):
+ marias.append([emp, extra])
+ assert marias[0][0].last_name == "Cannon"
+ assert marias[0][0].address.address == "322 NW Johnston"
+ assert marias[0][0].address.city == "Bakerburgh, MP"
+ assert marias[0][1] == {"full_name": "Maria Cannon", "height_cm": 208}
+ assert marias[1][0].last_name == "Luna"
+ assert marias[1][0].address.address == "5861 Morgan Springs"
+ assert marias[1][0].address.city == "Lake Daniel, WI"
+ assert marias[1][1] == {"full_name": "Maria Luna", "height_cm": 189}
diff --git a/utils/run-unasync-dsl.py b/utils/run-unasync-dsl.py
index 59c0b05bc..b74c748fa 100644
--- a/utils/run-unasync-dsl.py
+++ b/utils/run-unasync-dsl.py
@@ -121,7 +121,7 @@ def main(check=False):
[
"sed",
"-i.bak",
- "s/elasticsearch\\[async\\]/elasticsearch/",
+ 's/"elasticsearch\\[async\\]"/elasticsearch/',
f"{output_dir}{file}",
]
)
From 1ceb4fc080886c1e88aa0c3156ca8d7da0e77e24 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Mon, 8 Sep 2025 10:35:06 +0100
Subject: [PATCH 10/21] Mock sentence-transformers and nltk in tests (#3059)
(#3063)
* Mock sentence-transformers and nltk in tests
* Update test_elasticsearch/test_dsl/conftest.py
* switch to a local mock that only affects the one test
---------
(cherry picked from commit e05d7f1fd9750605d2c805989b3b37779618e8ae)
Co-authored-by: Miguel Grinberg
Co-authored-by: Quentin Pradet
---
pyproject.toml | 2 --
.../test_examples/_async/test_vectors.py | 31 +++++++++++++------
.../test_examples/_sync/test_vectors.py | 31 +++++++++++++------
3 files changed, 44 insertions(+), 20 deletions(-)
diff --git a/pyproject.toml b/pyproject.toml
index 6c81fc2b3..a8e5ead9e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -77,8 +77,6 @@ dev = [
"pandas",
"mapbox-vector-tile",
"jinja2",
- "nltk",
- "sentence_transformers",
"tqdm",
"mypy",
"pyright",
diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_vectors.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_vectors.py
index dc45ceb52..3af9a877f 100644
--- a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_vectors.py
+++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_vectors.py
@@ -15,27 +15,27 @@
# specific language governing permissions and limitations
# under the License.
+import sys
from hashlib import md5
from typing import Any, List, Tuple
from unittest import SkipTest
+from unittest.mock import Mock, patch
import pytest
from elasticsearch import AsyncElasticsearch
-from ..async_examples import vectors
-
@pytest.mark.asyncio
async def test_vector_search(
- async_write_client: AsyncElasticsearch, es_version: Tuple[int, ...], mocker: Any
+ async_write_client: AsyncElasticsearch, es_version: Tuple[int, ...]
) -> None:
# this test only runs on Elasticsearch >= 8.11 because the example uses
# a dense vector without specifying an explicit size
if es_version < (8, 11):
raise SkipTest("This test requires Elasticsearch 8.11 or newer")
- class MockModel:
+ class MockSentenceTransformer:
def __init__(self, model: Any):
pass
@@ -44,9 +44,22 @@ def encode(self, text: str) -> List[float]:
total = sum(vector)
return [float(v) / total for v in vector]
- mocker.patch.object(vectors, "SentenceTransformer", new=MockModel)
+ def mock_nltk_tokenize(content: str):
+ return content.split("\n")
+
+ # mock sentence_transformers and nltk, because they are quite big and
+ # irrelevant for testing the example logic
+ with patch.dict(
+ sys.modules,
+ {
+ "sentence_transformers": Mock(SentenceTransformer=MockSentenceTransformer),
+ "nltk": Mock(sent_tokenize=mock_nltk_tokenize),
+ },
+ ):
+ # import the example after the dependencies are mocked
+ from ..async_examples import vectors
- await vectors.create()
- await vectors.WorkplaceDoc._index.refresh()
- results = await (await vectors.search("Welcome to our team!")).execute()
- assert results[0].name == "New Employee Onboarding Guide"
+ await vectors.create()
+ await vectors.WorkplaceDoc._index.refresh()
+ results = await (await vectors.search("Welcome to our team!")).execute()
+ assert results[0].name == "Intellectual Property Policy"
diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_vectors.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_vectors.py
index 4b14c89a0..e8e61b5a0 100644
--- a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_vectors.py
+++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_vectors.py
@@ -15,27 +15,27 @@
# specific language governing permissions and limitations
# under the License.
+import sys
from hashlib import md5
from typing import Any, List, Tuple
from unittest import SkipTest
+from unittest.mock import Mock, patch
import pytest
from elasticsearch import Elasticsearch
-from ..examples import vectors
-
@pytest.mark.sync
def test_vector_search(
- write_client: Elasticsearch, es_version: Tuple[int, ...], mocker: Any
+ write_client: Elasticsearch, es_version: Tuple[int, ...]
) -> None:
# this test only runs on Elasticsearch >= 8.11 because the example uses
# a dense vector without specifying an explicit size
if es_version < (8, 11):
raise SkipTest("This test requires Elasticsearch 8.11 or newer")
- class MockModel:
+ class MockSentenceTransformer:
def __init__(self, model: Any):
pass
@@ -44,9 +44,22 @@ def encode(self, text: str) -> List[float]:
total = sum(vector)
return [float(v) / total for v in vector]
- mocker.patch.object(vectors, "SentenceTransformer", new=MockModel)
+ def mock_nltk_tokenize(content: str):
+ return content.split("\n")
+
+ # mock sentence_transformers and nltk, because they are quite big and
+ # irrelevant for testing the example logic
+ with patch.dict(
+ sys.modules,
+ {
+ "sentence_transformers": Mock(SentenceTransformer=MockSentenceTransformer),
+ "nltk": Mock(sent_tokenize=mock_nltk_tokenize),
+ },
+ ):
+ # import the example after the dependencies are mocked
+ from ..examples import vectors
- vectors.create()
- vectors.WorkplaceDoc._index.refresh()
- results = (vectors.search("Welcome to our team!")).execute()
- assert results[0].name == "New Employee Onboarding Guide"
+ vectors.create()
+ vectors.WorkplaceDoc._index.refresh()
+ results = (vectors.search("Welcome to our team!")).execute()
+ assert results[0].name == "Intellectual Property Policy"
From 5c61dba452f35321ad33c6e393b0bff21579da8c Mon Sep 17 00:00:00 2001
From: Elastic Machine
Date: Thu, 11 Sep 2025 11:35:23 -0400
Subject: [PATCH 11/21] Auto-generated code for 9.1 (#3037)
* Auto-generated API code
* Added missing types
---------
Co-authored-by: Miguel Grinberg
---
elasticsearch/_async/client/__init__.py | 25 +-
elasticsearch/_async/client/cat.py | 636 ++++++++++++++++++++++-
elasticsearch/_async/client/cluster.py | 9 +-
elasticsearch/_async/client/esql.py | 26 +-
elasticsearch/_async/client/indices.py | 8 +-
elasticsearch/_async/client/inference.py | 9 +-
elasticsearch/_async/client/sql.py | 2 +-
elasticsearch/_async/client/transform.py | 60 +++
elasticsearch/_sync/client/__init__.py | 25 +-
elasticsearch/_sync/client/cat.py | 636 ++++++++++++++++++++++-
elasticsearch/_sync/client/cluster.py | 9 +-
elasticsearch/_sync/client/esql.py | 26 +-
elasticsearch/_sync/client/indices.py | 8 +-
elasticsearch/_sync/client/inference.py | 9 +-
elasticsearch/_sync/client/sql.py | 2 +-
elasticsearch/_sync/client/transform.py | 60 +++
elasticsearch/dsl/types.py | 57 +-
17 files changed, 1499 insertions(+), 108 deletions(-)
diff --git a/elasticsearch/_async/client/__init__.py b/elasticsearch/_async/client/__init__.py
index 902834328..8750504ca 100644
--- a/elasticsearch/_async/client/__init__.py
+++ b/elasticsearch/_async/client/__init__.py
@@ -608,6 +608,7 @@ async def bulk(
- JavaScript: Check out client.helpers.*
- .NET: Check out BulkAllObservable
- PHP: Check out bulk indexing.
+ - Ruby: Check out Elasticsearch::Helpers::BulkHelper
Submitting bulk requests with cURL
If you're providing text file input to curl, you must use the --data-binary flag instead of plain -d.
@@ -1326,7 +1327,7 @@ async def delete(
)
@_rewrite_parameters(
- body_fields=("max_docs", "query", "slice"),
+ body_fields=("max_docs", "query", "slice", "sort"),
parameter_aliases={"from": "from_"},
)
async def delete_by_query(
@@ -1370,7 +1371,12 @@ async def delete_by_query(
] = None,
slice: t.Optional[t.Mapping[str, t.Any]] = None,
slices: t.Optional[t.Union[int, t.Union[str, t.Literal["auto"]]]] = None,
- sort: t.Optional[t.Sequence[str]] = None,
+ sort: t.Optional[
+ t.Union[
+ t.Sequence[t.Union[str, t.Mapping[str, t.Any]]],
+ t.Union[str, t.Mapping[str, t.Any]],
+ ]
+ ] = None,
stats: t.Optional[t.Sequence[str]] = None,
terminate_after: t.Optional[int] = None,
timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
@@ -1502,7 +1508,7 @@ async def delete_by_query(
:param slice: Slice the request manually using the provided slice ID and total
number of slices.
:param slices: The number of slices this task should be divided into.
- :param sort: A comma-separated list of `:` pairs.
+ :param sort: A sort object that specifies the order of deleted documents.
:param stats: The specific `tag` of the request for logging and statistical purposes.
:param terminate_after: The maximum number of documents to collect for each shard.
If a query reaches this limit, Elasticsearch terminates the query early.
@@ -1592,8 +1598,6 @@ async def delete_by_query(
__query["search_type"] = search_type
if slices is not None:
__query["slices"] = slices
- if sort is not None:
- __query["sort"] = sort
if stats is not None:
__query["stats"] = stats
if terminate_after is not None:
@@ -1613,6 +1617,8 @@ async def delete_by_query(
__body["query"] = query
if slice is not None:
__body["slice"] = slice
+ if sort is not None:
+ __body["sort"] = sort
__headers = {"accept": "application/json", "content-type": "application/json"}
return await self.perform_request( # type: ignore[return-value]
"POST",
@@ -3870,6 +3876,13 @@ async def reindex(
In this case, the response includes a count of the version conflicts that were encountered.
Note that the handling of other error types is unaffected by the conflicts property.
Additionally, if you opt to count version conflicts, the operation could attempt to reindex more documents from the source than max_docs until it has successfully indexed max_docs documents into the target or it has gone through every document in the source query.
+ It's recommended to reindex on indices with a green status. Reindexing can fail when a node shuts down or crashes.
+
+ - When requested with wait_for_completion=true (default), the request fails if the node shuts down.
+ - When requested with wait_for_completion=false, a task id is returned, for use with the task management APIs. The task may disappear or fail if the node shuts down.
+ When retrying a failed reindex operation, it might be necessary to set conflicts=proceed or to first delete the partial destination index.
+ Additionally, dry runs, checking disk space, and fetching index recovery information can help address the root cause.
+
Refer to the linked documentation for examples of how to reindex documents.
@@ -5649,7 +5662,7 @@ async def termvectors(
doc: t.Optional[t.Mapping[str, t.Any]] = None,
error_trace: t.Optional[bool] = None,
field_statistics: t.Optional[bool] = None,
- fields: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ fields: t.Optional[t.Sequence[str]] = None,
filter: t.Optional[t.Mapping[str, t.Any]] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
human: t.Optional[bool] = None,
diff --git a/elasticsearch/_async/client/cat.py b/elasticsearch/_async/client/cat.py
index 148ed721f..84f946f5c 100644
--- a/elasticsearch/_async/client/cat.py
+++ b/elasticsearch/_async/client/cat.py
@@ -47,7 +47,34 @@ async def aliases(
] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
- h: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ h: t.Optional[
+ t.Union[
+ t.Sequence[
+ t.Union[
+ str,
+ t.Literal[
+ "alias",
+ "filter",
+ "index",
+ "is_write_index",
+ "routing.index",
+ "routing.search",
+ ],
+ ]
+ ],
+ t.Union[
+ str,
+ t.Literal[
+ "alias",
+ "filter",
+ "index",
+ "is_write_index",
+ "routing.index",
+ "routing.search",
+ ],
+ ],
+ ]
+ ] = None,
help: t.Optional[bool] = None,
human: t.Optional[bool] = None,
master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
@@ -74,7 +101,8 @@ async def aliases(
values, such as `open,hidden`.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
- :param h: List of columns to appear in the response. Supports simple wildcards.
+ :param h: A comma-separated list of column names to display. It supports simple
+ wildcards.
:param help: When set to `true` will output available columns. This option can't
be combined with any other query string option.
:param master_timeout: The period to wait for a connection to the master node.
@@ -137,7 +165,48 @@ async def allocation(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
- h: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ h: t.Optional[
+ t.Union[
+ t.Sequence[
+ t.Union[
+ str,
+ t.Literal[
+ "disk.avail",
+ "disk.indices",
+ "disk.indices.forecast",
+ "disk.percent",
+ "disk.total",
+ "disk.used",
+ "host",
+ "ip",
+ "node",
+ "node.role",
+ "shards",
+ "shards.undesired",
+ "write_load.forecast",
+ ],
+ ]
+ ],
+ t.Union[
+ str,
+ t.Literal[
+ "disk.avail",
+ "disk.indices",
+ "disk.indices.forecast",
+ "disk.percent",
+ "disk.total",
+ "disk.used",
+ "host",
+ "ip",
+ "node",
+ "node.role",
+ "shards",
+ "shards.undesired",
+ "write_load.forecast",
+ ],
+ ],
+ ]
+ ] = None,
help: t.Optional[bool] = None,
human: t.Optional[bool] = None,
local: t.Optional[bool] = None,
@@ -161,7 +230,8 @@ async def allocation(
:param bytes: The unit used to display byte values.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
- :param h: List of columns to appear in the response. Supports simple wildcards.
+ :param h: A comma-separated list of column names to display. It supports simple
+ wildcards.
:param help: When set to `true` will output available columns. This option can't
be combined with any other query string option.
:param local: If `true`, the request computes the list of selected nodes from
@@ -224,7 +294,36 @@ async def component_templates(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
- h: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ h: t.Optional[
+ t.Union[
+ t.Sequence[
+ t.Union[
+ str,
+ t.Literal[
+ "alias_count",
+ "included_in",
+ "mapping_count",
+ "metadata_count",
+ "name",
+ "settings_count",
+ "version",
+ ],
+ ]
+ ],
+ t.Union[
+ str,
+ t.Literal[
+ "alias_count",
+ "included_in",
+ "mapping_count",
+ "metadata_count",
+ "name",
+ "settings_count",
+ "version",
+ ],
+ ],
+ ]
+ ] = None,
help: t.Optional[bool] = None,
human: t.Optional[bool] = None,
local: t.Optional[bool] = None,
@@ -249,7 +348,8 @@ async def component_templates(
If it is omitted, all component templates are returned.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
- :param h: List of columns to appear in the response. Supports simple wildcards.
+ :param h: A comma-separated list of column names to display. It supports simple
+ wildcards.
:param help: When set to `true` will output available columns. This option can't
be combined with any other query string option.
:param local: If `true`, the request computes the list of selected nodes from
@@ -310,7 +410,12 @@ async def count(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
- h: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ h: t.Optional[
+ t.Union[
+ t.Sequence[t.Union[str, t.Literal["count", "epoch", "timestamp"]]],
+ t.Union[str, t.Literal["count", "epoch", "timestamp"]],
+ ]
+ ] = None,
help: t.Optional[bool] = None,
human: t.Optional[bool] = None,
pretty: t.Optional[bool] = None,
@@ -334,7 +439,8 @@ async def count(
and indices, omit this parameter or use `*` or `_all`.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
- :param h: List of columns to appear in the response. Supports simple wildcards.
+ :param h: A comma-separated list of column names to display. It supports simple
+ wildcards.
:param help: When set to `true` will output available columns. This option can't
be combined with any other query string option.
:param s: List of columns that determine how the table should be sorted. Sorting
@@ -389,7 +495,14 @@ async def fielddata(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
- h: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ h: t.Optional[
+ t.Union[
+ t.Sequence[
+ t.Union[str, t.Literal["field", "host", "id", "ip", "node", "size"]]
+ ],
+ t.Union[str, t.Literal["field", "host", "id", "ip", "node", "size"]],
+ ]
+ ] = None,
help: t.Optional[bool] = None,
human: t.Optional[bool] = None,
pretty: t.Optional[bool] = None,
@@ -412,7 +525,8 @@ async def fielddata(
:param bytes: The unit used to display byte values.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
- :param h: List of columns to appear in the response. Supports simple wildcards.
+ :param h: A comma-separated list of column names to display. It supports simple
+ wildcards.
:param help: When set to `true` will output available columns. This option can't
be combined with any other query string option.
:param s: List of columns that determine how the table should be sorted. Sorting
@@ -465,7 +579,52 @@ async def health(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
- h: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ h: t.Optional[
+ t.Union[
+ t.Sequence[
+ t.Union[
+ str,
+ t.Literal[
+ "active_shards_percent",
+ "cluster",
+ "epoch",
+ "init",
+ "max_task_wait_time",
+ "node.data",
+ "node.total",
+ "pending_tasks",
+ "pri",
+ "relo",
+ "shards",
+ "status",
+ "timestamp",
+ "unassign",
+ "unassign.pri",
+ ],
+ ]
+ ],
+ t.Union[
+ str,
+ t.Literal[
+ "active_shards_percent",
+ "cluster",
+ "epoch",
+ "init",
+ "max_task_wait_time",
+ "node.data",
+ "node.total",
+ "pending_tasks",
+ "pri",
+ "relo",
+ "shards",
+ "status",
+ "timestamp",
+ "unassign",
+ "unassign.pri",
+ ],
+ ],
+ ]
+ ] = None,
help: t.Optional[bool] = None,
human: t.Optional[bool] = None,
pretty: t.Optional[bool] = None,
@@ -495,7 +654,8 @@ async def health(
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
- :param h: List of columns to appear in the response. Supports simple wildcards.
+ :param h: A comma-separated list of column names to display. It supports simple
+ wildcards.
:param help: When set to `true` will output available columns. This option can't
be combined with any other query string option.
:param s: List of columns that determine how the table should be sorted. Sorting
@@ -583,7 +743,316 @@ async def indices(
] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
- h: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ h: t.Optional[
+ t.Union[
+ t.Sequence[
+ t.Union[
+ str,
+ t.Literal[
+ "bulk.avg_size_in_bytes",
+ "bulk.avg_time",
+ "bulk.total_operations",
+ "bulk.total_size_in_bytes",
+ "bulk.total_time",
+ "completion.size",
+ "creation.date",
+ "creation.date.string",
+ "dataset.size",
+ "dense_vector.value_count",
+ "docs.count",
+ "docs.deleted",
+ "fielddata.evictions",
+ "fielddata.memory_size",
+ "flush.total",
+ "flush.total_time",
+ "get.current",
+ "get.exists_time",
+ "get.exists_total",
+ "get.missing_time",
+ "get.missing_total",
+ "get.time",
+ "get.total",
+ "health",
+ "index",
+ "indexing.delete_current",
+ "indexing.delete_time",
+ "indexing.delete_total",
+ "indexing.index_current",
+ "indexing.index_failed",
+ "indexing.index_failed_due_to_version_conflict",
+ "indexing.index_time",
+ "indexing.index_total",
+ "memory.total",
+ "merges.current",
+ "merges.current_docs",
+ "merges.current_size",
+ "merges.total",
+ "merges.total_docs",
+ "merges.total_size",
+ "merges.total_time",
+ "pri",
+ "pri.bulk.avg_size_in_bytes",
+ "pri.bulk.avg_time",
+ "pri.bulk.total_operations",
+ "pri.bulk.total_size_in_bytes",
+ "pri.bulk.total_time",
+ "pri.completion.size",
+ "pri.dense_vector.value_count",
+ "pri.fielddata.evictions",
+ "pri.fielddata.memory_size",
+ "pri.flush.total",
+ "pri.flush.total_time",
+ "pri.get.current",
+ "pri.get.exists_time",
+ "pri.get.exists_total",
+ "pri.get.missing_time",
+ "pri.get.missing_total",
+ "pri.get.time",
+ "pri.get.total",
+ "pri.indexing.delete_current",
+ "pri.indexing.delete_time",
+ "pri.indexing.delete_total",
+ "pri.indexing.index_current",
+ "pri.indexing.index_failed",
+ "pri.indexing.index_failed_due_to_version_conflict",
+ "pri.indexing.index_time",
+ "pri.indexing.index_total",
+ "pri.memory.total",
+ "pri.merges.current",
+ "pri.merges.current_docs",
+ "pri.merges.current_size",
+ "pri.merges.total",
+ "pri.merges.total_docs",
+ "pri.merges.total_size",
+ "pri.merges.total_time",
+ "pri.query_cache.evictions",
+ "pri.query_cache.memory_size",
+ "pri.refresh.external_time",
+ "pri.refresh.external_total",
+ "pri.refresh.listeners",
+ "pri.refresh.time",
+ "pri.refresh.total",
+ "pri.request_cache.evictions",
+ "pri.request_cache.hit_count",
+ "pri.request_cache.memory_size",
+ "pri.request_cache.miss_count",
+ "pri.search.fetch_current",
+ "pri.search.fetch_time",
+ "pri.search.fetch_total",
+ "pri.search.open_contexts",
+ "pri.search.query_current",
+ "pri.search.query_time",
+ "pri.search.query_total",
+ "pri.search.scroll_current",
+ "pri.search.scroll_time",
+ "pri.search.scroll_total",
+ "pri.segments.count",
+ "pri.segments.fixed_bitset_memory",
+ "pri.segments.index_writer_memory",
+ "pri.segments.memory",
+ "pri.segments.version_map_memory",
+ "pri.sparse_vector.value_count",
+ "pri.store.size",
+ "pri.suggest.current",
+ "pri.suggest.time",
+ "pri.suggest.total",
+ "pri.warmer.current",
+ "pri.warmer.total",
+ "pri.warmer.total_time",
+ "query_cache.evictions",
+ "query_cache.memory_size",
+ "refresh.external_time",
+ "refresh.external_total",
+ "refresh.listeners",
+ "refresh.time",
+ "refresh.total",
+ "rep",
+ "request_cache.evictions",
+ "request_cache.hit_count",
+ "request_cache.memory_size",
+ "request_cache.miss_count",
+ "search.fetch_current",
+ "search.fetch_time",
+ "search.fetch_total",
+ "search.open_contexts",
+ "search.query_current",
+ "search.query_time",
+ "search.query_total",
+ "search.scroll_current",
+ "search.scroll_time",
+ "search.scroll_total",
+ "segments.count",
+ "segments.fixed_bitset_memory",
+ "segments.index_writer_memory",
+ "segments.memory",
+ "segments.version_map_memory",
+ "sparse_vector.value_count",
+ "status",
+ "store.size",
+ "suggest.current",
+ "suggest.time",
+ "suggest.total",
+ "uuid",
+ "warmer.current",
+ "warmer.total",
+ "warmer.total_time",
+ ],
+ ]
+ ],
+ t.Union[
+ str,
+ t.Literal[
+ "bulk.avg_size_in_bytes",
+ "bulk.avg_time",
+ "bulk.total_operations",
+ "bulk.total_size_in_bytes",
+ "bulk.total_time",
+ "completion.size",
+ "creation.date",
+ "creation.date.string",
+ "dataset.size",
+ "dense_vector.value_count",
+ "docs.count",
+ "docs.deleted",
+ "fielddata.evictions",
+ "fielddata.memory_size",
+ "flush.total",
+ "flush.total_time",
+ "get.current",
+ "get.exists_time",
+ "get.exists_total",
+ "get.missing_time",
+ "get.missing_total",
+ "get.time",
+ "get.total",
+ "health",
+ "index",
+ "indexing.delete_current",
+ "indexing.delete_time",
+ "indexing.delete_total",
+ "indexing.index_current",
+ "indexing.index_failed",
+ "indexing.index_failed_due_to_version_conflict",
+ "indexing.index_time",
+ "indexing.index_total",
+ "memory.total",
+ "merges.current",
+ "merges.current_docs",
+ "merges.current_size",
+ "merges.total",
+ "merges.total_docs",
+ "merges.total_size",
+ "merges.total_time",
+ "pri",
+ "pri.bulk.avg_size_in_bytes",
+ "pri.bulk.avg_time",
+ "pri.bulk.total_operations",
+ "pri.bulk.total_size_in_bytes",
+ "pri.bulk.total_time",
+ "pri.completion.size",
+ "pri.dense_vector.value_count",
+ "pri.fielddata.evictions",
+ "pri.fielddata.memory_size",
+ "pri.flush.total",
+ "pri.flush.total_time",
+ "pri.get.current",
+ "pri.get.exists_time",
+ "pri.get.exists_total",
+ "pri.get.missing_time",
+ "pri.get.missing_total",
+ "pri.get.time",
+ "pri.get.total",
+ "pri.indexing.delete_current",
+ "pri.indexing.delete_time",
+ "pri.indexing.delete_total",
+ "pri.indexing.index_current",
+ "pri.indexing.index_failed",
+ "pri.indexing.index_failed_due_to_version_conflict",
+ "pri.indexing.index_time",
+ "pri.indexing.index_total",
+ "pri.memory.total",
+ "pri.merges.current",
+ "pri.merges.current_docs",
+ "pri.merges.current_size",
+ "pri.merges.total",
+ "pri.merges.total_docs",
+ "pri.merges.total_size",
+ "pri.merges.total_time",
+ "pri.query_cache.evictions",
+ "pri.query_cache.memory_size",
+ "pri.refresh.external_time",
+ "pri.refresh.external_total",
+ "pri.refresh.listeners",
+ "pri.refresh.time",
+ "pri.refresh.total",
+ "pri.request_cache.evictions",
+ "pri.request_cache.hit_count",
+ "pri.request_cache.memory_size",
+ "pri.request_cache.miss_count",
+ "pri.search.fetch_current",
+ "pri.search.fetch_time",
+ "pri.search.fetch_total",
+ "pri.search.open_contexts",
+ "pri.search.query_current",
+ "pri.search.query_time",
+ "pri.search.query_total",
+ "pri.search.scroll_current",
+ "pri.search.scroll_time",
+ "pri.search.scroll_total",
+ "pri.segments.count",
+ "pri.segments.fixed_bitset_memory",
+ "pri.segments.index_writer_memory",
+ "pri.segments.memory",
+ "pri.segments.version_map_memory",
+ "pri.sparse_vector.value_count",
+ "pri.store.size",
+ "pri.suggest.current",
+ "pri.suggest.time",
+ "pri.suggest.total",
+ "pri.warmer.current",
+ "pri.warmer.total",
+ "pri.warmer.total_time",
+ "query_cache.evictions",
+ "query_cache.memory_size",
+ "refresh.external_time",
+ "refresh.external_total",
+ "refresh.listeners",
+ "refresh.time",
+ "refresh.total",
+ "rep",
+ "request_cache.evictions",
+ "request_cache.hit_count",
+ "request_cache.memory_size",
+ "request_cache.miss_count",
+ "search.fetch_current",
+ "search.fetch_time",
+ "search.fetch_total",
+ "search.open_contexts",
+ "search.query_current",
+ "search.query_time",
+ "search.query_total",
+ "search.scroll_current",
+ "search.scroll_time",
+ "search.scroll_total",
+ "segments.count",
+ "segments.fixed_bitset_memory",
+ "segments.index_writer_memory",
+ "segments.memory",
+ "segments.version_map_memory",
+ "sparse_vector.value_count",
+ "status",
+ "store.size",
+ "suggest.current",
+ "suggest.time",
+ "suggest.total",
+ "uuid",
+ "warmer.current",
+ "warmer.total",
+ "warmer.total_time",
+ ],
+ ],
+ ]
+ ] = None,
health: t.Optional[
t.Union[str, t.Literal["green", "red", "unavailable", "unknown", "yellow"]]
] = None,
@@ -627,7 +1096,8 @@ async def indices(
:param expand_wildcards: The type of index that wildcard patterns can match.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
- :param h: List of columns to appear in the response. Supports simple wildcards.
+ :param h: A comma-separated list of column names to display. It supports simple
+ wildcards.
:param health: The health status used to limit returned indices. By default,
the response includes indices of any health status.
:param help: When set to `true` will output available columns. This option can't
@@ -699,7 +1169,12 @@ async def master(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
- h: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ h: t.Optional[
+ t.Union[
+ t.Sequence[t.Union[str, t.Literal["host", "id", "ip", "node"]]],
+ t.Union[str, t.Literal["host", "id", "ip", "node"]],
+ ]
+ ] = None,
help: t.Optional[bool] = None,
human: t.Optional[bool] = None,
local: t.Optional[bool] = None,
@@ -720,7 +1195,8 @@ async def master(
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
- :param h: List of columns to appear in the response. Supports simple wildcards.
+ :param h: A comma-separated list of column names to display. It supports simple
+ wildcards.
:param help: When set to `true` will output available columns. This option can't
be combined with any other query string option.
:param local: If `true`, the request computes the list of selected nodes from
@@ -1689,7 +2165,24 @@ async def nodeattrs(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
- h: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ h: t.Optional[
+ t.Union[
+ t.Sequence[
+ t.Union[
+ str,
+ t.Literal[
+ "attr", "host", "id", "ip", "node", "pid", "port", "value"
+ ],
+ ]
+ ],
+ t.Union[
+ str,
+ t.Literal[
+ "attr", "host", "id", "ip", "node", "pid", "port", "value"
+ ],
+ ],
+ ]
+ ] = None,
help: t.Optional[bool] = None,
human: t.Optional[bool] = None,
local: t.Optional[bool] = None,
@@ -1710,7 +2203,8 @@ async def nodeattrs(
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
- :param h: List of columns to appear in the response. Supports simple wildcards.
+ :param h: A comma-separated list of column names to display. It supports simple
+ wildcards.
:param help: When set to `true` will output available columns. This option can't
be combined with any other query string option.
:param local: If `true`, the request computes the list of selected nodes from
@@ -2050,7 +2544,19 @@ async def pending_tasks(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
- h: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ h: t.Optional[
+ t.Union[
+ t.Sequence[
+ t.Union[
+ str,
+ t.Literal["insertOrder", "priority", "source", "timeInQueue"],
+ ]
+ ],
+ t.Union[
+ str, t.Literal["insertOrder", "priority", "source", "timeInQueue"]
+ ],
+ ]
+ ] = None,
help: t.Optional[bool] = None,
human: t.Optional[bool] = None,
local: t.Optional[bool] = None,
@@ -2074,7 +2580,8 @@ async def pending_tasks(
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
- :param h: List of columns to appear in the response. Supports simple wildcards.
+ :param h: A comma-separated list of column names to display. It supports simple
+ wildcards.
:param help: When set to `true` will output available columns. This option can't
be combined with any other query string option.
:param local: If `true`, the request computes the list of selected nodes from
@@ -2132,7 +2639,19 @@ async def plugins(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
- h: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ h: t.Optional[
+ t.Union[
+ t.Sequence[
+ t.Union[
+ str,
+ t.Literal["component", "description", "id", "name", "version"],
+ ]
+ ],
+ t.Union[
+ str, t.Literal["component", "description", "id", "name", "version"]
+ ],
+ ]
+ ] = None,
help: t.Optional[bool] = None,
human: t.Optional[bool] = None,
include_bootstrap: t.Optional[bool] = None,
@@ -2154,7 +2673,8 @@ async def plugins(
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
- :param h: List of columns to appear in the response. Supports simple wildcards.
+ :param h: A comma-separated list of column names to display. It supports simple
+ wildcards.
:param help: When set to `true` will output available columns. This option can't
be combined with any other query string option.
:param include_bootstrap: Include bootstrap plugins in the response
@@ -2972,7 +3492,52 @@ async def tasks(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
- h: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ h: t.Optional[
+ t.Union[
+ t.Sequence[
+ t.Union[
+ str,
+ t.Literal[
+ "action",
+ "id",
+ "ip",
+ "node",
+ "node_id",
+ "parent_task_id",
+ "port",
+ "running_time",
+ "running_time_ns",
+ "start_time",
+ "task_id",
+ "timestamp",
+ "type",
+ "version",
+ "x_opaque_id",
+ ],
+ ]
+ ],
+ t.Union[
+ str,
+ t.Literal[
+ "action",
+ "id",
+ "ip",
+ "node",
+ "node_id",
+ "parent_task_id",
+ "port",
+ "running_time",
+ "running_time_ns",
+ "start_time",
+ "task_id",
+ "timestamp",
+ "type",
+ "version",
+ "x_opaque_id",
+ ],
+ ],
+ ]
+ ] = None,
help: t.Optional[bool] = None,
human: t.Optional[bool] = None,
nodes: t.Optional[t.Sequence[str]] = None,
@@ -3001,7 +3566,8 @@ async def tasks(
shard recoveries.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
- :param h: List of columns to appear in the response. Supports simple wildcards.
+ :param h: A comma-separated list of column names to display. It supports simple
+ wildcards.
:param help: When set to `true` will output available columns. This option can't
be combined with any other query string option.
:param nodes: Unique node identifiers, which are used to limit the response.
@@ -3070,7 +3636,24 @@ async def templates(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
- h: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ h: t.Optional[
+ t.Union[
+ t.Sequence[
+ t.Union[
+ str,
+ t.Literal[
+ "composed_of", "index_patterns", "name", "order", "version"
+ ],
+ ]
+ ],
+ t.Union[
+ str,
+ t.Literal[
+ "composed_of", "index_patterns", "name", "order", "version"
+ ],
+ ],
+ ]
+ ] = None,
help: t.Optional[bool] = None,
human: t.Optional[bool] = None,
local: t.Optional[bool] = None,
@@ -3094,7 +3677,8 @@ async def templates(
If omitted, all templates are returned.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
- :param h: List of columns to appear in the response. Supports simple wildcards.
+ :param h: A comma-separated list of column names to display. It supports simple
+ wildcards.
:param help: When set to `true` will output available columns. This option can't
be combined with any other query string option.
:param local: If `true`, the request computes the list of selected nodes from
diff --git a/elasticsearch/_async/client/cluster.py b/elasticsearch/_async/client/cluster.py
index 9ae420766..ced198b56 100644
--- a/elasticsearch/_async/client/cluster.py
+++ b/elasticsearch/_async/client/cluster.py
@@ -374,8 +374,13 @@ async def get_settings(
``_
:param flat_settings: If `true`, returns settings in flat format.
- :param include_defaults: If `true`, returns default cluster settings from the
- local node.
+ :param include_defaults: If `true`, also returns default values for all other
+ cluster settings, reflecting the values in the `elasticsearch.yml` file of
+ one of the nodes in the cluster. If the nodes in your cluster do not all
+ have the same values in their `elasticsearch.yml` config files then the values
+ returned by this API may vary from invocation to invocation and may not reflect
+ the values that Elasticsearch uses in all situations. Use the `GET _nodes/settings`
+ API to fetch the settings for each individual node in your cluster.
:param master_timeout: Period to wait for a connection to the master node. If
no response is received before the timeout expires, the request fails and
returns an error.
diff --git a/elasticsearch/_async/client/esql.py b/elasticsearch/_async/client/esql.py
index 9999f1db1..38e642779 100644
--- a/elasticsearch/_async/client/esql.py
+++ b/elasticsearch/_async/client/esql.py
@@ -28,6 +28,9 @@
_stability_warning,
)
+if t.TYPE_CHECKING:
+ from elasticsearch.esql import ESQLBase
+
class EsqlClient(NamespacedClient):
@@ -50,7 +53,7 @@ class EsqlClient(NamespacedClient):
async def async_query(
self,
*,
- query: t.Optional[str] = None,
+ query: t.Optional[t.Union[str, "ESQLBase"]] = None,
allow_partial_results: t.Optional[bool] = None,
columnar: t.Optional[bool] = None,
delimiter: t.Optional[str] = None,
@@ -111,7 +114,12 @@ async def async_query(
which has the name of all the columns.
:param filter: Specify a Query DSL query in the filter parameter to filter the
set of documents that an ES|QL query runs on.
- :param format: A short version of the Accept header, for example `json` or `yaml`.
+ :param format: A short version of the Accept header, e.g. json, yaml. `csv`,
+ `tsv`, and `txt` formats will return results in a tabular format, excluding
+ other metadata fields from the response. For async requests, nothing will
+ be returned if the async query doesn't finish within the timeout. The query
+ ID and running status are available in the `X-Elasticsearch-Async-Id` and
+ `X-Elasticsearch-Async-Is-Running` HTTP headers of the response, respectively.
:param include_ccs_metadata: When set to `true` and performing a cross-cluster
query, the response will include an extra `_clusters` object with information
about the clusters that participated in the search along with info such as
@@ -165,7 +173,7 @@ async def async_query(
__query["pretty"] = pretty
if not __body:
if query is not None:
- __body["query"] = query
+ __body["query"] = str(query)
if columnar is not None:
__body["columnar"] = columnar
if filter is not None:
@@ -405,6 +413,8 @@ async def get_query(
Returns an object with extended information about a running ES|QL query.
+ ``_
+
:param id: The query ID
"""
if id in SKIP_IN_PATH:
@@ -446,6 +456,8 @@ async def list_queries(
Get information about running ES|QL queries.
Returns an object containing IDs and other information about the running ES|QL queries.
+
+ ``_
"""
__path_parts: t.Dict[str, str] = {}
__path = "/_query/queries"
@@ -484,7 +496,7 @@ async def list_queries(
async def query(
self,
*,
- query: t.Optional[str] = None,
+ query: t.Optional[t.Union[str, "ESQLBase"]] = None,
allow_partial_results: t.Optional[bool] = None,
columnar: t.Optional[bool] = None,
delimiter: t.Optional[str] = None,
@@ -539,7 +551,9 @@ async def query(
`all_columns` which has the name of all columns.
:param filter: Specify a Query DSL query in the filter parameter to filter the
set of documents that an ES|QL query runs on.
- :param format: A short version of the Accept header, e.g. json, yaml.
+ :param format: A short version of the Accept header, for example `json` or `yaml`. `csv`,
+ `tsv`, and `txt` formats will return results in a tabular format, excluding
+ other metadata fields from the response.
:param include_ccs_metadata: When set to `true` and performing a cross-cluster
query, the response will include an extra `_clusters` object with information
about the clusters that participated in the search along with info such as
@@ -579,7 +593,7 @@ async def query(
__query["pretty"] = pretty
if not __body:
if query is not None:
- __body["query"] = query
+ __body["query"] = str(query)
if columnar is not None:
__body["columnar"] = columnar
if filter is not None:
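
With query now accepting ESQLBase instances and serializing them via str(), a
query built with the ES|QL query builder can be passed directly. A minimal
sketch (the endpoint and index name are assumptions):

    from elasticsearch import Elasticsearch
    from elasticsearch.esql import ESQL

    client = Elasticsearch("http://localhost:9200")  # assumed endpoint

    # The client calls str() on the builder to produce the ES|QL query text
    query = (
        ESQL.from_("employees")  # assumed index
        .where('still_hired == true')
        .sort("last_name")
        .limit(10)
    )
    response = client.esql.query(query=query)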
diff --git a/elasticsearch/_async/client/indices.py b/elasticsearch/_async/client/indices.py
index 0f627227c..c12ec00b0 100644
--- a/elasticsearch/_async/client/indices.py
+++ b/elasticsearch/_async/client/indices.py
@@ -1208,7 +1208,7 @@ async def delete_data_stream_options(
Removes the data stream options from a data stream.
- ``_
+ ``_
:param name: A comma-separated list of data streams of which the data stream
options will be deleted; use `*` to get all data streams
@@ -2568,7 +2568,7 @@ async def get_data_stream_options(
Get the data stream options configuration of one or more data streams.
- ``_
+ ``_
:param name: Comma-separated list of data streams to limit the request. Supports
wildcards (`*`). To target all data streams, omit this parameter or use `*`
@@ -3684,7 +3684,7 @@ async def put_data_stream_options(
Update the data stream options of the specified data streams.
- ``_
+ ``_
:param name: Comma-separated list of data streams used to limit the request.
Supports wildcards (`*`). To target all data streams use `*` or `_all`.
@@ -4051,7 +4051,7 @@ async def put_mapping(
- Change a field's mapping using reindexing
- Rename a field using a field alias
- Learn how to use the update mapping API with practical examples in the Update mapping API examples guide.
+ Learn how to use the update mapping API with practical examples in the Update mapping API examples guide.
``_
diff --git a/elasticsearch/_async/client/inference.py b/elasticsearch/_async/client/inference.py
index 422e51522..621156307 100644
--- a/elasticsearch/_async/client/inference.py
+++ b/elasticsearch/_async/client/inference.py
@@ -396,17 +396,18 @@ async def put(
Azure AI Studio (completion, text_embedding)
Azure OpenAI (completion, text_embedding)
Cohere (completion, rerank, text_embedding)
- DeepSeek (completion, chat_completion)
+ DeepSeek (chat_completion, completion)
Elasticsearch (rerank, sparse_embedding, text_embedding - this service is for built-in models and models uploaded through Eland)
ELSER (sparse_embedding)
Google AI Studio (completion, text_embedding)
- Google Vertex AI (rerank, text_embedding)
+ Google Vertex AI (chat_completion, completion, rerank, text_embedding)
Hugging Face (chat_completion, completion, rerank, text_embedding)
+ JinaAI (rerank, text_embedding)
+ Llama (chat_completion, completion, text_embedding)
Mistral (chat_completion, completion, text_embedding)
OpenAI (chat_completion, completion, text_embedding)
- VoyageAI (text_embedding, rerank)
+ VoyageAI (rerank, text_embedding)
Watsonx inference integration (text_embedding)
- JinaAI (text_embedding, rerank)
diff --git a/elasticsearch/_async/client/sql.py b/elasticsearch/_async/client/sql.py
index 3eb37a6cc..de423ea66 100644
--- a/elasticsearch/_async/client/sql.py
+++ b/elasticsearch/_async/client/sql.py
@@ -283,7 +283,7 @@ async def query(
keep_alive: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
keep_on_completion: t.Optional[bool] = None,
page_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
- params: t.Optional[t.Mapping[str, t.Any]] = None,
+ params: t.Optional[t.Sequence[t.Any]] = None,
pretty: t.Optional[bool] = None,
query: t.Optional[str] = None,
request_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
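
The new params type matches positional ? placeholders in the SQL statement. A
sketch under assumed index and field names:

    # params is now a list; items bind, in order, to the ? placeholders
    response = client.sql.query(
        query="SELECT name, page_count FROM library WHERE page_count > ? AND author = ?",
        params=[300, "Frank Herbert"],
    )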
diff --git a/elasticsearch/_async/client/transform.py b/elasticsearch/_async/client/transform.py
index 191c959d3..249fa35cb 100644
--- a/elasticsearch/_async/client/transform.py
+++ b/elasticsearch/_async/client/transform.py
@@ -602,6 +602,66 @@ async def schedule_now_transform(
path_parts=__path_parts,
)
+ @_rewrite_parameters()
+ async def set_upgrade_mode(
+ self,
+ *,
+ enabled: t.Optional[bool] = None,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ pretty: t.Optional[bool] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ Set upgrade_mode for transform indices.
+ Sets a cluster-wide upgrade_mode setting that prepares transform
+ indices for an upgrade.
+ When upgrading your cluster, in some circumstances you must restart your
+ nodes and reindex your transform indices. In those circumstances,
+ there must be no transforms running. You can close the transforms,
+ do the upgrade, then open all the transforms again. Alternatively,
+ you can use this API to temporarily halt tasks associated with the transforms
+ and prevent new transforms from opening. You can also use this API
+ during upgrades that do not require you to reindex your transform
+ indices, though stopping transforms is not a requirement in that case.
+ You can see the current value for the upgrade_mode setting by using the get
+ transform info API.
+
+
+ ``_
+
+ :param enabled: When `true`, it enables `upgrade_mode` which temporarily halts
+ all transform tasks and prohibits new transform tasks from starting.
+ :param timeout: The time to wait for the request to be completed.
+ """
+ __path_parts: t.Dict[str, str] = {}
+ __path = "/_transform/set_upgrade_mode"
+ __query: t.Dict[str, t.Any] = {}
+ if enabled is not None:
+ __query["enabled"] = enabled
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if pretty is not None:
+ __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
+ __headers = {"accept": "application/json"}
+ return await self.perform_request( # type: ignore[return-value]
+ "POST",
+ __path,
+ params=__query,
+ headers=__headers,
+ endpoint_id="transform.set_upgrade_mode",
+ path_parts=__path_parts,
+ )
+
@_rewrite_parameters(
parameter_aliases={"from": "from_"},
)
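
A sketch of the new API bracketing an upgrade (the timeout value is arbitrary):

    # Halt all transform tasks and block new ones before upgrading
    client.transform.set_upgrade_mode(enabled=True, timeout="10m")

    # ... restart nodes / reindex transform indices ...

    # Restore normal transform operation afterwards
    client.transform.set_upgrade_mode(enabled=False)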
diff --git a/elasticsearch/_sync/client/__init__.py b/elasticsearch/_sync/client/__init__.py
index 40f4cbed6..b7acad907 100644
--- a/elasticsearch/_sync/client/__init__.py
+++ b/elasticsearch/_sync/client/__init__.py
@@ -606,6 +606,7 @@ def bulk(
JavaScript: Check out client.helpers.*
.NET: Check out BulkAllObservable
PHP: Check out bulk indexing.
+ Ruby: Check out Elasticsearch::Helpers::BulkHelper
Submitting bulk requests with cURL
If you're providing text file input to curl, you must use the --data-binary flag instead of plain -d.
@@ -1324,7 +1325,7 @@ def delete(
)
@_rewrite_parameters(
- body_fields=("max_docs", "query", "slice"),
+ body_fields=("max_docs", "query", "slice", "sort"),
parameter_aliases={"from": "from_"},
)
def delete_by_query(
@@ -1368,7 +1369,12 @@ def delete_by_query(
] = None,
slice: t.Optional[t.Mapping[str, t.Any]] = None,
slices: t.Optional[t.Union[int, t.Union[str, t.Literal["auto"]]]] = None,
- sort: t.Optional[t.Sequence[str]] = None,
+ sort: t.Optional[
+ t.Union[
+ t.Sequence[t.Union[str, t.Mapping[str, t.Any]]],
+ t.Union[str, t.Mapping[str, t.Any]],
+ ]
+ ] = None,
stats: t.Optional[t.Sequence[str]] = None,
terminate_after: t.Optional[int] = None,
timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
@@ -1500,7 +1506,7 @@ def delete_by_query(
:param slice: Slice the request manually using the provided slice ID and total
number of slices.
:param slices: The number of slices this task should be divided into.
- :param sort: A comma-separated list of `:` pairs.
+ :param sort: A sort object that specifies the order of deleted documents.
:param stats: The specific `tag` of the request for logging and statistical purposes.
:param terminate_after: The maximum number of documents to collect for each shard.
If a query reaches this limit, Elasticsearch terminates the query early.
@@ -1590,8 +1596,6 @@ def delete_by_query(
__query["search_type"] = search_type
if slices is not None:
__query["slices"] = slices
- if sort is not None:
- __query["sort"] = sort
if stats is not None:
__query["stats"] = stats
if terminate_after is not None:
@@ -1611,6 +1615,8 @@ def delete_by_query(
__body["query"] = query
if slice is not None:
__body["slice"] = slice
+ if sort is not None:
+ __body["sort"] = sort
__headers = {"accept": "application/json", "content-type": "application/json"}
return self.perform_request( # type: ignore[return-value]
"POST",
@@ -3868,6 +3874,13 @@ def reindex(
In this case, the response includes a count of the version conflicts that were encountered.
Note that the handling of other error types is unaffected by the conflicts property.
Additionally, if you opt to count version conflicts, the operation could attempt to reindex more documents from the source than max_docs until it has successfully indexed max_docs documents into the target or it has gone through every document in the source query.
+ It's recommended to reindex on indices with a green status. Reindexing can fail when a node shuts down or crashes.
+
+ - When requested with wait_for_completion=true (default), the request fails if the node shuts down.
+ - When requested with wait_for_completion=false, a task id is returned, for use with the task management APIs. The task may disappear or fail if the node shuts down.
+ When retrying a failed reindex operation, it might be necessary to set conflicts=proceed or to first delete the partial destination index.
+ Additionally, dry runs, checking disk space, and fetching index recovery information can help address the root cause.
+
Refer to the linked documentation for examples of how to reindex documents.
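
One possible retry pattern consistent with the guidance above (index names are
assumptions):

    # Run the reindex as a background task and tolerate version conflicts
    task = client.reindex(
        source={"index": "old-index"},
        dest={"index": "new-index"},
        conflicts="proceed",
        wait_for_completion=False,
    )
    # Poll the task management API for progress or failure
    status = client.tasks.get(task_id=task["task"])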
@@ -5647,7 +5660,7 @@ def termvectors(
doc: t.Optional[t.Mapping[str, t.Any]] = None,
error_trace: t.Optional[bool] = None,
field_statistics: t.Optional[bool] = None,
- fields: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ fields: t.Optional[t.Sequence[str]] = None,
filter: t.Optional[t.Mapping[str, t.Any]] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
human: t.Optional[bool] = None,
diff --git a/elasticsearch/_sync/client/cat.py b/elasticsearch/_sync/client/cat.py
index b1ab46d99..5349a32ec 100644
--- a/elasticsearch/_sync/client/cat.py
+++ b/elasticsearch/_sync/client/cat.py
@@ -47,7 +47,34 @@ def aliases(
] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
- h: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ h: t.Optional[
+ t.Union[
+ t.Sequence[
+ t.Union[
+ str,
+ t.Literal[
+ "alias",
+ "filter",
+ "index",
+ "is_write_index",
+ "routing.index",
+ "routing.search",
+ ],
+ ]
+ ],
+ t.Union[
+ str,
+ t.Literal[
+ "alias",
+ "filter",
+ "index",
+ "is_write_index",
+ "routing.index",
+ "routing.search",
+ ],
+ ],
+ ]
+ ] = None,
help: t.Optional[bool] = None,
human: t.Optional[bool] = None,
master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
@@ -74,7 +101,8 @@ def aliases(
values, such as `open,hidden`.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
- :param h: List of columns to appear in the response. Supports simple wildcards.
+ :param h: A comma-separated list of column names to display. It supports simple
+ wildcards.
:param help: When set to `true` will output available columns. This option can't
be combined with any other query string option.
:param master_timeout: The period to wait for a connection to the master node.
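
The h parameter now also accepts the known column names as typed literals, so
either form below type-checks. A sketch:

    # Equivalent: a comma-separated string or a sequence of column names
    client.cat.aliases(h="alias,index,is_write_index", format="json")
    client.cat.aliases(h=["alias", "index", "is_write_index"], format="json")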
@@ -137,7 +165,48 @@ def allocation(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
- h: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ h: t.Optional[
+ t.Union[
+ t.Sequence[
+ t.Union[
+ str,
+ t.Literal[
+ "disk.avail",
+ "disk.indices",
+ "disk.indices.forecast",
+ "disk.percent",
+ "disk.total",
+ "disk.used",
+ "host",
+ "ip",
+ "node",
+ "node.role",
+ "shards",
+ "shards.undesired",
+ "write_load.forecast",
+ ],
+ ]
+ ],
+ t.Union[
+ str,
+ t.Literal[
+ "disk.avail",
+ "disk.indices",
+ "disk.indices.forecast",
+ "disk.percent",
+ "disk.total",
+ "disk.used",
+ "host",
+ "ip",
+ "node",
+ "node.role",
+ "shards",
+ "shards.undesired",
+ "write_load.forecast",
+ ],
+ ],
+ ]
+ ] = None,
help: t.Optional[bool] = None,
human: t.Optional[bool] = None,
local: t.Optional[bool] = None,
@@ -161,7 +230,8 @@ def allocation(
:param bytes: The unit used to display byte values.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
- :param h: List of columns to appear in the response. Supports simple wildcards.
+ :param h: A comma-separated list of column names to display. It supports simple
+ wildcards.
:param help: When set to `true` will output available columns. This option can't
be combined with any other query string option.
:param local: If `true`, the request computes the list of selected nodes from
@@ -224,7 +294,36 @@ def component_templates(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
- h: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ h: t.Optional[
+ t.Union[
+ t.Sequence[
+ t.Union[
+ str,
+ t.Literal[
+ "alias_count",
+ "included_in",
+ "mapping_count",
+ "metadata_count",
+ "name",
+ "settings_count",
+ "version",
+ ],
+ ]
+ ],
+ t.Union[
+ str,
+ t.Literal[
+ "alias_count",
+ "included_in",
+ "mapping_count",
+ "metadata_count",
+ "name",
+ "settings_count",
+ "version",
+ ],
+ ],
+ ]
+ ] = None,
help: t.Optional[bool] = None,
human: t.Optional[bool] = None,
local: t.Optional[bool] = None,
@@ -249,7 +348,8 @@ def component_templates(
If it is omitted, all component templates are returned.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
- :param h: List of columns to appear in the response. Supports simple wildcards.
+ :param h: A comma-separated list of column names to display. It supports simple
+ wildcards.
:param help: When set to `true` will output available columns. This option can't
be combined with any other query string option.
:param local: If `true`, the request computes the list of selected nodes from
@@ -310,7 +410,12 @@ def count(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
- h: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ h: t.Optional[
+ t.Union[
+ t.Sequence[t.Union[str, t.Literal["count", "epoch", "timestamp"]]],
+ t.Union[str, t.Literal["count", "epoch", "timestamp"]],
+ ]
+ ] = None,
help: t.Optional[bool] = None,
human: t.Optional[bool] = None,
pretty: t.Optional[bool] = None,
@@ -334,7 +439,8 @@ def count(
and indices, omit this parameter or use `*` or `_all`.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
- :param h: List of columns to appear in the response. Supports simple wildcards.
+ :param h: A comma-separated list of column names to display. It supports simple
+ wildcards.
:param help: When set to `true` will output available columns. This option can't
be combined with any other query string option.
:param s: List of columns that determine how the table should be sorted. Sorting
@@ -389,7 +495,14 @@ def fielddata(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
- h: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ h: t.Optional[
+ t.Union[
+ t.Sequence[
+ t.Union[str, t.Literal["field", "host", "id", "ip", "node", "size"]]
+ ],
+ t.Union[str, t.Literal["field", "host", "id", "ip", "node", "size"]],
+ ]
+ ] = None,
help: t.Optional[bool] = None,
human: t.Optional[bool] = None,
pretty: t.Optional[bool] = None,
@@ -412,7 +525,8 @@ def fielddata(
:param bytes: The unit used to display byte values.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
- :param h: List of columns to appear in the response. Supports simple wildcards.
+ :param h: A comma-separated list of column names to display. It supports simple
+ wildcards.
:param help: When set to `true` will output available columns. This option can't
be combined with any other query string option.
:param s: List of columns that determine how the table should be sorted. Sorting
@@ -465,7 +579,52 @@ def health(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
- h: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ h: t.Optional[
+ t.Union[
+ t.Sequence[
+ t.Union[
+ str,
+ t.Literal[
+ "active_shards_percent",
+ "cluster",
+ "epoch",
+ "init",
+ "max_task_wait_time",
+ "node.data",
+ "node.total",
+ "pending_tasks",
+ "pri",
+ "relo",
+ "shards",
+ "status",
+ "timestamp",
+ "unassign",
+ "unassign.pri",
+ ],
+ ]
+ ],
+ t.Union[
+ str,
+ t.Literal[
+ "active_shards_percent",
+ "cluster",
+ "epoch",
+ "init",
+ "max_task_wait_time",
+ "node.data",
+ "node.total",
+ "pending_tasks",
+ "pri",
+ "relo",
+ "shards",
+ "status",
+ "timestamp",
+ "unassign",
+ "unassign.pri",
+ ],
+ ],
+ ]
+ ] = None,
help: t.Optional[bool] = None,
human: t.Optional[bool] = None,
pretty: t.Optional[bool] = None,
@@ -495,7 +654,8 @@ def health(
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
- :param h: List of columns to appear in the response. Supports simple wildcards.
+ :param h: A comma-separated list of column names to display. It supports simple
+ wildcards.
:param help: When set to `true` will output available columns. This option can't
be combined with any other query string option.
:param s: List of columns that determine how the table should be sorted. Sorting
@@ -583,7 +743,316 @@ def indices(
] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
- h: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ h: t.Optional[
+ t.Union[
+ t.Sequence[
+ t.Union[
+ str,
+ t.Literal[
+ "bulk.avg_size_in_bytes",
+ "bulk.avg_time",
+ "bulk.total_operations",
+ "bulk.total_size_in_bytes",
+ "bulk.total_time",
+ "completion.size",
+ "creation.date",
+ "creation.date.string",
+ "dataset.size",
+ "dense_vector.value_count",
+ "docs.count",
+ "docs.deleted",
+ "fielddata.evictions",
+ "fielddata.memory_size",
+ "flush.total",
+ "flush.total_time",
+ "get.current",
+ "get.exists_time",
+ "get.exists_total",
+ "get.missing_time",
+ "get.missing_total",
+ "get.time",
+ "get.total",
+ "health",
+ "index",
+ "indexing.delete_current",
+ "indexing.delete_time",
+ "indexing.delete_total",
+ "indexing.index_current",
+ "indexing.index_failed",
+ "indexing.index_failed_due_to_version_conflict",
+ "indexing.index_time",
+ "indexing.index_total",
+ "memory.total",
+ "merges.current",
+ "merges.current_docs",
+ "merges.current_size",
+ "merges.total",
+ "merges.total_docs",
+ "merges.total_size",
+ "merges.total_time",
+ "pri",
+ "pri.bulk.avg_size_in_bytes",
+ "pri.bulk.avg_time",
+ "pri.bulk.total_operations",
+ "pri.bulk.total_size_in_bytes",
+ "pri.bulk.total_time",
+ "pri.completion.size",
+ "pri.dense_vector.value_count",
+ "pri.fielddata.evictions",
+ "pri.fielddata.memory_size",
+ "pri.flush.total",
+ "pri.flush.total_time",
+ "pri.get.current",
+ "pri.get.exists_time",
+ "pri.get.exists_total",
+ "pri.get.missing_time",
+ "pri.get.missing_total",
+ "pri.get.time",
+ "pri.get.total",
+ "pri.indexing.delete_current",
+ "pri.indexing.delete_time",
+ "pri.indexing.delete_total",
+ "pri.indexing.index_current",
+ "pri.indexing.index_failed",
+ "pri.indexing.index_failed_due_to_version_conflict",
+ "pri.indexing.index_time",
+ "pri.indexing.index_total",
+ "pri.memory.total",
+ "pri.merges.current",
+ "pri.merges.current_docs",
+ "pri.merges.current_size",
+ "pri.merges.total",
+ "pri.merges.total_docs",
+ "pri.merges.total_size",
+ "pri.merges.total_time",
+ "pri.query_cache.evictions",
+ "pri.query_cache.memory_size",
+ "pri.refresh.external_time",
+ "pri.refresh.external_total",
+ "pri.refresh.listeners",
+ "pri.refresh.time",
+ "pri.refresh.total",
+ "pri.request_cache.evictions",
+ "pri.request_cache.hit_count",
+ "pri.request_cache.memory_size",
+ "pri.request_cache.miss_count",
+ "pri.search.fetch_current",
+ "pri.search.fetch_time",
+ "pri.search.fetch_total",
+ "pri.search.open_contexts",
+ "pri.search.query_current",
+ "pri.search.query_time",
+ "pri.search.query_total",
+ "pri.search.scroll_current",
+ "pri.search.scroll_time",
+ "pri.search.scroll_total",
+ "pri.segments.count",
+ "pri.segments.fixed_bitset_memory",
+ "pri.segments.index_writer_memory",
+ "pri.segments.memory",
+ "pri.segments.version_map_memory",
+ "pri.sparse_vector.value_count",
+ "pri.store.size",
+ "pri.suggest.current",
+ "pri.suggest.time",
+ "pri.suggest.total",
+ "pri.warmer.current",
+ "pri.warmer.total",
+ "pri.warmer.total_time",
+ "query_cache.evictions",
+ "query_cache.memory_size",
+ "refresh.external_time",
+ "refresh.external_total",
+ "refresh.listeners",
+ "refresh.time",
+ "refresh.total",
+ "rep",
+ "request_cache.evictions",
+ "request_cache.hit_count",
+ "request_cache.memory_size",
+ "request_cache.miss_count",
+ "search.fetch_current",
+ "search.fetch_time",
+ "search.fetch_total",
+ "search.open_contexts",
+ "search.query_current",
+ "search.query_time",
+ "search.query_total",
+ "search.scroll_current",
+ "search.scroll_time",
+ "search.scroll_total",
+ "segments.count",
+ "segments.fixed_bitset_memory",
+ "segments.index_writer_memory",
+ "segments.memory",
+ "segments.version_map_memory",
+ "sparse_vector.value_count",
+ "status",
+ "store.size",
+ "suggest.current",
+ "suggest.time",
+ "suggest.total",
+ "uuid",
+ "warmer.current",
+ "warmer.total",
+ "warmer.total_time",
+ ],
+ ]
+ ],
+ t.Union[
+ str,
+ t.Literal[
+ "bulk.avg_size_in_bytes",
+ "bulk.avg_time",
+ "bulk.total_operations",
+ "bulk.total_size_in_bytes",
+ "bulk.total_time",
+ "completion.size",
+ "creation.date",
+ "creation.date.string",
+ "dataset.size",
+ "dense_vector.value_count",
+ "docs.count",
+ "docs.deleted",
+ "fielddata.evictions",
+ "fielddata.memory_size",
+ "flush.total",
+ "flush.total_time",
+ "get.current",
+ "get.exists_time",
+ "get.exists_total",
+ "get.missing_time",
+ "get.missing_total",
+ "get.time",
+ "get.total",
+ "health",
+ "index",
+ "indexing.delete_current",
+ "indexing.delete_time",
+ "indexing.delete_total",
+ "indexing.index_current",
+ "indexing.index_failed",
+ "indexing.index_failed_due_to_version_conflict",
+ "indexing.index_time",
+ "indexing.index_total",
+ "memory.total",
+ "merges.current",
+ "merges.current_docs",
+ "merges.current_size",
+ "merges.total",
+ "merges.total_docs",
+ "merges.total_size",
+ "merges.total_time",
+ "pri",
+ "pri.bulk.avg_size_in_bytes",
+ "pri.bulk.avg_time",
+ "pri.bulk.total_operations",
+ "pri.bulk.total_size_in_bytes",
+ "pri.bulk.total_time",
+ "pri.completion.size",
+ "pri.dense_vector.value_count",
+ "pri.fielddata.evictions",
+ "pri.fielddata.memory_size",
+ "pri.flush.total",
+ "pri.flush.total_time",
+ "pri.get.current",
+ "pri.get.exists_time",
+ "pri.get.exists_total",
+ "pri.get.missing_time",
+ "pri.get.missing_total",
+ "pri.get.time",
+ "pri.get.total",
+ "pri.indexing.delete_current",
+ "pri.indexing.delete_time",
+ "pri.indexing.delete_total",
+ "pri.indexing.index_current",
+ "pri.indexing.index_failed",
+ "pri.indexing.index_failed_due_to_version_conflict",
+ "pri.indexing.index_time",
+ "pri.indexing.index_total",
+ "pri.memory.total",
+ "pri.merges.current",
+ "pri.merges.current_docs",
+ "pri.merges.current_size",
+ "pri.merges.total",
+ "pri.merges.total_docs",
+ "pri.merges.total_size",
+ "pri.merges.total_time",
+ "pri.query_cache.evictions",
+ "pri.query_cache.memory_size",
+ "pri.refresh.external_time",
+ "pri.refresh.external_total",
+ "pri.refresh.listeners",
+ "pri.refresh.time",
+ "pri.refresh.total",
+ "pri.request_cache.evictions",
+ "pri.request_cache.hit_count",
+ "pri.request_cache.memory_size",
+ "pri.request_cache.miss_count",
+ "pri.search.fetch_current",
+ "pri.search.fetch_time",
+ "pri.search.fetch_total",
+ "pri.search.open_contexts",
+ "pri.search.query_current",
+ "pri.search.query_time",
+ "pri.search.query_total",
+ "pri.search.scroll_current",
+ "pri.search.scroll_time",
+ "pri.search.scroll_total",
+ "pri.segments.count",
+ "pri.segments.fixed_bitset_memory",
+ "pri.segments.index_writer_memory",
+ "pri.segments.memory",
+ "pri.segments.version_map_memory",
+ "pri.sparse_vector.value_count",
+ "pri.store.size",
+ "pri.suggest.current",
+ "pri.suggest.time",
+ "pri.suggest.total",
+ "pri.warmer.current",
+ "pri.warmer.total",
+ "pri.warmer.total_time",
+ "query_cache.evictions",
+ "query_cache.memory_size",
+ "refresh.external_time",
+ "refresh.external_total",
+ "refresh.listeners",
+ "refresh.time",
+ "refresh.total",
+ "rep",
+ "request_cache.evictions",
+ "request_cache.hit_count",
+ "request_cache.memory_size",
+ "request_cache.miss_count",
+ "search.fetch_current",
+ "search.fetch_time",
+ "search.fetch_total",
+ "search.open_contexts",
+ "search.query_current",
+ "search.query_time",
+ "search.query_total",
+ "search.scroll_current",
+ "search.scroll_time",
+ "search.scroll_total",
+ "segments.count",
+ "segments.fixed_bitset_memory",
+ "segments.index_writer_memory",
+ "segments.memory",
+ "segments.version_map_memory",
+ "sparse_vector.value_count",
+ "status",
+ "store.size",
+ "suggest.current",
+ "suggest.time",
+ "suggest.total",
+ "uuid",
+ "warmer.current",
+ "warmer.total",
+ "warmer.total_time",
+ ],
+ ],
+ ]
+ ] = None,
health: t.Optional[
t.Union[str, t.Literal["green", "red", "unavailable", "unknown", "yellow"]]
] = None,
@@ -627,7 +1096,8 @@ def indices(
:param expand_wildcards: The type of index that wildcard patterns can match.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
- :param h: List of columns to appear in the response. Supports simple wildcards.
+ :param h: A comma-separated list of column names to display. It supports simple
+ wildcards.
:param health: The health status used to limit returned indices. By default,
the response includes indices of any health status.
:param help: When set to `true` will output available columns. This option can't
@@ -699,7 +1169,12 @@ def master(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
- h: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ h: t.Optional[
+ t.Union[
+ t.Sequence[t.Union[str, t.Literal["host", "id", "ip", "node"]]],
+ t.Union[str, t.Literal["host", "id", "ip", "node"]],
+ ]
+ ] = None,
help: t.Optional[bool] = None,
human: t.Optional[bool] = None,
local: t.Optional[bool] = None,
@@ -720,7 +1195,8 @@ def master(
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
- :param h: List of columns to appear in the response. Supports simple wildcards.
+ :param h: A comma-separated list of column names to display. It supports simple
+ wildcards.
:param help: When set to `true` will output available columns. This option can't
be combined with any other query string option.
:param local: If `true`, the request computes the list of selected nodes from
@@ -1689,7 +2165,24 @@ def nodeattrs(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
- h: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ h: t.Optional[
+ t.Union[
+ t.Sequence[
+ t.Union[
+ str,
+ t.Literal[
+ "attr", "host", "id", "ip", "node", "pid", "port", "value"
+ ],
+ ]
+ ],
+ t.Union[
+ str,
+ t.Literal[
+ "attr", "host", "id", "ip", "node", "pid", "port", "value"
+ ],
+ ],
+ ]
+ ] = None,
help: t.Optional[bool] = None,
human: t.Optional[bool] = None,
local: t.Optional[bool] = None,
@@ -1710,7 +2203,8 @@ def nodeattrs(
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
- :param h: List of columns to appear in the response. Supports simple wildcards.
+ :param h: A comma-separated list of column names to display. It supports simple
+ wildcards.
:param help: When set to `true` will output available columns. This option can't
be combined with any other query string option.
:param local: If `true`, the request computes the list of selected nodes from
@@ -2050,7 +2544,19 @@ def pending_tasks(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
- h: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ h: t.Optional[
+ t.Union[
+ t.Sequence[
+ t.Union[
+ str,
+ t.Literal["insertOrder", "priority", "source", "timeInQueue"],
+ ]
+ ],
+ t.Union[
+ str, t.Literal["insertOrder", "priority", "source", "timeInQueue"]
+ ],
+ ]
+ ] = None,
help: t.Optional[bool] = None,
human: t.Optional[bool] = None,
local: t.Optional[bool] = None,
@@ -2074,7 +2580,8 @@ def pending_tasks(
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
- :param h: List of columns to appear in the response. Supports simple wildcards.
+ :param h: A comma-separated list of column names to display. It supports simple
+ wildcards.
:param help: When set to `true` will output available columns. This option can't
be combined with any other query string option.
:param local: If `true`, the request computes the list of selected nodes from
@@ -2132,7 +2639,19 @@ def plugins(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
- h: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ h: t.Optional[
+ t.Union[
+ t.Sequence[
+ t.Union[
+ str,
+ t.Literal["component", "description", "id", "name", "version"],
+ ]
+ ],
+ t.Union[
+ str, t.Literal["component", "description", "id", "name", "version"]
+ ],
+ ]
+ ] = None,
help: t.Optional[bool] = None,
human: t.Optional[bool] = None,
include_bootstrap: t.Optional[bool] = None,
@@ -2154,7 +2673,8 @@ def plugins(
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
- :param h: List of columns to appear in the response. Supports simple wildcards.
+ :param h: A comma-separated list of column names to display. It supports simple
+ wildcards.
:param help: When set to `true` will output available columns. This option can't
be combined with any other query string option.
:param include_bootstrap: Include bootstrap plugins in the response
@@ -2972,7 +3492,52 @@ def tasks(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
- h: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ h: t.Optional[
+ t.Union[
+ t.Sequence[
+ t.Union[
+ str,
+ t.Literal[
+ "action",
+ "id",
+ "ip",
+ "node",
+ "node_id",
+ "parent_task_id",
+ "port",
+ "running_time",
+ "running_time_ns",
+ "start_time",
+ "task_id",
+ "timestamp",
+ "type",
+ "version",
+ "x_opaque_id",
+ ],
+ ]
+ ],
+ t.Union[
+ str,
+ t.Literal[
+ "action",
+ "id",
+ "ip",
+ "node",
+ "node_id",
+ "parent_task_id",
+ "port",
+ "running_time",
+ "running_time_ns",
+ "start_time",
+ "task_id",
+ "timestamp",
+ "type",
+ "version",
+ "x_opaque_id",
+ ],
+ ],
+ ]
+ ] = None,
help: t.Optional[bool] = None,
human: t.Optional[bool] = None,
nodes: t.Optional[t.Sequence[str]] = None,
@@ -3001,7 +3566,8 @@ def tasks(
shard recoveries.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
- :param h: List of columns to appear in the response. Supports simple wildcards.
+ :param h: A comma-separated list of column names to display. It supports simple
+ wildcards.
:param help: When set to `true` will output available columns. This option can't
be combined with any other query string option.
:param nodes: Unique node identifiers, which are used to limit the response.
@@ -3070,7 +3636,24 @@ def templates(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
- h: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ h: t.Optional[
+ t.Union[
+ t.Sequence[
+ t.Union[
+ str,
+ t.Literal[
+ "composed_of", "index_patterns", "name", "order", "version"
+ ],
+ ]
+ ],
+ t.Union[
+ str,
+ t.Literal[
+ "composed_of", "index_patterns", "name", "order", "version"
+ ],
+ ],
+ ]
+ ] = None,
help: t.Optional[bool] = None,
human: t.Optional[bool] = None,
local: t.Optional[bool] = None,
@@ -3094,7 +3677,8 @@ def templates(
If omitted, all templates are returned.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
- :param h: List of columns to appear in the response. Supports simple wildcards.
+ :param h: A comma-separated list of column names to display. It supports simple
+ wildcards.
:param help: When set to `true` will output available columns. This option can't
be combined with any other query string option.
:param local: If `true`, the request computes the list of selected nodes from
diff --git a/elasticsearch/_sync/client/cluster.py b/elasticsearch/_sync/client/cluster.py
index 2d4eebc54..fab832aae 100644
--- a/elasticsearch/_sync/client/cluster.py
+++ b/elasticsearch/_sync/client/cluster.py
@@ -374,8 +374,13 @@ def get_settings(
``_
:param flat_settings: If `true`, returns settings in flat format.
- :param include_defaults: If `true`, returns default cluster settings from the
- local node.
+ :param include_defaults: If `true`, also returns default values for all other
+ cluster settings, reflecting the values in the `elasticsearch.yml` file of
+ one of the nodes in the cluster. If the nodes in your cluster do not all
+ have the same values in their `elasticsearch.yml` config files, then the values
+ returned by this API may vary from invocation to invocation and may not reflect
+ the values that Elasticsearch uses in all situations. Use the `GET _nodes/settings`
+ API to fetch the settings for each individual node in your cluster.
:param master_timeout: Period to wait for a connection to the master node. If
no response is received before the timeout expires, the request fails and
returns an error.
diff --git a/elasticsearch/_sync/client/esql.py b/elasticsearch/_sync/client/esql.py
index 0744a81bd..80843c2d3 100644
--- a/elasticsearch/_sync/client/esql.py
+++ b/elasticsearch/_sync/client/esql.py
@@ -28,6 +28,9 @@
_stability_warning,
)
+if t.TYPE_CHECKING:
+ from elasticsearch.esql import ESQLBase
+
class EsqlClient(NamespacedClient):
@@ -50,7 +53,7 @@ class EsqlClient(NamespacedClient):
def async_query(
self,
*,
- query: t.Optional[str] = None,
+ query: t.Optional[t.Union[str, "ESQLBase"]] = None,
allow_partial_results: t.Optional[bool] = None,
columnar: t.Optional[bool] = None,
delimiter: t.Optional[str] = None,
@@ -111,7 +114,12 @@ def async_query(
which has the name of all the columns.
:param filter: Specify a Query DSL query in the filter parameter to filter the
set of documents that an ES|QL query runs on.
- :param format: A short version of the Accept header, for example `json` or `yaml`.
+ :param format: A short version of the Accept header, for example `json` or `yaml`. `csv`,
+ `tsv`, and `txt` formats will return results in a tabular format, excluding
+ other metadata fields from the response. For async requests, nothing will
+ be returned if the async query doesn't finish within the timeout. The query
+ ID and running status are available in the `X-Elasticsearch-Async-Id` and
+ `X-Elasticsearch-Async-Is-Running` HTTP headers of the response, respectively.
:param include_ccs_metadata: When set to `true` and performing a cross-cluster
query, the response will include an extra `_clusters` object with information
about the clusters that participated in the search along with info such as
@@ -165,7 +173,7 @@ def async_query(
__query["pretty"] = pretty
if not __body:
if query is not None:
- __body["query"] = query
+ __body["query"] = str(query)
if columnar is not None:
__body["columnar"] = columnar
if filter is not None:
@@ -405,6 +413,8 @@ def get_query(
Returns an object with extended information about a running ES|QL query.
+ ``_
+
:param id: The query ID
"""
if id in SKIP_IN_PATH:
@@ -446,6 +456,8 @@ def list_queries(
Get information about running ES|QL queries.
Returns an object containing IDs and other information about the running ES|QL queries.
+
+ ``_
"""
__path_parts: t.Dict[str, str] = {}
__path = "/_query/queries"
@@ -484,7 +496,7 @@ def list_queries(
def query(
self,
*,
- query: t.Optional[str] = None,
+ query: t.Optional[t.Union[str, "ESQLBase"]] = None,
allow_partial_results: t.Optional[bool] = None,
columnar: t.Optional[bool] = None,
delimiter: t.Optional[str] = None,
@@ -539,7 +551,9 @@ def query(
`all_columns` which has the name of all columns.
:param filter: Specify a Query DSL query in the filter parameter to filter the
set of documents that an ES|QL query runs on.
- :param format: A short version of the Accept header, e.g. json, yaml.
+ :param format: A short version of the Accept header, for example `json` or `yaml`. `csv`,
+ `tsv`, and `txt` formats will return results in a tabular format, excluding
+ other metadata fields from the response.
:param include_ccs_metadata: When set to `true` and performing a cross-cluster
query, the response will include an extra `_clusters` object with information
about the clusters that participated in the search along with info such as
@@ -579,7 +593,7 @@ def query(
__query["pretty"] = pretty
if not __body:
if query is not None:
- __body["query"] = query
+ __body["query"] = str(query)
if columnar is not None:
__body["columnar"] = columnar
if filter is not None:
diff --git a/elasticsearch/_sync/client/indices.py b/elasticsearch/_sync/client/indices.py
index ba1830708..26ca08f14 100644
--- a/elasticsearch/_sync/client/indices.py
+++ b/elasticsearch/_sync/client/indices.py
@@ -1208,7 +1208,7 @@ def delete_data_stream_options(
Removes the data stream options from a data stream.
- ``_
+ ``_
:param name: A comma-separated list of data streams of which the data stream
options will be deleted; use `*` to get all data streams
@@ -2568,7 +2568,7 @@ def get_data_stream_options(
Get the data stream options configuration of one or more data streams.
- ``_
+ ``_
:param name: Comma-separated list of data streams to limit the request. Supports
wildcards (`*`). To target all data streams, omit this parameter or use `*`
@@ -3684,7 +3684,7 @@ def put_data_stream_options(
Update the data stream options of the specified data streams.
- ``_
+ ``_
:param name: Comma-separated list of data streams used to limit the request.
Supports wildcards (`*`). To target all data streams use `*` or `_all`.
@@ -4051,7 +4051,7 @@ def put_mapping(
Change a field's mapping using reindexing
Rename a field using a field alias
- Learn how to use the update mapping API with practical examples in the Update mapping API examples guide.
+ Learn how to use the update mapping API with practical examples in the Update mapping API examples guide.
``_
diff --git a/elasticsearch/_sync/client/inference.py b/elasticsearch/_sync/client/inference.py
index 8505f25e5..eb430506c 100644
--- a/elasticsearch/_sync/client/inference.py
+++ b/elasticsearch/_sync/client/inference.py
@@ -396,17 +396,18 @@ def put(
Azure AI Studio (completion, text_embedding)
Azure OpenAI (completion, text_embedding)
Cohere (completion, rerank, text_embedding)
- DeepSeek (completion, chat_completion)
+ DeepSeek (chat_completion, completion)
Elasticsearch (rerank, sparse_embedding, text_embedding - this service is for built-in models and models uploaded through Eland)
ELSER (sparse_embedding)
Google AI Studio (completion, text_embedding)
- Google Vertex AI (rerank, text_embedding)
+ Google Vertex AI (chat_completion, completion, rerank, text_embedding)
Hugging Face (chat_completion, completion, rerank, text_embedding)
+ JinaAI (rerank, text_embedding)
+ Llama (chat_completion, completion, text_embedding)
Mistral (chat_completion, completion, text_embedding)
OpenAI (chat_completion, completion, text_embedding)
- VoyageAI (text_embedding, rerank)
+ VoyageAI (rerank, text_embedding)
Watsonx inference integration (text_embedding)
- JinaAI (text_embedding, rerank)
diff --git a/elasticsearch/_sync/client/sql.py b/elasticsearch/_sync/client/sql.py
index 90cb01681..b2750ede1 100644
--- a/elasticsearch/_sync/client/sql.py
+++ b/elasticsearch/_sync/client/sql.py
@@ -283,7 +283,7 @@ def query(
keep_alive: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
keep_on_completion: t.Optional[bool] = None,
page_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
- params: t.Optional[t.Mapping[str, t.Any]] = None,
+ params: t.Optional[t.Sequence[t.Any]] = None,
pretty: t.Optional[bool] = None,
query: t.Optional[str] = None,
request_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
diff --git a/elasticsearch/_sync/client/transform.py b/elasticsearch/_sync/client/transform.py
index ea9941f74..c8c8b0d8b 100644
--- a/elasticsearch/_sync/client/transform.py
+++ b/elasticsearch/_sync/client/transform.py
@@ -602,6 +602,66 @@ def schedule_now_transform(
path_parts=__path_parts,
)
+ @_rewrite_parameters()
+ def set_upgrade_mode(
+ self,
+ *,
+ enabled: t.Optional[bool] = None,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ pretty: t.Optional[bool] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ Set upgrade_mode for transform indices.
+ Sets a cluster-wide upgrade_mode setting that prepares transform
+ indices for an upgrade.
+ When upgrading your cluster, in some circumstances you must restart your
+ nodes and reindex your transform indices. In those circumstances,
+ there must be no transforms running. You can close the transforms,
+ do the upgrade, then open all the transforms again. Alternatively,
+ you can use this API to temporarily halt tasks associated with the transforms
+ and prevent new transforms from opening. You can also use this API
+ during upgrades that do not require you to reindex your transform
+ indices, though stopping transforms is not a requirement in that case.
+ You can see the current value for the upgrade_mode setting by using the get
+ transform info API.
+
+
+ ``_
+
+ :param enabled: When `true`, it enables `upgrade_mode` which temporarily halts
+ all transform tasks and prohibits new transform tasks from starting.
+ :param timeout: The time to wait for the request to be completed.
+ """
+ __path_parts: t.Dict[str, str] = {}
+ __path = "/_transform/set_upgrade_mode"
+ __query: t.Dict[str, t.Any] = {}
+ if enabled is not None:
+ __query["enabled"] = enabled
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if pretty is not None:
+ __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
+ __headers = {"accept": "application/json"}
+ return self.perform_request( # type: ignore[return-value]
+ "POST",
+ __path,
+ params=__query,
+ headers=__headers,
+ endpoint_id="transform.set_upgrade_mode",
+ path_parts=__path_parts,
+ )
+
@_rewrite_parameters(
parameter_aliases={"from": "from_"},
)
diff --git a/elasticsearch/dsl/types.py b/elasticsearch/dsl/types.py
index 452a945dd..534521437 100644
--- a/elasticsearch/dsl/types.py
+++ b/elasticsearch/dsl/types.py
@@ -144,12 +144,29 @@ def __init__(
class ChunkingSettings(AttrDict[Any]):
"""
- :arg strategy: (required) The chunking strategy: `sentence` or `word`.
- Defaults to `sentence` if omitted.
+ :arg strategy: (required) The chunking strategy: `sentence`, `word`,
+ `none` or `recursive`. If `strategy` is set to `recursive`,
+ you must also specify `max_chunk_size` and either `separators`
+ or `separator_group`. Learn more about different chunking
+ strategies in the linked documentation. Defaults to `sentence` if
+ omitted.
:arg max_chunk_size: (required) The maximum size of a chunk in words.
This value cannot be higher than `300` or lower than `20` (for
`sentence` strategy) or `10` (for `word` strategy). Defaults to
`250` if omitted.
+ :arg separator_group: Only applicable to the `recursive` strategy and
+ required when using it. Sets a predefined list of separators in
+ the saved chunking settings based on the selected text type.
+ Values can be `markdown` or `plaintext`. Using this parameter is
+ an alternative to manually specifying a custom `separators` list.
+ :arg separators: Only applicable to the `recursive` strategy and
+ required when using it. A list of strings used as possible split
+ points when chunking text. Each string can be a plain string or a
+ regular expression (regex) pattern. The system tries each
+ separator in order to split the text, starting from the first item
+ in the list. After splitting, it attempts to recombine smaller
+ pieces into larger chunks that stay within the `max_chunk_size`
+ limit, to reduce the total number of chunks generated.
:arg overlap: The number of overlapping words for chunks. It is
applicable only to a `word` chunking strategy. This value cannot
be higher than half the `max_chunk_size` value. Defaults to `100`
@@ -161,6 +178,8 @@ class ChunkingSettings(AttrDict[Any]):
strategy: Union[str, DefaultType]
max_chunk_size: Union[int, DefaultType]
+ separator_group: Union[str, DefaultType]
+ separators: Union[Sequence[str], DefaultType]
overlap: Union[int, DefaultType]
sentence_overlap: Union[int, DefaultType]
@@ -169,6 +188,8 @@ def __init__(
*,
strategy: Union[str, DefaultType] = DEFAULT,
max_chunk_size: Union[int, DefaultType] = DEFAULT,
+ separator_group: Union[str, DefaultType] = DEFAULT,
+ separators: Union[Sequence[str], DefaultType] = DEFAULT,
overlap: Union[int, DefaultType] = DEFAULT,
sentence_overlap: Union[int, DefaultType] = DEFAULT,
**kwargs: Any,
@@ -177,6 +198,10 @@ def __init__(
kwargs["strategy"] = strategy
if max_chunk_size is not DEFAULT:
kwargs["max_chunk_size"] = max_chunk_size
+ if separator_group is not DEFAULT:
+ kwargs["separator_group"] = separator_group
+ if separators is not DEFAULT:
+ kwargs["separators"] = separators
if overlap is not DEFAULT:
kwargs["overlap"] = overlap
if sentence_overlap is not DEFAULT:
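
A sketch of the new attributes with the recursive strategy (the values are
illustrative):

    from elasticsearch.dsl.types import ChunkingSettings

    # The recursive strategy needs max_chunk_size plus either separator_group
    # or an explicit separators list
    chunking = ChunkingSettings(
        strategy="recursive",
        max_chunk_size=200,
        separator_group="markdown",  # or: separators=["\n# ", "\n## ", "\n"]
    )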
@@ -4523,7 +4548,7 @@ class ArrayPercentilesItem(AttrDict[Any]):
:arg value_as_string:
"""
- key: str
+ key: float
value: Union[float, None]
value_as_string: str
@@ -5369,7 +5394,9 @@ class HdrPercentileRanksAggregate(AttrDict[Any]):
:arg meta:
"""
- values: Union[Mapping[str, Union[str, int, None]], Sequence["ArrayPercentilesItem"]]
+ values: Union[
+ Mapping[str, Union[str, float, None]], Sequence["ArrayPercentilesItem"]
+ ]
meta: Mapping[str, Any]
@@ -5379,7 +5406,9 @@ class HdrPercentilesAggregate(AttrDict[Any]):
:arg meta:
"""
- values: Union[Mapping[str, Union[str, int, None]], Sequence["ArrayPercentilesItem"]]
+ values: Union[
+ Mapping[str, Union[str, float, None]], Sequence["ArrayPercentilesItem"]
+ ]
meta: Mapping[str, Any]
@@ -5886,7 +5915,9 @@ class PercentilesBucketAggregate(AttrDict[Any]):
:arg meta:
"""
- values: Union[Mapping[str, Union[str, int, None]], Sequence["ArrayPercentilesItem"]]
+ values: Union[
+ Mapping[str, Union[str, float, None]], Sequence["ArrayPercentilesItem"]
+ ]
meta: Mapping[str, Any]
@@ -6087,17 +6118,19 @@ class SearchProfile(AttrDict[Any]):
class ShardFailure(AttrDict[Any]):
"""
:arg reason: (required)
- :arg shard: (required)
:arg index:
:arg node:
+ :arg shard:
:arg status:
+ :arg primary:
"""
reason: "ErrorCause"
- shard: int
index: str
node: str
+ shard: int
status: str
+ primary: bool
class ShardProfile(AttrDict[Any]):
@@ -6421,7 +6454,9 @@ class TDigestPercentileRanksAggregate(AttrDict[Any]):
:arg meta:
"""
- values: Union[Mapping[str, Union[str, int, None]], Sequence["ArrayPercentilesItem"]]
+ values: Union[
+ Mapping[str, Union[str, float, None]], Sequence["ArrayPercentilesItem"]
+ ]
meta: Mapping[str, Any]
@@ -6431,7 +6466,9 @@ class TDigestPercentilesAggregate(AttrDict[Any]):
:arg meta:
"""
- values: Union[Mapping[str, Union[str, int, None]], Sequence["ArrayPercentilesItem"]]
+ values: Union[
+ Mapping[str, Union[str, float, None]], Sequence["ArrayPercentilesItem"]
+ ]
meta: Mapping[str, Any]
From f92da626a4d29ca7a13d45dd0243fe4422ac843e Mon Sep 17 00:00:00 2001
From: Miguel Grinberg
Date: Fri, 12 Sep 2025 14:23:45 +0100
Subject: [PATCH 12/21] Release 9.1.1 (#3067)
* Release 9.1.1
* release note fixes
* mypy fix
* run mypy with implicit reexports
---
docs/release-notes/index.md | 19 +++++++++++++++++++
elasticsearch/_version.py | 2 +-
elasticsearch/dsl/response/aggs.py | 2 +-
utils/build-dists.py | 3 +++
4 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/docs/release-notes/index.md b/docs/release-notes/index.md
index 76a1c610b..9ab11d2dd 100644
--- a/docs/release-notes/index.md
+++ b/docs/release-notes/index.md
@@ -18,6 +18,25 @@ To check for security updates, go to [Security announcements for the Elastic sta
% *
% ### Fixes [elasticsearch-python-client-next-fixes]
+## 9.1.1 (2025-09-11)
+
+* ES|QL query builder integration with the DSL module ([#3058](https://github.com/elastic/elasticsearch-py/pull/3058))
+* ES|QL query builder robustness fixes ([#3017](https://github.com/elastic/elasticsearch-py/pull/3017))
+* Fix ES|QL `multi_match()` signature ([#3052](https://github.com/elastic/elasticsearch-py/pull/3052))
+
+API
+* Add support for ES|QL query builder objects to ES|QL Query and Async Query APIs
+* Add Transform Set Upgrade Mode API
+* Fix type of `fields` parameter of Term Vectors API to array of strings
+* Fix type of `params` parameter of SQL Query API to array
+
+DSL
+* Preserve the `skip_empty` setting in `to_dict()` recursive serializations ([#3041](https://github.com/elastic/elasticsearch-py/pull/3041))
+* Add `separator_group` and `separators` attributes to `ChunkingSettings` type
+* Add `primary` attribute to `ShardFailure` type
+* Fix type of `key` attribute of `ArrayPercentilesItem` to float
+
+
## 9.1.0 (2025-07-30)
Enhancements
diff --git a/elasticsearch/_version.py b/elasticsearch/_version.py
index 7b6c8994d..8e8baef62 100644
--- a/elasticsearch/_version.py
+++ b/elasticsearch/_version.py
@@ -15,4 +15,4 @@
# specific language governing permissions and limitations
# under the License.
-__versionstr__ = "9.1.0"
+__versionstr__ = "9.1.1"
diff --git a/elasticsearch/dsl/response/aggs.py b/elasticsearch/dsl/response/aggs.py
index 8994fa761..51360f3f6 100644
--- a/elasticsearch/dsl/response/aggs.py
+++ b/elasticsearch/dsl/response/aggs.py
@@ -63,7 +63,7 @@ def _wrap_bucket(self, data: Dict[str, Any]) -> Bucket[_R]:
)
def __iter__(self) -> Iterator["Agg"]: # type: ignore[override]
- return iter(self.buckets) # type: ignore[arg-type]
+ return iter(self.buckets)
def __len__(self) -> int:
return len(self.buckets)
diff --git a/utils/build-dists.py b/utils/build-dists.py
index d67d6053a..fd1b477e0 100644
--- a/utils/build-dists.py
+++ b/utils/build-dists.py
@@ -121,6 +121,7 @@ def test_dist(dist):
"--install-types",
"--non-interactive",
"--ignore-missing-imports",
+ "--implicit-reexport",
os.path.join(base_dir, "test_elasticsearch/test_types/async_types.py"),
)
@@ -145,6 +146,7 @@ def test_dist(dist):
"--install-types",
"--non-interactive",
"--ignore-missing-imports",
+ "--implicit-reexport",
os.path.join(base_dir, "test_elasticsearch/test_types/sync_types.py"),
)
else:
@@ -156,6 +158,7 @@ def test_dist(dist):
"--install-types",
"--non-interactive",
"--ignore-missing-imports",
+ "--implicit-reexport",
os.path.join(
base_dir, "test_elasticsearch/test_types/aliased_types.py"
),
From f29032439178ad3c5fb4a172a56f75121f8fadc4 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Fri, 19 Sep 2025 12:07:27 +0100
Subject: [PATCH 13/21] Add most recently added fields as top-level exports
(#3078) (#3084)
* Add most recently added fields as top-level exports
* add unit test to ensure all fields are exported
(cherry picked from commit 9c610684aacf705ed6f302953478f47d1610dd7e)
Co-authored-by: Miguel Grinberg
---
elasticsearch/dsl/__init__.py | 28 +++++++++++++++++++++++
test_elasticsearch/test_dsl/test_field.py | 17 ++++++++++++++
2 files changed, 45 insertions(+)
diff --git a/elasticsearch/dsl/__init__.py b/elasticsearch/dsl/__init__.py
index e109db219..59294d632 100644
--- a/elasticsearch/dsl/__init__.py
+++ b/elasticsearch/dsl/__init__.py
@@ -38,23 +38,30 @@
TermsFacet,
)
from .field import (
+ AggregateMetricDouble,
+ Alias,
Binary,
Boolean,
Byte,
Completion,
ConstantKeyword,
+ CountedKeyword,
CustomField,
Date,
+ DateNanos,
DateRange,
DenseVector,
Double,
DoubleRange,
Field,
+ Flattened,
Float,
FloatRange,
GeoPoint,
GeoShape,
HalfFloat,
+ Histogram,
+ IcuCollationKeyword,
Integer,
IntegerRange,
Ip,
@@ -63,21 +70,28 @@
Keyword,
Long,
LongRange,
+ MatchOnlyText,
Murmur3,
Nested,
Object,
+ Passthrough,
Percolator,
Point,
RangeField,
RankFeature,
RankFeatures,
+ RankVectors,
ScaledFloat,
SearchAsYouType,
+ SemanticText,
Shape,
Short,
SparseVector,
Text,
TokenCount,
+ UnsignedLong,
+ Version,
+ Wildcard,
construct_field,
)
from .function import SF
@@ -108,6 +122,8 @@
"A",
"Agg",
"AggResponse",
+ "AggregateMetricDouble",
+ "Alias",
"AsyncComposableIndexTemplate",
"AsyncDocument",
"AsyncEmptySearch",
@@ -126,9 +142,11 @@
"Completion",
"ComposableIndexTemplate",
"ConstantKeyword",
+ "CountedKeyword",
"CustomField",
"Date",
"DateHistogramFacet",
+ "DateNanos",
"DateRange",
"DenseVector",
"Document",
@@ -142,12 +160,15 @@
"FacetedResponse",
"FacetedSearch",
"Field",
+ "Flattened",
"Float",
"FloatRange",
"GeoPoint",
"GeoShape",
"HalfFloat",
+ "Histogram",
"HistogramFacet",
+ "IcuCollationKeyword",
"IllegalOperation",
"Index",
"IndexTemplate",
@@ -162,12 +183,14 @@
"LongRange",
"M",
"Mapping",
+ "MatchOnlyText",
"MetaField",
"MultiSearch",
"Murmur3",
"Nested",
"NestedFacet",
"Object",
+ "Passthrough",
"Percolator",
"Point",
"Q",
@@ -177,11 +200,13 @@
"RangeField",
"RankFeature",
"RankFeatures",
+ "RankVectors",
"Response",
"SF",
"ScaledFloat",
"Search",
"SearchAsYouType",
+ "SemanticText",
"Shape",
"Short",
"SparseVector",
@@ -189,9 +214,12 @@
"Text",
"TokenCount",
"UnknownDslObject",
+ "UnsignedLong",
"UpdateByQuery",
"UpdateByQueryResponse",
"ValidationException",
+ "Version",
+ "Wildcard",
"analyzer",
"async_connections",
"char_filter",
diff --git a/test_elasticsearch/test_dsl/test_field.py b/test_elasticsearch/test_dsl/test_field.py
index 423936ae3..bf6bc7c83 100644
--- a/test_elasticsearch/test_dsl/test_field.py
+++ b/test_elasticsearch/test_dsl/test_field.py
@@ -23,6 +23,7 @@
import pytest
from dateutil import tz
+from elasticsearch import dsl
from elasticsearch.dsl import InnerDoc, Range, ValidationException, field
@@ -232,3 +233,19 @@ class Inner(InnerDoc):
with pytest.raises(ValidationException):
field.Object(doc_class=Inner, dynamic=False)
+
+
+def test_all_fields_exported() -> None:
+ """Make sure that all the generated field classes are exported at the top-level"""
+ fields = [
+ f
+ for f in dir(field)
+ if isinstance(getattr(field, f), type)
+ and issubclass(getattr(field, f), field.Field)
+ ]
+ all = dir(dsl)
+ not_found = []
+ for f in fields:
+ if f not in all:
+ not_found.append(f)
+ assert not_found == []
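With this change the field classes above become importable straight from the package top level. A minimal sketch of the new imports, using only names that appear in the export list above; the document class and index name are illustrative:

```python
# Hedged sketch: these names are now importable from elasticsearch.dsl
# directly instead of elasticsearch.dsl.field. Class/index names are made up.
from elasticsearch.dsl import Document, SemanticText, Version, Wildcard

class Package(Document):
    name = Wildcard()         # keyword variant optimized for wildcard queries
    release = Version()       # semver-aware keyword variant
    summary = SemanticText()  # semantic_text mapping type

    class Index:
        name = "packages"
```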
From 4f96941893c2a608cc2e3292178ac565b70a914f Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 1 Oct 2025 11:20:16 +0100
Subject: [PATCH 14/21] Support values of inner docs given as AttrDict
instances (#3080) (#3093)
* Support values of inner docs given as AttrDict instances
* one more unit test
(cherry picked from commit f68539e5992cc4acbef9bff087ef09ef01fb00a5)
Co-authored-by: Miguel Grinberg
---
elasticsearch/dsl/field.py | 6 +++-
test_elasticsearch/test_dsl/test_field.py | 36 ++++++++++++++++++-
.../test_integration/_async/test_document.py | 24 ++++++++++++-
.../test_integration/_sync/test_document.py | 20 ++++++++++-
utils/templates/field.py.tpl | 6 +++-
5 files changed, 87 insertions(+), 5 deletions(-)
diff --git a/elasticsearch/dsl/field.py b/elasticsearch/dsl/field.py
index 895765e66..3f71f76eb 100644
--- a/elasticsearch/dsl/field.py
+++ b/elasticsearch/dsl/field.py
@@ -572,7 +572,11 @@ def _serialize(
if isinstance(data, collections.abc.Mapping):
return data
- return data.to_dict(skip_empty=skip_empty)
+ try:
+ return data.to_dict(skip_empty=skip_empty)
+ except TypeError:
+ # this would only happen if an AttrDict was given instead of an InnerDoc
+ return data.to_dict()
def clean(self, data: Any) -> Any:
data = super().clean(data)
diff --git a/test_elasticsearch/test_dsl/test_field.py b/test_elasticsearch/test_dsl/test_field.py
index bf6bc7c83..181de6256 100644
--- a/test_elasticsearch/test_dsl/test_field.py
+++ b/test_elasticsearch/test_dsl/test_field.py
@@ -24,7 +24,14 @@
from dateutil import tz
from elasticsearch import dsl
-from elasticsearch.dsl import InnerDoc, Range, ValidationException, field
+from elasticsearch.dsl import (
+ AttrDict,
+ AttrList,
+ InnerDoc,
+ Range,
+ ValidationException,
+ field,
+)
def test_date_range_deserialization() -> None:
@@ -235,6 +242,33 @@ class Inner(InnerDoc):
field.Object(doc_class=Inner, dynamic=False)
+def test_dynamic_object() -> None:
+ f = field.Object(dynamic=True)
+ assert f.deserialize({"a": "b"}).to_dict() == {"a": "b"}
+ assert f.deserialize(AttrDict({"a": "b"})).to_dict() == {"a": "b"}
+ assert f.serialize({"a": "b"}) == {"a": "b"}
+ assert f.serialize(AttrDict({"a": "b"})) == {"a": "b"}
+
+
+def test_dynamic_nested() -> None:
+ f = field.Nested(dynamic=True)
+ assert f.deserialize([{"a": "b"}, {"c": "d"}]) == [{"a": "b"}, {"c": "d"}]
+ assert f.deserialize([AttrDict({"a": "b"}), {"c": "d"}]) == [
+ {"a": "b"},
+ {"c": "d"},
+ ]
+ assert f.deserialize(AttrList([AttrDict({"a": "b"}), {"c": "d"}])) == [
+ {"a": "b"},
+ {"c": "d"},
+ ]
+ assert f.serialize([{"a": "b"}, {"c": "d"}]) == [{"a": "b"}, {"c": "d"}]
+ assert f.serialize([AttrDict({"a": "b"}), {"c": "d"}]) == [{"a": "b"}, {"c": "d"}]
+ assert f.serialize(AttrList([AttrDict({"a": "b"}), {"c": "d"}])) == [
+ {"a": "b"},
+ {"c": "d"},
+ ]
+
+
def test_all_fields_exported() -> None:
"""Make sure that all the generated field classes are exported at the top-level"""
fields = [
diff --git a/test_elasticsearch/test_dsl/test_integration/_async/test_document.py b/test_elasticsearch/test_dsl/test_integration/_async/test_document.py
index 3d769c606..36f055583 100644
--- a/test_elasticsearch/test_dsl/test_integration/_async/test_document.py
+++ b/test_elasticsearch/test_dsl/test_integration/_async/test_document.py
@@ -33,6 +33,7 @@
from elasticsearch.dsl import (
AsyncDocument,
AsyncSearch,
+ AttrDict,
Binary,
Boolean,
Date,
@@ -627,13 +628,17 @@ async def test_can_save_to_different_index(
@pytest.mark.asyncio
+@pytest.mark.parametrize("validate", (True, False))
async def test_save_without_skip_empty_will_include_empty_fields(
async_write_client: AsyncElasticsearch,
+ validate: bool,
) -> None:
test_repo = Repository(
field_1=[], field_2=None, field_3={}, owner={"name": None}, meta={"id": 42}
)
- assert await test_repo.save(index="test-document", skip_empty=False)
+ assert await test_repo.save(
+ index="test-document", skip_empty=False, validate=validate
+ )
assert_doc_equals(
{
@@ -650,6 +655,23 @@ async def test_save_without_skip_empty_will_include_empty_fields(
await async_write_client.get(index="test-document", id=42),
)
+ test_repo = Repository(owner=AttrDict({"name": None}), meta={"id": 43})
+ assert await test_repo.save(
+ index="test-document", skip_empty=False, validate=validate
+ )
+
+ assert_doc_equals(
+ {
+ "found": True,
+ "_index": "test-document",
+ "_id": "43",
+ "_source": {
+ "owner": {"name": None},
+ },
+ },
+ await async_write_client.get(index="test-document", id=43),
+ )
+
@pytest.mark.asyncio
async def test_delete(async_write_client: AsyncElasticsearch) -> None:
diff --git a/test_elasticsearch/test_dsl/test_integration/_sync/test_document.py b/test_elasticsearch/test_dsl/test_integration/_sync/test_document.py
index a005d45bf..62857cd9a 100644
--- a/test_elasticsearch/test_dsl/test_integration/_sync/test_document.py
+++ b/test_elasticsearch/test_dsl/test_integration/_sync/test_document.py
@@ -31,6 +31,7 @@
from elasticsearch import ConflictError, Elasticsearch, NotFoundError
from elasticsearch.dsl import (
+ AttrDict,
Binary,
Boolean,
Date,
@@ -621,13 +622,15 @@ def test_can_save_to_different_index(
@pytest.mark.sync
+@pytest.mark.parametrize("validate", (True, False))
def test_save_without_skip_empty_will_include_empty_fields(
write_client: Elasticsearch,
+ validate: bool,
) -> None:
test_repo = Repository(
field_1=[], field_2=None, field_3={}, owner={"name": None}, meta={"id": 42}
)
- assert test_repo.save(index="test-document", skip_empty=False)
+ assert test_repo.save(index="test-document", skip_empty=False, validate=validate)
assert_doc_equals(
{
@@ -644,6 +647,21 @@ def test_save_without_skip_empty_will_include_empty_fields(
write_client.get(index="test-document", id=42),
)
+ test_repo = Repository(owner=AttrDict({"name": None}), meta={"id": 43})
+ assert test_repo.save(index="test-document", skip_empty=False, validate=validate)
+
+ assert_doc_equals(
+ {
+ "found": True,
+ "_index": "test-document",
+ "_id": "43",
+ "_source": {
+ "owner": {"name": None},
+ },
+ },
+ write_client.get(index="test-document", id=43),
+ )
+
@pytest.mark.sync
def test_delete(write_client: Elasticsearch) -> None:
diff --git a/utils/templates/field.py.tpl b/utils/templates/field.py.tpl
index 8699d852e..43df1b5f0 100644
--- a/utils/templates/field.py.tpl
+++ b/utils/templates/field.py.tpl
@@ -334,7 +334,11 @@ class {{ k.name }}({{ k.parent }}):
if isinstance(data, collections.abc.Mapping):
return data
- return data.to_dict(skip_empty=skip_empty)
+ try:
+ return data.to_dict(skip_empty=skip_empty)
+ except TypeError:
+ # this would only happen if an AttrDict was given instead of an InnerDoc
+ return data.to_dict()
def clean(self, data: Any) -> Any:
data = super().clean(data)
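The tests above pin down the behavior this fallback enables. A quick sketch of the same idea, using only names that appear in this patch:

```python
# Hedged sketch of the AttrDict fallback added above: AttrDict.to_dict() does
# not accept skip_empty, so _serialize() now catches the TypeError and retries.
from elasticsearch.dsl import AttrDict, field

obj = field.Object(dynamic=True)
assert obj.serialize({"a": "b"}) == {"a": "b"}            # plain mappings pass through
assert obj.serialize(AttrDict({"a": "b"})) == {"a": "b"}  # previously raised TypeError
```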
From 875d656e5e6d4be14b72c4d710c45494885df512 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Mon, 13 Oct 2025 14:30:52 +0400
Subject: [PATCH 15/21] Allow running .buildkite scripts on macOS (#3087)
(#3100)
realpath -s is a GNU extension, but we don't need to care about symlinks here.
(cherry picked from commit dd3957903b96d32a3ecbaa506892bd3c4b8fc9b8)
Co-authored-by: Quentin Pradet
---
.buildkite/functions/imports.sh | 2 +-
.buildkite/run-elasticsearch.sh | 2 +-
.buildkite/run-tests | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/.buildkite/functions/imports.sh b/.buildkite/functions/imports.sh
index e732ebba0..ad7a20269 100755
--- a/.buildkite/functions/imports.sh
+++ b/.buildkite/functions/imports.sh
@@ -43,7 +43,7 @@ if [[ -z $es_node_name ]]; then
fi
- export script_path=$(dirname $(realpath -s $0))
+ export script_path=$(dirname $(realpath $0))
source $script_path/functions/cleanup.sh
source $script_path/functions/wait-for-container.sh
trap "cleanup_trap ${network_name}" EXIT
diff --git a/.buildkite/run-elasticsearch.sh b/.buildkite/run-elasticsearch.sh
index 2f73ea8d1..2dda5b4af 100755
--- a/.buildkite/run-elasticsearch.sh
+++ b/.buildkite/run-elasticsearch.sh
@@ -21,7 +21,7 @@
# - Moved ELASTIC_PASSWORD and xpack.security.enabled to the base arguments for "Security On by default"
# - Use https only when TEST_SUITE is "platinum", when "free" use http
-script_path=$(dirname $(realpath -s $0))
+script_path=$(dirname $(realpath $0))
source $script_path/functions/imports.sh
set -euo pipefail
diff --git a/.buildkite/run-tests b/.buildkite/run-tests
index 5d6b38039..90a95a209 100755
--- a/.buildkite/run-tests
+++ b/.buildkite/run-tests
@@ -10,7 +10,7 @@ export TEST_SUITE="${TEST_SUITE:=platinum}"
export PYTHON_VERSION="${PYTHON_VERSION:=3.13}"
export PYTHON_CONNECTION_CLASS="${PYTHON_CONNECTION_CLASS:=urllib3}"
-script_path=$(dirname $(realpath -s $0))
+script_path=$(dirname $(realpath $0))
source $script_path/functions/imports.sh
set -euo pipefail
From 66005e5bedb991ff6eb92c20aa7ac0a790c40c9a Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Mon, 13 Oct 2025 14:35:02 +0100
Subject: [PATCH 16/21] Update the compatibility mode section of the docs
 (#3098) (#3105)
* Update the compatibility mode section of the docs
* merge with compatibility section in index.md
(cherry picked from commit fcdbbc09584f11400f88c829ed14b14cc6a60281)
Co-authored-by: Miguel Grinberg
---
docs/reference/connecting.md | 7 -------
docs/reference/index.md | 8 +++++++-
2 files changed, 7 insertions(+), 8 deletions(-)
diff --git a/docs/reference/connecting.md b/docs/reference/connecting.md
index 27f7fecbc..56b3aaee7 100644
--- a/docs/reference/connecting.md
+++ b/docs/reference/connecting.md
@@ -277,13 +277,6 @@ client = Elasticsearch(
```
-## Enabling the Compatibility Mode [compatibility-mode]
-
-The {{es}} server version 8.0 is introducing a new compatibility mode that allows you a smoother upgrade experience from 7 to 8. In a nutshell, you can use the latest 7.x Python {{es}} {{es}} client with an 8.x {{es}} server, giving more room to coordinate the upgrade of your codebase to the next major version.
-
-If you want to leverage this functionality, please make sure that you are using the latest 7.x Python {{es}} client and set the environment variable `ELASTIC_CLIENT_APIVERSIONING` to `true`. The client is handling the rest internally. For every 8.0 and beyond Python {{es}} client, you’re all set! The compatibility mode is enabled by default.
-
-
## Using the Client in a Function-as-a-Service Environment [connecting-faas]
This section illustrates the best practices for leveraging the {{es}} client in a Function-as-a-Service (FaaS) environment.
diff --git a/docs/reference/index.md b/docs/reference/index.md
index 6046d7801..03143bbb2 100644
--- a/docs/reference/index.md
+++ b/docs/reference/index.md
@@ -69,8 +69,14 @@ Compatibility does not imply full feature parity. New {{es}} features are suppor
{{es}} language clients are also _backward compatible_ across minor versions — with default distributions and without guarantees.
+### Major version upgrades
+
:::{tip}
To upgrade to a new major version, first upgrade {{es}}, then upgrade the Python {{es}} client.
:::
-If you need to work with multiple client versions, note that older versions are also released as `elasticsearch7` and `elasticsearch8`.
+Since version 8.0, the {{es}} server supports a compatibility mode that allows smoother upgrade experiences. In a nutshell, this makes it possible to upgrade the {{es}} server to the next major version, while continuing to use the same client. This gives more room to coordinate the upgrade of your codebase to the next major version.
+
+For example, to upgrade a system that uses {{es}} 8.x, you can upgrade the {{es}} server to 9.x first, and the 8.x Python {{es}} client will continue to work (aside from any breaking changes, which should be listed in the server release notes). You can continue using the 8.x client during the server migration, and only upgrade the client once the server migration is complete. The process is described in detail in the [REST API compatibility workflow](https://www.elastic.co/docs/reference/elasticsearch/rest-apis/compatibility#_rest_api_compatibility_workflow) section of the {{es}} documentation.
+
+If you need to work with multiple client versions, note that older versions are also released with the `elasticsearch8` and `elasticsearch9` package names so that they can be installed together.
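For illustration, the version-suffixed packages mentioned above can coexist in one environment during a migration. A hedged sketch, assuming the import names follow the package names as they do for `elasticsearch7`/`elasticsearch8`; the URLs are illustrative:

```python
# Hedged sketch: after `pip install elasticsearch8 elasticsearch9`, both
# clients can be imported side by side while a migration is in progress.
from elasticsearch8 import Elasticsearch as Elasticsearch8
from elasticsearch9 import Elasticsearch as Elasticsearch9

legacy = Elasticsearch8("http://old-cluster:9200")
current = Elasticsearch9("http://new-cluster:9200")
```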
From 7a1b099a6addaabe706f36dff4acc2529d8bd4ab Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Mon, 13 Oct 2025 16:59:04 +0000
Subject: [PATCH 17/21] Bump checkout action from v4 to v5 (#3094) (#3107)
(cherry picked from commit a11aab5f5a97874e7fc917607e582cda86cdf32f)
Co-authored-by: Riccardo Solazzi <129967922+TheZalRevolt@users.noreply.github.com>
Co-authored-by: riccardo solazzi
Co-authored-by: Miguel Grinberg
---
.github/workflows/ci.yml | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0bc43d985..b335bfac0 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -8,7 +8,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout Repository
- uses: actions/checkout@v4
+ uses: actions/checkout@v5
- name: Set up Python 3.x
uses: actions/setup-python@v5
with:
@@ -23,7 +23,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout Repository
- uses: actions/checkout@v4
+ uses: actions/checkout@v5
- name: Set up Python 3.x
uses: actions/setup-python@v5
with:
@@ -47,7 +47,7 @@ jobs:
continue-on-error: false
steps:
- name: Checkout Repository
- uses: actions/checkout@v4
+ uses: actions/checkout@v5
- name: Set Up Python - ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
From 854134d8425e576edb0d65bd59161a72f4532cd4 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 15 Oct 2025 12:53:30 +0100
Subject: [PATCH 18/21] Add 3.14 to CI builds (#3103) (#3111)
* Add 3.14 to CI builds
* Support Python 3.14 __annotate_func__
* Temporarily remove pyarrow
* Update pyproject.toml
* cleanup annotationlib use
---------
(cherry picked from commit 1f43ab95f1a863f0f58b5818f272911d89060d3f)
Co-authored-by: Miguel Grinberg
Co-authored-by: Quentin Pradet
---
.buildkite/Dockerfile | 2 +-
.buildkite/pipeline.yml | 5 +++--
.buildkite/run-tests | 2 +-
.github/workflows/ci.yml | 2 +-
elasticsearch/dsl/document_base.py | 15 +++++++++++++++
noxfile.py | 4 ++--
pyproject.toml | 2 +-
7 files changed, 24 insertions(+), 8 deletions(-)
diff --git a/.buildkite/Dockerfile b/.buildkite/Dockerfile
index a68ad997d..2c6bfbe6c 100644
--- a/.buildkite/Dockerfile
+++ b/.buildkite/Dockerfile
@@ -1,4 +1,4 @@
-ARG PYTHON_VERSION=3.13
+ARG PYTHON_VERSION=3.14
FROM python:${PYTHON_VERSION}
# Default UID/GID to 1000
diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index ac7c6b60a..729336bac 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -16,6 +16,7 @@ steps:
- "3.11"
- "3.12"
- "3.13"
+ - "3.14"
connection:
- "urllib3"
- "requests"
@@ -23,11 +24,11 @@ steps:
- "test"
adjustments:
- with:
- python: "3.9"
+ python: "3.10"
connection: "urllib3"
nox_session: "test_otel"
- with:
- python: "3.13"
+ python: "3.14"
connection: "urllib3"
nox_session: "test_otel"
command: ./.buildkite/run-tests
diff --git a/.buildkite/run-tests b/.buildkite/run-tests
index 90a95a209..8d0eb7ffd 100755
--- a/.buildkite/run-tests
+++ b/.buildkite/run-tests
@@ -7,7 +7,7 @@
# Default environment variables
export STACK_VERSION="${STACK_VERSION:=8.0.0-SNAPSHOT}"
export TEST_SUITE="${TEST_SUITE:=platinum}"
-export PYTHON_VERSION="${PYTHON_VERSION:=3.13}"
+export PYTHON_VERSION="${PYTHON_VERSION:=3.14}"
export PYTHON_CONNECTION_CLASS="${PYTHON_CONNECTION_CLASS:=urllib3}"
script_path=$(dirname $(realpath $0))
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b335bfac0..20663c601 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -38,7 +38,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+ python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
nox-session: [""]
runs-on: ["ubuntu-latest"]
diff --git a/elasticsearch/dsl/document_base.py b/elasticsearch/dsl/document_base.py
index 4df900a39..72f0364a4 100644
--- a/elasticsearch/dsl/document_base.py
+++ b/elasticsearch/dsl/document_base.py
@@ -34,6 +34,11 @@
overload,
)
+try:
+ import annotationlib
+except ImportError:
+ annotationlib = None
+
try:
from types import UnionType
except ImportError:
@@ -332,6 +337,16 @@ def __init__(self, name: str, bases: Tuple[type, ...], attrs: Dict[str, Any]):
# # ignore attributes
# field10: ClassVar[string] = "a regular class variable"
annotations = attrs.get("__annotations__", {})
+ if not annotations and annotationlib:
+ # Python 3.14+ uses annotationlib
+ annotate = annotationlib.get_annotate_from_class_namespace(attrs)
+ if annotate:
+ annotations = (
+ annotationlib.call_annotate_function(
+ annotate, format=annotationlib.Format.VALUE
+ )
+ or {}
+ )
fields = {n for n in attrs if isinstance(attrs[n], Field)}
fields.update(annotations.keys())
field_defaults = {}
diff --git a/noxfile.py b/noxfile.py
index d5a6099e7..01de5e4b4 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -44,14 +44,14 @@ def pytest_argv():
]
-@nox.session(python=["3.9", "3.10", "3.11", "3.12", "3.13"])
+@nox.session(python=["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"])
def test(session):
session.install("-e", ".[dev]", env=INSTALL_ENV, silent=False)
session.run(*pytest_argv(), *session.posargs)
-@nox.session(python=["3.9", "3.13"])
+@nox.session(python=["3.10", "3.14"])
def test_otel(session):
session.install(
".[dev]",
diff --git a/pyproject.toml b/pyproject.toml
index a8e5ead9e..6b4915106 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -73,7 +73,7 @@ dev = [
"orjson",
"numpy",
"simsimd",
- "pyarrow",
+ "pyarrow; python_version<'3.14'",
"pandas",
"mapbox-vector-tile",
"jinja2",
From a863b537ce84c47383ee68201d19bc955850710a Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 15 Oct 2025 15:01:45 +0100
Subject: [PATCH 19/21] Add `flush_after_seconds` option to `streaming_bulk()`
(#3064) (#3116)
* Add flush option to streaming_bulk()
* unit tests
* bulk timeouts
* use context manager to run the timeout background tasks
* format code
* integration tests
* docstrings
(cherry picked from commit 6fbdecb7219c708870af1985991264d753f66496)
Co-authored-by: Miguel Grinberg
---
elasticsearch/_async/helpers.py | 67 +++++++--
elasticsearch/compat.py | 44 +++++-
elasticsearch/helpers/__init__.py | 11 +-
elasticsearch/helpers/actions.py | 139 +++++++++++++-----
.../test_async/test_server/test_helpers.py | 40 +++++
test_elasticsearch/test_helpers.py | 50 ++++++-
.../test_server/test_helpers.py | 42 ++++++
7 files changed, 343 insertions(+), 50 deletions(-)
diff --git a/elasticsearch/_async/helpers.py b/elasticsearch/_async/helpers.py
index e4d5e6bc5..c9243af63 100644
--- a/elasticsearch/_async/helpers.py
+++ b/elasticsearch/_async/helpers.py
@@ -33,12 +33,16 @@
Union,
)
+from ..compat import safe_task
from ..exceptions import ApiError, NotFoundError, TransportError
from ..helpers.actions import (
_TYPE_BULK_ACTION,
_TYPE_BULK_ACTION_BODY,
_TYPE_BULK_ACTION_HEADER,
_TYPE_BULK_ACTION_HEADER_AND_BODY,
+ _TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY,
+ _TYPE_BULK_ACTION_WITH_META,
+ BulkMeta,
_ActionChunker,
_process_bulk_chunk_error,
_process_bulk_chunk_success,
@@ -54,9 +58,10 @@
async def _chunk_actions(
- actions: AsyncIterable[_TYPE_BULK_ACTION_HEADER_AND_BODY],
+ actions: AsyncIterable[_TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY],
chunk_size: int,
max_chunk_bytes: int,
+ flush_after_seconds: Optional[float],
serializer: Serializer,
) -> AsyncIterable[
Tuple[
@@ -76,10 +81,42 @@ async def _chunk_actions(
chunker = _ActionChunker(
chunk_size=chunk_size, max_chunk_bytes=max_chunk_bytes, serializer=serializer
)
- async for action, data in actions:
- ret = chunker.feed(action, data)
- if ret:
- yield ret
+
+ if not flush_after_seconds:
+ async for action, data in actions:
+ ret = chunker.feed(action, data)
+ if ret:
+ yield ret
+ else:
+ item_queue: asyncio.Queue[_TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY] = (
+ asyncio.Queue()
+ )
+
+ async def get_items() -> None:
+ try:
+ async for item in actions:
+ await item_queue.put(item)
+ finally:
+ await item_queue.put((BulkMeta.done, None))
+
+ async with safe_task(get_items()):
+ timeout: Optional[float] = flush_after_seconds
+ while True:
+ try:
+ action, data = await asyncio.wait_for(
+ item_queue.get(), timeout=timeout
+ )
+ timeout = flush_after_seconds
+ except asyncio.TimeoutError:
+ action, data = BulkMeta.flush, None
+ timeout = None
+
+ if action is BulkMeta.done:
+ break
+ ret = chunker.feed(action, data)
+ if ret:
+ yield ret
+
ret = chunker.flush()
if ret:
yield ret
@@ -159,9 +196,13 @@ async def azip(
async def async_streaming_bulk(
client: AsyncElasticsearch,
- actions: Union[Iterable[_TYPE_BULK_ACTION], AsyncIterable[_TYPE_BULK_ACTION]],
+ actions: Union[
+ Iterable[_TYPE_BULK_ACTION_WITH_META],
+ AsyncIterable[_TYPE_BULK_ACTION_WITH_META],
+ ],
chunk_size: int = 500,
max_chunk_bytes: int = 100 * 1024 * 1024,
+ flush_after_seconds: Optional[float] = None,
raise_on_error: bool = True,
expand_action_callback: Callable[
[_TYPE_BULK_ACTION], _TYPE_BULK_ACTION_HEADER_AND_BODY
@@ -194,6 +235,9 @@ async def async_streaming_bulk(
:arg actions: iterable or async iterable containing the actions to be executed
:arg chunk_size: number of docs in one chunk sent to es (default: 500)
:arg max_chunk_bytes: the maximum size of the request in bytes (default: 100MB)
+    :arg flush_after_seconds: time in seconds after which a chunk is written even
+        if it hasn't reached `chunk_size` or `max_chunk_bytes`. Set to `None` or
+        0 to disable the timeout-based flush. (default: `None`)
:arg raise_on_error: raise ``BulkIndexError`` containing errors (as `.errors`)
from the execution of the last chunk when some occur. By default we raise.
:arg raise_on_exception: if ``False`` then don't propagate exceptions from
@@ -220,9 +264,14 @@ async def async_streaming_bulk(
if isinstance(retry_on_status, int):
retry_on_status = (retry_on_status,)
- async def map_actions() -> AsyncIterable[_TYPE_BULK_ACTION_HEADER_AND_BODY]:
+ async def map_actions() -> (
+ AsyncIterable[_TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY]
+ ):
async for item in aiter(actions):
- yield expand_action_callback(item)
+ if isinstance(item, BulkMeta):
+ yield item, None
+ else:
+ yield expand_action_callback(item)
serializer = client.transport.serializers.get_serializer("application/json")
@@ -234,7 +283,7 @@ async def map_actions() -> AsyncIterable[_TYPE_BULK_ACTION_HEADER_AND_BODY]:
]
bulk_actions: List[bytes]
async for bulk_data, bulk_actions in _chunk_actions(
- map_actions(), chunk_size, max_chunk_bytes, serializer
+ map_actions(), chunk_size, max_chunk_bytes, flush_after_seconds, serializer
):
for attempt in range(max_retries + 1):
to_retry: List[bytes] = []
diff --git a/elasticsearch/compat.py b/elasticsearch/compat.py
index 007971306..b44b9daea 100644
--- a/elasticsearch/compat.py
+++ b/elasticsearch/compat.py
@@ -15,11 +15,14 @@
# specific language governing permissions and limitations
# under the License.
+import asyncio
import inspect
import os
import sys
+from contextlib import asynccontextmanager, contextmanager
from pathlib import Path
-from typing import Tuple, Type, Union
+from threading import Thread
+from typing import Any, AsyncIterator, Callable, Coroutine, Iterator, Tuple, Type, Union
string_types: Tuple[Type[str], Type[bytes]] = (str, bytes)
@@ -76,9 +79,48 @@ def warn_stacklevel() -> int:
return 0
+@contextmanager
+def safe_thread(
+ target: Callable[..., Any], *args: Any, **kwargs: Any
+) -> Iterator[Thread]:
+ """Run a thread within a context manager block.
+
+ The thread is automatically joined when the block ends. If the thread raised
+ an exception, it is raised in the caller's context.
+ """
+ captured_exception = None
+
+ def run() -> None:
+ try:
+ target(*args, **kwargs)
+ except BaseException as exc:
+ nonlocal captured_exception
+ captured_exception = exc
+
+ thread = Thread(target=run)
+ thread.start()
+ yield thread
+ thread.join()
+ if captured_exception:
+ raise captured_exception
+
+
+@asynccontextmanager
+async def safe_task(coro: Coroutine[Any, Any, Any]) -> AsyncIterator[asyncio.Task[Any]]:
+ """Run a background task within a context manager block.
+
+ The task is awaited when the block ends.
+ """
+ task = asyncio.create_task(coro)
+ yield task
+ await task
+
+
__all__ = [
"string_types",
"to_str",
"to_bytes",
"warn_stacklevel",
+ "safe_thread",
+ "safe_task",
]
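A short usage sketch for the `safe_thread` helper defined above; the worker function is an illustrative assumption:

```python
# Hedged sketch: the thread is joined when the block exits, and an exception
# raised inside the worker is re-raised in the caller at that point.
from elasticsearch.compat import safe_thread

results = []

def worker(n: int) -> None:
    for i in range(n):
        results.append(i * i)

with safe_thread(worker, 5) as thread:
    pass  # the worker runs concurrently with this block

assert results == [0, 1, 4, 9, 16]  # join completed on block exit
```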
diff --git a/elasticsearch/helpers/__init__.py b/elasticsearch/helpers/__init__.py
index 67676932b..6f8f24c21 100644
--- a/elasticsearch/helpers/__init__.py
+++ b/elasticsearch/helpers/__init__.py
@@ -19,12 +19,21 @@
from .._utils import fixup_module_metadata
from .actions import _chunk_actions # noqa: F401
from .actions import _process_bulk_chunk # noqa: F401
-from .actions import bulk, expand_action, parallel_bulk, reindex, scan, streaming_bulk
+from .actions import (
+ BULK_FLUSH,
+ bulk,
+ expand_action,
+ parallel_bulk,
+ reindex,
+ scan,
+ streaming_bulk,
+)
from .errors import BulkIndexError, ScanError
__all__ = [
"BulkIndexError",
"ScanError",
+ "BULK_FLUSH",
"expand_action",
"streaming_bulk",
"bulk",
diff --git a/elasticsearch/helpers/actions.py b/elasticsearch/helpers/actions.py
index d1a43a8dc..79197a1e4 100644
--- a/elasticsearch/helpers/actions.py
+++ b/elasticsearch/helpers/actions.py
@@ -16,9 +16,10 @@
# under the License.
import logging
+import queue
import time
+from enum import Enum
from operator import methodcaller
-from queue import Queue
from typing import (
Any,
Callable,
@@ -37,13 +38,21 @@
from elastic_transport import OpenTelemetrySpan
from .. import Elasticsearch
-from ..compat import to_bytes
+from ..compat import safe_thread, to_bytes
from ..exceptions import ApiError, NotFoundError, TransportError
from ..serializer import Serializer
from .errors import BulkIndexError, ScanError
logger = logging.getLogger("elasticsearch.helpers")
+
+class BulkMeta(Enum):
+ flush = 1
+ done = 2
+
+
+BULK_FLUSH = BulkMeta.flush
+
_TYPE_BULK_ACTION = Union[bytes, str, Dict[str, Any]]
_TYPE_BULK_ACTION_HEADER = Dict[str, Any]
_TYPE_BULK_ACTION_BODY = Union[None, bytes, Dict[str, Any]]
@@ -51,6 +60,13 @@
_TYPE_BULK_ACTION_HEADER, _TYPE_BULK_ACTION_BODY
]
+_TYPE_BULK_ACTION_WITH_META = Union[bytes, str, Dict[str, Any], BulkMeta]
+_TYPE_BULK_ACTION_HEADER_WITH_META = Union[Dict[str, Any], BulkMeta]
+_TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY = Union[
+ Tuple[_TYPE_BULK_ACTION_HEADER, _TYPE_BULK_ACTION_BODY],
+ Tuple[BulkMeta, Any],
+]
+
def expand_action(data: _TYPE_BULK_ACTION) -> _TYPE_BULK_ACTION_HEADER_AND_BODY:
"""
@@ -139,7 +155,9 @@ def __init__(
] = []
def feed(
- self, action: _TYPE_BULK_ACTION_HEADER, data: _TYPE_BULK_ACTION_BODY
+ self,
+ action: _TYPE_BULK_ACTION_HEADER_WITH_META,
+ data: _TYPE_BULK_ACTION_BODY,
) -> Optional[
Tuple[
List[
@@ -152,23 +170,25 @@ def feed(
]
]:
ret = None
- raw_action = action
- raw_data = data
- action_bytes = to_bytes(self.serializer.dumps(action), "utf-8")
- # +1 to account for the trailing new line character
- cur_size = len(action_bytes) + 1
-
- data_bytes: Optional[bytes]
- if data is not None:
- data_bytes = to_bytes(self.serializer.dumps(data), "utf-8")
- cur_size += len(data_bytes) + 1
- else:
- data_bytes = None
+ action_bytes = b""
+ data_bytes: Optional[bytes] = None
+ cur_size = 0
+ if not isinstance(action, BulkMeta):
+ action_bytes = to_bytes(self.serializer.dumps(action), "utf-8")
+ # +1 to account for the trailing new line character
+ cur_size = len(action_bytes) + 1
+
+ if data is not None:
+ data_bytes = to_bytes(self.serializer.dumps(data), "utf-8")
+ cur_size += len(data_bytes) + 1
+ else:
+ data_bytes = None
# full chunk, send it and start a new one
if self.bulk_actions and (
self.size + cur_size > self.max_chunk_bytes
or self.action_count == self.chunk_size
+ or (action == BulkMeta.flush and self.bulk_actions)
):
ret = (self.bulk_data, self.bulk_actions)
self.bulk_actions = []
@@ -176,15 +196,16 @@ def feed(
self.size = 0
self.action_count = 0
- self.bulk_actions.append(action_bytes)
- if data_bytes is not None:
- self.bulk_actions.append(data_bytes)
- self.bulk_data.append((raw_action, raw_data))
- else:
- self.bulk_data.append((raw_action,))
+ if not isinstance(action, BulkMeta):
+ self.bulk_actions.append(action_bytes)
+ if data_bytes is not None:
+ self.bulk_actions.append(data_bytes)
+ self.bulk_data.append((action, data))
+ else:
+ self.bulk_data.append((action,))
- self.size += cur_size
- self.action_count += 1
+ self.size += cur_size
+ self.action_count += 1
return ret
def flush(
@@ -209,9 +230,10 @@ def flush(
def _chunk_actions(
- actions: Iterable[_TYPE_BULK_ACTION_HEADER_AND_BODY],
+ actions: Iterable[_TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY],
chunk_size: int,
max_chunk_bytes: int,
+ flush_after_seconds: Optional[float],
serializer: Serializer,
) -> Iterable[
Tuple[
@@ -231,10 +253,41 @@ def _chunk_actions(
chunker = _ActionChunker(
chunk_size=chunk_size, max_chunk_bytes=max_chunk_bytes, serializer=serializer
)
- for action, data in actions:
- ret = chunker.feed(action, data)
- if ret:
- yield ret
+
+ if not flush_after_seconds:
+ for action, data in actions:
+ ret = chunker.feed(action, data)
+ if ret:
+ yield ret
+ else:
+ item_queue: queue.Queue[_TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY] = (
+ queue.Queue()
+ )
+
+ def get_items() -> None:
+ try:
+ for item in actions:
+ item_queue.put(item)
+ finally:
+ # make sure we signal the end even if there is an exception
+ item_queue.put((BulkMeta.done, None))
+
+ with safe_thread(get_items):
+ timeout: Optional[float] = flush_after_seconds
+ while True:
+ try:
+ action, data = item_queue.get(timeout=timeout)
+ timeout = flush_after_seconds
+ except queue.Empty:
+ action, data = BulkMeta.flush, None
+ timeout = None
+
+ if action is BulkMeta.done:
+ break
+ ret = chunker.feed(action, data)
+ if ret:
+ yield ret
+
ret = chunker.flush()
if ret:
yield ret
@@ -361,9 +414,10 @@ def _process_bulk_chunk(
def streaming_bulk(
client: Elasticsearch,
- actions: Iterable[_TYPE_BULK_ACTION],
+ actions: Iterable[_TYPE_BULK_ACTION_WITH_META],
chunk_size: int = 500,
max_chunk_bytes: int = 100 * 1024 * 1024,
+ flush_after_seconds: Optional[float] = None,
raise_on_error: bool = True,
expand_action_callback: Callable[
[_TYPE_BULK_ACTION], _TYPE_BULK_ACTION_HEADER_AND_BODY
@@ -397,6 +451,9 @@ def streaming_bulk(
:arg actions: iterable containing the actions to be executed
:arg chunk_size: number of docs in one chunk sent to es (default: 500)
:arg max_chunk_bytes: the maximum size of the request in bytes (default: 100MB)
+    :arg flush_after_seconds: time in seconds after which a chunk is written even
+        if it hasn't reached `chunk_size` or `max_chunk_bytes`. Set to `None` or
+        0 to disable the timeout-based flush. (default: `None`)
:arg raise_on_error: raise ``BulkIndexError`` containing errors (as `.errors`)
from the execution of the last chunk when some occur. By default we raise.
:arg raise_on_exception: if ``False`` then don't propagate exceptions from
@@ -425,6 +482,13 @@ def streaming_bulk(
serializer = client.transport.serializers.get_serializer("application/json")
+ def expand_action_with_meta(
+ data: _TYPE_BULK_ACTION_WITH_META,
+ ) -> _TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY:
+ if isinstance(data, BulkMeta):
+ return data, None
+ return expand_action_callback(data)
+
bulk_data: List[
Union[
Tuple[_TYPE_BULK_ACTION_HEADER],
@@ -433,9 +497,10 @@ def streaming_bulk(
]
bulk_actions: List[bytes]
for bulk_data, bulk_actions in _chunk_actions(
- map(expand_action_callback, actions),
+ map(expand_action_with_meta, actions),
chunk_size,
max_chunk_bytes,
+ flush_after_seconds,
serializer,
):
for attempt in range(max_retries + 1):
@@ -557,6 +622,7 @@ def parallel_bulk(
thread_count: int = 4,
chunk_size: int = 500,
max_chunk_bytes: int = 100 * 1024 * 1024,
+ flush_after_seconds: Optional[float] = None,
queue_size: int = 4,
expand_action_callback: Callable[
[_TYPE_BULK_ACTION], _TYPE_BULK_ACTION_HEADER_AND_BODY
@@ -573,6 +639,9 @@ def parallel_bulk(
:arg thread_count: size of the threadpool to use for the bulk requests
:arg chunk_size: number of docs in one chunk sent to es (default: 500)
:arg max_chunk_bytes: the maximum size of the request in bytes (default: 100MB)
+    :arg flush_after_seconds: time in seconds after which a chunk is written even
+        if it hasn't reached `chunk_size` or `max_chunk_bytes`. Set to `None` or
+        0 to disable the timeout-based flush. (default: `None`)
:arg raise_on_error: raise ``BulkIndexError`` containing errors (as `.errors`)
from the execution of the last chunk when some occur. By default we raise.
:arg raise_on_exception: if ``False`` then don't propagate exceptions from
@@ -596,7 +665,7 @@ def _setup_queues(self) -> None:
super()._setup_queues() # type: ignore[misc]
# The queue must be at least the size of the number of threads to
# prevent hanging when inserting sentinel values during teardown.
- self._inqueue: Queue[
+ self._inqueue: queue.Queue[
Tuple[
List[
Union[
@@ -605,7 +674,7 @@ def _setup_queues(self) -> None:
],
List[bytes],
]
- ] = Queue(max(queue_size, thread_count))
+ ] = queue.Queue(max(queue_size, thread_count))
self._quick_put = self._inqueue.put
with client._otel.helpers_span("helpers.parallel_bulk") as otel_span:
@@ -625,7 +694,11 @@ def _setup_queues(self) -> None:
)
),
_chunk_actions(
- expanded_actions, chunk_size, max_chunk_bytes, serializer
+ expanded_actions,
+ chunk_size,
+ max_chunk_bytes,
+ flush_after_seconds,
+ serializer,
),
):
yield from result
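`parallel_bulk` forwards the new parameter down to `_chunk_actions` as shown above. A hedged usage sketch; the connection URL and index name are assumptions:

```python
# Hedged sketch: chunks are sent when full, or after one second of source
# inactivity, whichever comes first.
from elasticsearch import Elasticsearch, helpers

client = Elasticsearch("http://localhost:9200")
actions = ({"_index": "test-index", "answer": i} for i in range(10_000))

for ok, info in helpers.parallel_bulk(
    client, actions, thread_count=4, chunk_size=500, flush_after_seconds=1.0
):
    assert ok
```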
diff --git a/test_elasticsearch/test_async/test_server/test_helpers.py b/test_elasticsearch/test_async/test_server/test_helpers.py
index a235784be..219b81b83 100644
--- a/test_elasticsearch/test_async/test_server/test_helpers.py
+++ b/test_elasticsearch/test_async/test_server/test_helpers.py
@@ -17,6 +17,7 @@
import asyncio
import logging
+import time
from datetime import datetime, timedelta, timezone
from unittest.mock import MagicMock, call, patch
@@ -123,6 +124,45 @@ def sync_gen():
"_source"
]
+ async def test_explicit_flushes(self, async_client):
+ async def async_gen():
+ yield {"answer": 2, "_id": 0}
+ yield {"answer": 1, "_id": 1}
+ yield helpers.BULK_FLUSH
+ await asyncio.sleep(0.5)
+ yield {"answer": 2, "_id": 2}
+
+ timestamps = []
+ async for ok, item in helpers.async_streaming_bulk(
+ async_client, async_gen(), index="test-index", refresh=True
+ ):
+ timestamps.append(time.time())
+ assert ok
+
+ # make sure there is a pause between the writing of the 2nd and 3rd items
+ assert timestamps[2] - timestamps[1] > (timestamps[1] - timestamps[0]) * 2
+
+ async def test_timeout_flushes(self, async_client):
+ async def async_gen():
+ yield {"answer": 2, "_id": 0}
+ yield {"answer": 1, "_id": 1}
+ await asyncio.sleep(0.5)
+ yield {"answer": 2, "_id": 2}
+
+ timestamps = []
+ async for ok, item in helpers.async_streaming_bulk(
+ async_client,
+ async_gen(),
+ index="test-index",
+ refresh=True,
+ flush_after_seconds=0.05,
+ ):
+ assert ok
+ timestamps.append(time.time())
+
+ # make sure there is a pause between the writing of the 2nd and 3rd items
+ assert timestamps[2] - timestamps[1] > (timestamps[1] - timestamps[0]) * 2
+
async def test_all_errors_from_chunk_are_raised_on_failure(self, async_client):
await async_client.indices.create(
index="i",
diff --git a/test_elasticsearch/test_helpers.py b/test_elasticsearch/test_helpers.py
index e30635f44..398cb6cc3 100644
--- a/test_elasticsearch/test_helpers.py
+++ b/test_elasticsearch/test_helpers.py
@@ -18,6 +18,7 @@
import pickle
import threading
import time
+from typing import Optional
from unittest import mock
import pytest
@@ -156,21 +157,34 @@ def test__source_metadata_or_source(self):
{"_source": {"key2": "val2"}, "key": "val", "_op_type": "update"}
) == ({"update": {}}, {"key2": "val2"})
- def test_chunks_are_chopped_by_byte_size(self):
+ @pytest.mark.parametrize("flush_seconds", [None, 10])
+ def test_chunks_are_chopped_by_byte_size(self, flush_seconds: Optional[float]):
assert 100 == len(
- list(helpers._chunk_actions(self.actions, 100000, 1, JSONSerializer()))
+ list(
+ helpers._chunk_actions(
+ self.actions, 100000, 1, flush_seconds, JSONSerializer()
+ )
+ )
)
- def test_chunks_are_chopped_by_chunk_size(self):
+ @pytest.mark.parametrize("flush_seconds", [None, 10])
+ def test_chunks_are_chopped_by_chunk_size(self, flush_seconds: Optional[float]):
assert 10 == len(
- list(helpers._chunk_actions(self.actions, 10, 99999999, JSONSerializer()))
+ list(
+ helpers._chunk_actions(
+ self.actions, 10, 99999999, flush_seconds, JSONSerializer()
+ )
+ )
)
- def test_chunks_are_chopped_by_byte_size_properly(self):
+ @pytest.mark.parametrize("flush_seconds", [None, 10])
+ def test_chunks_are_chopped_by_byte_size_properly(
+ self, flush_seconds: Optional[float]
+ ):
max_byte_size = 170
chunks = list(
helpers._chunk_actions(
- self.actions, 100000, max_byte_size, JSONSerializer()
+ self.actions, 100000, max_byte_size, flush_seconds, JSONSerializer()
)
)
assert 25 == len(chunks)
@@ -178,6 +192,30 @@ def test_chunks_are_chopped_by_byte_size_properly(self):
chunk = b"".join(chunk_actions)
assert len(chunk) <= max_byte_size
+ @pytest.mark.parametrize("flush_seconds", [None, 10])
+ def test_chunks_are_chopped_by_flush(self, flush_seconds: Optional[float]):
+ flush = (helpers.BULK_FLUSH, None)
+ actions = (
+ self.actions[:3]
+ + [flush] * 2 # two consecutive flushes after 3 items
+ + self.actions[3:4]
+ + [flush] # flush after one more item
+ + self.actions[4:]
+ + [flush] # flush at the end
+ )
+ chunks = list(
+ helpers._chunk_actions(
+ actions, 100, 99999999, flush_seconds, JSONSerializer()
+ )
+ )
+ assert 3 == len(chunks)
+ assert len(chunks[0][0]) == 3
+ assert len(chunks[0][1]) == 6
+ assert len(chunks[1][0]) == 1
+ assert len(chunks[1][1]) == 2
+ assert len(chunks[2][0]) == 96
+ assert len(chunks[2][1]) == 192
+
class TestExpandActions:
@pytest.mark.parametrize("action", ["whatever", b"whatever"])
diff --git a/test_elasticsearch/test_server/test_helpers.py b/test_elasticsearch/test_server/test_helpers.py
index 74b9f0ef8..f175c6e0b 100644
--- a/test_elasticsearch/test_server/test_helpers.py
+++ b/test_elasticsearch/test_server/test_helpers.py
@@ -16,6 +16,7 @@
# under the License.
import json
+import time
from datetime import datetime, timedelta
from unittest.mock import call, patch
@@ -75,6 +76,47 @@ def test_bulk_all_documents_get_inserted(sync_client):
assert {"answer": 42} == sync_client.get(index="test-index", id=42)["_source"]
+def test_explicit_flushes(sync_client):
+ def sync_gen():
+ yield {"answer": 0, "_id": 0}
+ yield {"answer": 1, "_id": 1}
+ yield helpers.BULK_FLUSH
+ time.sleep(0.5)
+ yield {"answer": 2, "_id": 2}
+
+ timestamps = []
+ for ok, item in helpers.streaming_bulk(
+ sync_client, sync_gen(), index="test-index", refresh=True
+ ):
+ assert ok
+ timestamps.append(time.time())
+
+ # make sure there is a pause between the writing of the 2nd and 3rd items
+ assert timestamps[2] - timestamps[1] > (timestamps[1] - timestamps[0]) * 2
+
+
+def test_timeout_flushes(sync_client):
+ def sync_gen():
+ yield {"answer": 0, "_id": 0}
+ yield {"answer": 1, "_id": 1}
+ time.sleep(0.5)
+ yield {"answer": 2, "_id": 2}
+
+ timestamps = []
+ for ok, item in helpers.streaming_bulk(
+ sync_client,
+ sync_gen(),
+ index="test-index",
+ refresh=True,
+ flush_after_seconds=0.05,
+ ):
+ assert ok
+ timestamps.append(time.time())
+
+ # make sure there is a pause between the writing of the 2nd and 3rd items
+ assert timestamps[2] - timestamps[1] > (timestamps[1] - timestamps[0]) * 2
+
+
def test_bulk_all_errors_from_chunk_are_raised_on_failure(sync_client):
sync_client.indices.create(
index="i",
From ca7a7ba88ee69a2d57142756fc84d88f0bbe17ec Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 23 Oct 2025 12:03:30 +0400
Subject: [PATCH 20/21] Update link in dsl_how_to_guides.md (#3122) (#3123)
I'd like to temporarily divert this "Store and retrieve scripts" link.
(cherry picked from commit f5e504d50d6064d8d0d87ff2b9b529d2e2e72cc7)
Co-authored-by: David Kilfoyle <41695641+kilfoyle@users.noreply.github.com>
---
docs/reference/dsl_how_to_guides.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/reference/dsl_how_to_guides.md b/docs/reference/dsl_how_to_guides.md
index 5f0884c3c..4e71984f8 100644
--- a/docs/reference/dsl_how_to_guides.md
+++ b/docs/reference/dsl_how_to_guides.md
@@ -917,7 +917,7 @@ first = Post.get(id=42)
first.update(published=True, published_by='me')
```
-In case you wish to use a `painless` script to perform the update you can pass in the script string as `script` or the `id` of a [stored script](docs-content://explore-analyze/scripting/modules-scripting-using.md#script-stored-scripts) via `script_id`. All additional keyword arguments to the `update` method will then be passed in as parameters of the script. The document will not be updated in place.
+In case you wish to use a `painless` script to perform the update you can pass in the script string as `script` or the `id` of a [stored script](docs-content://explore-analyze/scripting/modules-scripting-using.md) via `script_id`. All additional keyword arguments to the `update` method will then be passed in as parameters of the script. The document will not be updated in place.
```python
# retrieve the document
From 335154ebe7c4b89b4567e2a84675a68c3e0e5f1c Mon Sep 17 00:00:00 2001
From: Elastic Machine
Date: Fri, 24 Oct 2025 20:16:47 +0200
Subject: [PATCH 21/21] Auto-generated code for 9.1 (#3072)
* Auto-generated API code
* streams namespace
* using fixed generator
* update schema version
* more references to new namespaces
---------
Co-authored-by: Miguel Grinberg
---
docs/sphinx/api/indices.rst | 2 +-
docs/sphinx/api/streams.rst | 9 +
docs/sphinx/es_api.rst | 1 +
elasticsearch/_async/client/__init__.py | 2 +
elasticsearch/_async/client/cat.py | 506 +++++++++++++++++++++--
elasticsearch/_async/client/connector.py | 6 +-
elasticsearch/_async/client/indices.py | 32 +-
elasticsearch/_async/client/inference.py | 12 +-
elasticsearch/_async/client/logstash.py | 4 +-
elasticsearch/_async/client/nodes.py | 4 +-
elasticsearch/_async/client/shutdown.py | 20 +-
elasticsearch/_async/client/streams.py | 186 +++++++++
elasticsearch/_async/client/watcher.py | 6 +-
elasticsearch/_sync/client/__init__.py | 2 +
elasticsearch/_sync/client/cat.py | 506 +++++++++++++++++++++--
elasticsearch/_sync/client/connector.py | 6 +-
elasticsearch/_sync/client/indices.py | 32 +-
elasticsearch/_sync/client/inference.py | 12 +-
elasticsearch/_sync/client/logstash.py | 4 +-
elasticsearch/_sync/client/nodes.py | 4 +-
elasticsearch/_sync/client/shutdown.py | 20 +-
elasticsearch/_sync/client/streams.py | 186 +++++++++
elasticsearch/_sync/client/watcher.py | 6 +-
elasticsearch/_version.py | 1 +
elasticsearch/client.py | 2 +
elasticsearch/dsl/aggs.py | 97 +++++
elasticsearch/dsl/field.py | 17 +-
elasticsearch/dsl/query.py | 6 +-
elasticsearch/dsl/response/__init__.py | 3 +
elasticsearch/dsl/types.py | 240 ++++++++++-
pyproject.toml | 2 +-
31 files changed, 1796 insertions(+), 140 deletions(-)
create mode 100644 docs/sphinx/api/streams.rst
create mode 100644 elasticsearch/_async/client/streams.py
create mode 100644 elasticsearch/_sync/client/streams.py
diff --git a/docs/sphinx/api/indices.rst b/docs/sphinx/api/indices.rst
index 9523a3558..8d310a3b8 100644
--- a/docs/sphinx/api/indices.rst
+++ b/docs/sphinx/api/indices.rst
@@ -6,4 +6,4 @@ Indices
:no-index:
.. autoclass:: IndicesClient
- :members:
\ No newline at end of file
+ :members:
diff --git a/docs/sphinx/api/streams.rst b/docs/sphinx/api/streams.rst
new file mode 100644
index 000000000..d5f45b6e7
--- /dev/null
+++ b/docs/sphinx/api/streams.rst
@@ -0,0 +1,9 @@
+.. _streams:
+
+Streams
+-------
+.. py:module:: elasticsearch.client
+ :no-index:
+
+.. autoclass:: StreamsClient
+ :members:
diff --git a/docs/sphinx/es_api.rst b/docs/sphinx/es_api.rst
index d246ec6f5..0711a10eb 100644
--- a/docs/sphinx/es_api.rst
+++ b/docs/sphinx/es_api.rst
@@ -50,6 +50,7 @@ arguments are required for all calls.
api/snapshots
api/snapshottable-features
api/sql
+ api/streams
api/synonyms
api/tls-ssl
api/tasks
diff --git a/elasticsearch/_async/client/__init__.py b/elasticsearch/_async/client/__init__.py
index 8750504ca..eb49da0d0 100644
--- a/elasticsearch/_async/client/__init__.py
+++ b/elasticsearch/_async/client/__init__.py
@@ -74,6 +74,7 @@
from .snapshot import SnapshotClient
from .sql import SqlClient
from .ssl import SslClient
+from .streams import StreamsClient
from .synonyms import SynonymsClient
from .tasks import TasksClient
from .text_structure import TextStructureClient
@@ -378,6 +379,7 @@ def __init__(
self.shutdown = ShutdownClient(self)
self.sql = SqlClient(self)
self.ssl = SslClient(self)
+ self.streams = StreamsClient(self)
self.synonyms = SynonymsClient(self)
self.text_structure = TextStructureClient(self)
self.transform = TransformClient(self)
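The generated namespace becomes reachable as a client attribute like the others registered here. A minimal hedged sketch; the connection URL is illustrative and no specific `StreamsClient` methods are assumed:

```python
from elasticsearch import Elasticsearch

client = Elasticsearch("http://localhost:9200")
streams = client.streams       # StreamsClient instance added by this patch
print(type(streams).__name__)  # "StreamsClient"
```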
diff --git a/elasticsearch/_async/client/cat.py b/elasticsearch/_async/client/cat.py
index 84f946f5c..fc121e9ca 100644
--- a/elasticsearch/_async/client/cat.py
+++ b/elasticsearch/_async/client/cat.py
@@ -36,6 +36,9 @@ async def aliases(
self,
*,
name: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
expand_wildcards: t.Optional[
t.Union[
@@ -80,6 +83,9 @@ async def aliases(
master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
pretty: t.Optional[bool] = None,
s: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ time: t.Optional[
+ t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
+ ] = None,
v: t.Optional[bool] = None,
) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
"""
@@ -95,6 +101,14 @@ async def aliases(
:param name: A comma-separated list of aliases to retrieve. Supports wildcards
(`*`). To retrieve all aliases, omit this parameter or use `*` or `_all`.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param expand_wildcards: The type of index that wildcard patterns can match.
If the request can target data streams, this argument determines whether
wildcard expressions match hidden data streams. It supports comma-separated
@@ -112,6 +126,12 @@ async def aliases(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -122,6 +142,8 @@ async def aliases(
__path_parts = {}
__path = "/_cat/aliases"
__query: t.Dict[str, t.Any] = {}
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if expand_wildcards is not None:
@@ -142,6 +164,8 @@ async def aliases(
__query["pretty"] = pretty
if s is not None:
__query["s"] = s
+ if time is not None:
+ __query["time"] = time
if v is not None:
__query["v"] = v
__headers = {"accept": "text/plain,application/json"}
@@ -213,6 +237,9 @@ async def allocation(
master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
pretty: t.Optional[bool] = None,
s: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ time: t.Optional[
+ t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
+ ] = None,
v: t.Optional[bool] = None,
) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
"""
@@ -227,7 +254,14 @@ async def allocation(
:param node_id: A comma-separated list of node identifiers or names used to limit
the returned information.
- :param bytes: The unit used to display byte values.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
        :param h: A comma-separated list of column names to display. It supports simple
@@ -242,6 +276,12 @@ async def allocation(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -274,6 +314,8 @@ async def allocation(
__query["pretty"] = pretty
if s is not None:
__query["s"] = s
+ if time is not None:
+ __query["time"] = time
if v is not None:
__query["v"] = v
__headers = {"accept": "text/plain,application/json"}
@@ -291,6 +333,9 @@ async def component_templates(
self,
*,
name: t.Optional[str] = None,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
@@ -330,6 +375,9 @@ async def component_templates(
master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
pretty: t.Optional[bool] = None,
s: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ time: t.Optional[
+ t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
+ ] = None,
v: t.Optional[bool] = None,
) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
"""
@@ -346,6 +394,14 @@ async def component_templates(
:param name: The name of the component template. It accepts wildcard expressions.
If it is omitted, all component templates are returned.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
        :param h: A comma-separated list of column names to display. It supports simple
@@ -360,6 +416,12 @@ async def component_templates(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -370,6 +432,8 @@ async def component_templates(
__path_parts = {}
__path = "/_cat/component_templates"
__query: t.Dict[str, t.Any] = {}
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
@@ -390,6 +454,8 @@ async def component_templates(
__query["pretty"] = pretty
if s is not None:
__query["s"] = s
+ if time is not None:
+ __query["time"] = time
if v is not None:
__query["v"] = v
__headers = {"accept": "text/plain,application/json"}
@@ -407,6 +473,9 @@ async def count(
self,
*,
index: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
@@ -420,6 +489,9 @@ async def count(
human: t.Optional[bool] = None,
pretty: t.Optional[bool] = None,
s: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ time: t.Optional[
+ t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
+ ] = None,
v: t.Optional[bool] = None,
) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
"""
@@ -437,6 +509,14 @@ async def count(
:param index: A comma-separated list of data streams, indices, and aliases used
to limit the request. It supports wildcards (`*`). To target all data streams
and indices, omit this parameter or use `*` or `_all`.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
        :param h: A comma-separated list of column names to display. It supports simple
@@ -446,6 +526,12 @@ async def count(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -456,6 +542,8 @@ async def count(
__path_parts = {}
__path = "/_cat/count"
__query: t.Dict[str, t.Any] = {}
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
@@ -472,6 +560,8 @@ async def count(
__query["pretty"] = pretty
if s is not None:
__query["s"] = s
+ if time is not None:
+ __query["time"] = time
if v is not None:
__query["v"] = v
__headers = {"accept": "text/plain,application/json"}
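The new `bytes` and `time` parameters map straight to query-string arguments, so usage is uniform across the cat APIs. A minimal sketch, assuming a local cluster and a hypothetical index name (the async client exposes the same signature with `await`):

    from elasticsearch import Elasticsearch

    es = Elasticsearch("http://localhost:9200")  # assumed local cluster

    # Render time-duration columns as integer seconds; the request becomes
    # GET /_cat/count/my-index?time=s&format=json
    resp = es.cat.count(index="my-index", time="s", format="json")
    print(resp)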
@@ -507,6 +597,9 @@ async def fielddata(
human: t.Optional[bool] = None,
pretty: t.Optional[bool] = None,
s: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ time: t.Optional[
+ t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
+ ] = None,
v: t.Optional[bool] = None,
) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
"""
@@ -522,7 +615,14 @@ async def fielddata(
:param fields: Comma-separated list of fields used to limit returned information.
To retrieve all fields, omit this parameter.
- :param bytes: The unit used to display byte values.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: A comma-separated list of column names to display. It supports simple
@@ -532,6 +632,12 @@ async def fielddata(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -560,6 +666,8 @@ async def fielddata(
__query["pretty"] = pretty
if s is not None:
__query["s"] = s
+ if time is not None:
+ __query["time"] = time
if v is not None:
__query["v"] = v
__headers = {"accept": "text/plain,application/json"}
@@ -576,6 +684,9 @@ async def fielddata(
async def health(
self,
*,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
@@ -652,6 +763,14 @@ async def health(
``_
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: A comma-separated list of column names to display. It supports simple
@@ -661,13 +780,20 @@ async def health(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
- :param time: The unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param ts: If true, returns `HH:MM:SS` and Unix epoch timestamps.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str] = {}
__path = "/_cat/health"
__query: t.Dict[str, t.Any] = {}
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
@@ -1092,7 +1218,14 @@ async def indices(
:param index: Comma-separated list of data streams, indices, and aliases used
to limit the request. Supports wildcards (`*`). To target all data streams
and indices, omit this parameter or use `*` or `_all`.
- :param bytes: The unit used to display byte values.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param expand_wildcards: The type of index that wildcard patterns can match.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
@@ -1109,7 +1242,12 @@ async def indices(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
- :param time: The unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
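The round-down semantics described above mean that with `bytes="kb"` a value of 1536 bytes renders as `1` (1536 // 1024), not `1.5`. A hedged sketch against a hypothetical index:

    from elasticsearch import Elasticsearch

    es = Elasticsearch("http://localhost:9200")  # assumed local cluster

    # Byte-size columns (store.size, pri.store.size, ...) come back as plain
    # integers in mebibytes; time-duration columns as integer seconds
    print(es.cat.indices(index="my-index", bytes="mb", time="s", v=True))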
@@ -1166,6 +1304,9 @@ async def indices(
async def master(
self,
*,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
@@ -1181,6 +1322,9 @@ async def master(
master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
pretty: t.Optional[bool] = None,
s: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ time: t.Optional[
+ t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
+ ] = None,
v: t.Optional[bool] = None,
) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
"""
@@ -1193,6 +1337,14 @@ async def master(
``_
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: A comma-separated list of column names to display. It supports simple
@@ -1207,11 +1359,19 @@ async def master(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str] = {}
__path = "/_cat/master"
__query: t.Dict[str, t.Any] = {}
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
@@ -1232,6 +1392,8 @@ async def master(
__query["pretty"] = pretty
if s is not None:
__query["s"] = s
+ if time is not None:
+ __query["time"] = time
if v is not None:
__query["v"] = v
__headers = {"accept": "text/plain,application/json"}
@@ -1374,8 +1536,15 @@ async def ml_data_frame_analytics(
:param id: The ID of the data frame analytics to fetch
:param allow_no_match: Whether to ignore if a wildcard expression matches no
- configs. (This includes `_all` string or when no configs have been specified)
- :param bytes: The unit in which to display byte values
+ configs. (This includes the `_all` string or when no configs have been specified.)
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: Comma-separated list of column names to display.
@@ -1383,7 +1552,12 @@ async def ml_data_frame_analytics(
be combined with any other query string option.
:param s: Comma-separated list of column names or column aliases used to sort
the response.
- :param time: Unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -1434,6 +1608,9 @@ async def ml_datafeeds(
*,
datafeed_id: t.Optional[str] = None,
allow_no_match: t.Optional[bool] = None,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
@@ -1549,6 +1726,14 @@ async def ml_datafeeds(
array when there are no matches and the subset of results when there are
partial matches. If `false`, the API returns a 404 status code when there
are no matches or only partial matches.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: Comma-separated list of column names to display.
@@ -1556,7 +1741,12 @@ async def ml_datafeeds(
be combined with any other query string option.
:param s: Comma-separated list of column names or column aliases used to sort
the response.
- :param time: The unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -1569,6 +1759,8 @@ async def ml_datafeeds(
__query: t.Dict[str, t.Any] = {}
if allow_no_match is not None:
__query["allow_no_match"] = allow_no_match
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
@@ -1914,7 +2106,14 @@ async def ml_jobs(
array when there are no matches and the subset of results when there are
partial matches. If `false`, the API returns a 404 status code when there
are no matches or only partial matches.
- :param bytes: The unit used to display byte values.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: Comma-separated list of column names to display.
@@ -1922,7 +2121,12 @@ async def ml_jobs(
be combined with any other query string option.
:param s: Comma-separated list of column names or column aliases used to sort
the response.
- :param time: The unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -2099,7 +2303,14 @@ async def ml_trained_models(
when there are no matches and the subset of results when there are partial
matches. If `false`, the API returns a 404 status code when there are no
matches or only partial matches.
- :param bytes: The unit used to display byte values.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param from_: Skips the specified number of trained models.
@@ -2109,7 +2320,12 @@ async def ml_trained_models(
:param s: A comma-separated list of column names or aliases used to sort the
response.
:param size: The maximum number of trained models to display.
- :param time: Unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -2162,6 +2378,9 @@ async def ml_trained_models(
async def nodeattrs(
self,
*,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
@@ -2189,6 +2408,9 @@ async def nodeattrs(
master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
pretty: t.Optional[bool] = None,
s: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ time: t.Optional[
+ t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
+ ] = None,
v: t.Optional[bool] = None,
) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
"""
@@ -2201,6 +2423,14 @@ async def nodeattrs(
``_
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: A comma-separated list of column names to display. It supports simple
@@ -2215,11 +2445,19 @@ async def nodeattrs(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str] = {}
__path = "/_cat/nodeattrs"
__query: t.Dict[str, t.Any] = {}
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
@@ -2240,6 +2478,8 @@ async def nodeattrs(
__query["pretty"] = pretty
if s is not None:
__query["s"] = s
+ if time is not None:
+ __query["time"] = time
if v is not None:
__query["v"] = v
__headers = {"accept": "text/plain,application/json"}
@@ -2478,7 +2718,14 @@ async def nodes(
``_
- :param bytes: The unit used to display byte values.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param full_id: If `true`, return the full node ID. If `false`, return the shortened
@@ -2493,7 +2740,12 @@ async def nodes(
:param s: A comma-separated list of column names or aliases that determines the
sort order. Sorting defaults to ascending and can be changed by setting `:asc`
or `:desc` as a suffix to the column name.
- :param time: The unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str] = {}
@@ -2541,6 +2793,9 @@ async def nodes(
async def pending_tasks(
self,
*,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
@@ -2578,6 +2833,14 @@ async def pending_tasks(
``_
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: A comma-separated list of column names to display. It supports simple
@@ -2592,12 +2855,19 @@ async def pending_tasks(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
- :param time: Unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str] = {}
__path = "/_cat/pending_tasks"
__query: t.Dict[str, t.Any] = {}
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
@@ -2636,6 +2906,9 @@ async def pending_tasks(
async def plugins(
self,
*,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
@@ -2659,6 +2932,9 @@ async def plugins(
master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
pretty: t.Optional[bool] = None,
s: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ time: t.Optional[
+ t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
+ ] = None,
v: t.Optional[bool] = None,
) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
"""
@@ -2671,6 +2947,14 @@ async def plugins(
``_
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: A comma-separated list of column names to display. It supports simple
@@ -2686,11 +2970,19 @@ async def plugins(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str] = {}
__path = "/_cat/plugins"
__query: t.Dict[str, t.Any] = {}
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
@@ -2713,6 +3005,8 @@ async def plugins(
__query["pretty"] = pretty
if s is not None:
__query["s"] = s
+ if time is not None:
+ __query["time"] = time
if v is not None:
__query["v"] = v
__headers = {"accept": "text/plain,application/json"}
@@ -2831,7 +3125,14 @@ async def recovery(
to limit the request. Supports wildcards (`*`). To target all data streams
and indices, omit this parameter or use `*` or `_all`.
:param active_only: If `true`, the response only includes ongoing shard recoveries.
- :param bytes: The unit used to display byte values.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param detailed: If `true`, the response includes detailed information about
shard recoveries.
:param format: Specifies the format to return the columnar data in, can be set
@@ -2843,7 +3144,12 @@ async def recovery(
:param s: A comma-separated list of column names or aliases that determines the
sort order. Sorting defaults to ascending and can be changed by setting `:asc`
or `:desc` as a suffix to the column name.
- :param time: The unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -2894,6 +3200,9 @@ async def recovery(
async def repositories(
self,
*,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
@@ -2904,6 +3213,9 @@ async def repositories(
master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
pretty: t.Optional[bool] = None,
s: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ time: t.Optional[
+ t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
+ ] = None,
v: t.Optional[bool] = None,
) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
"""
@@ -2916,6 +3228,14 @@ async def repositories(
``_
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: List of columns to appear in the response. Supports simple wildcards.
@@ -2929,11 +3249,19 @@ async def repositories(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str] = {}
__path = "/_cat/repositories"
__query: t.Dict[str, t.Any] = {}
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
@@ -2954,6 +3282,8 @@ async def repositories(
__query["pretty"] = pretty
if s is not None:
__query["s"] = s
+ if time is not None:
+ __query["time"] = time
if v is not None:
__query["v"] = v
__headers = {"accept": "text/plain,application/json"}
@@ -3029,6 +3359,9 @@ async def segments(
master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
pretty: t.Optional[bool] = None,
s: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ time: t.Optional[
+ t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
+ ] = None,
v: t.Optional[bool] = None,
) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
"""
@@ -3045,7 +3378,14 @@ async def segments(
:param index: A comma-separated list of data streams, indices, and aliases used
to limit the request. Supports wildcards (`*`). To target all data streams
and indices, omit this parameter or use `*` or `_all`.
- :param bytes: The unit used to display byte values.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: A comma-separated list of column names to display. It supports simple
@@ -3060,6 +3400,12 @@ async def segments(
:param s: A comma-separated list of column names or aliases that determines the
sort order. Sorting defaults to ascending and can be changed by setting `:asc`
or `:desc` as a suffix to the column name.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -3092,6 +3438,8 @@ async def segments(
__query["pretty"] = pretty
if s is not None:
__query["s"] = s
+ if time is not None:
+ __query["time"] = time
if v is not None:
__query["v"] = v
__headers = {"accept": "text/plain,application/json"}
@@ -3295,7 +3643,14 @@ async def shards(
:param index: A comma-separated list of data streams, indices, and aliases used
to limit the request. Supports wildcards (`*`). To target all data streams
and indices, omit this parameter or use `*` or `_all`.
- :param bytes: The unit used to display byte values.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: List of columns to appear in the response. Supports simple wildcards.
@@ -3305,7 +3660,12 @@ async def shards(
:param s: A comma-separated list of column names or aliases that determines the
sort order. Sorting defaults to ascending and can be changed by setting `:asc`
or `:desc` as a suffix to the column name.
- :param time: The unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -3355,6 +3715,9 @@ async def snapshots(
self,
*,
repository: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
@@ -3425,6 +3788,14 @@ async def snapshots(
:param repository: A comma-separated list of snapshot repositories used to limit
the request. Accepts wildcard expressions. `_all` returns all repositories.
If any repository fails during the request, Elasticsearch returns an error.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: A comma-separated list of column names to display. It supports simple
@@ -3437,7 +3808,12 @@ async def snapshots(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
- :param time: Unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -3448,6 +3824,8 @@ async def snapshots(
__path_parts = {}
__path = "/_cat/snapshots"
__query: t.Dict[str, t.Any] = {}
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
@@ -3488,6 +3866,9 @@ async def tasks(
self,
*,
actions: t.Optional[t.Sequence[str]] = None,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
detailed: t.Optional[bool] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
@@ -3562,6 +3943,14 @@ async def tasks(
``_
:param actions: The task action names, which are used to limit the response.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param detailed: If `true`, the response includes detailed information about
the running tasks.
:param format: Specifies the format to return the columnar data in, can be set
@@ -3576,7 +3965,12 @@ async def tasks(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
- :param time: Unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param timeout: Period to wait for a response. If no response is received before
the timeout expires, the request fails and returns an error.
:param v: When set to `true` will enable verbose output.
@@ -3588,6 +3982,8 @@ async def tasks(
__query: t.Dict[str, t.Any] = {}
if actions is not None:
__query["actions"] = actions
+ if bytes is not None:
+ __query["bytes"] = bytes
if detailed is not None:
__query["detailed"] = detailed
if error_trace is not None:
@@ -3633,6 +4029,9 @@ async def templates(
self,
*,
name: t.Optional[str] = None,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
@@ -3660,6 +4059,9 @@ async def templates(
master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
pretty: t.Optional[bool] = None,
s: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ time: t.Optional[
+ t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
+ ] = None,
v: t.Optional[bool] = None,
) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
"""
@@ -3675,6 +4077,14 @@ async def templates(
:param name: The name of the template to return. Accepts wildcard expressions.
If omitted, all templates are returned.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: A comma-separated list of column names to display. It supports simple
@@ -3689,6 +4099,12 @@ async def templates(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -3699,6 +4115,8 @@ async def templates(
__path_parts = {}
__path = "/_cat/templates"
__query: t.Dict[str, t.Any] = {}
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
@@ -3719,6 +4137,8 @@ async def templates(
__query["pretty"] = pretty
if s is not None:
__query["s"] = s
+ if time is not None:
+ __query["time"] = time
if v is not None:
__query["v"] = v
__headers = {"accept": "text/plain,application/json"}
@@ -3736,6 +4156,9 @@ async def thread_pool(
self,
*,
thread_pool_patterns: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
@@ -3819,6 +4242,14 @@ async def thread_pool(
:param thread_pool_patterns: A comma-separated list of thread pool names used
to limit the request. Accepts wildcard expressions.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: List of columns to appear in the response. Supports simple wildcards.
@@ -3832,7 +4263,12 @@ async def thread_pool(
:param s: A comma-separated list of column names or aliases that determines the
sort order. Sorting defaults to ascending and can be changed by setting `:asc`
or `:desc` as a suffix to the column name.
- :param time: The unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -3843,6 +4279,8 @@ async def thread_pool(
__path_parts = {}
__path = "/_cat/thread_pool"
__query: t.Dict[str, t.Any] = {}
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
@@ -3885,6 +4323,9 @@ async def transforms(
*,
transform_id: t.Optional[str] = None,
allow_no_match: t.Optional[bool] = None,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
@@ -4084,6 +4525,14 @@ async def transforms(
array when there are no matches and the subset of results when there are
partial matches. If `false`, the request returns a 404 status code when there
are no matches or only partial matches.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param from_: Skips the specified number of transforms.
@@ -4093,7 +4542,12 @@ async def transforms(
:param s: Comma-separated list of column names or column aliases used to sort
the response.
:param size: The maximum number of transforms to obtain.
- :param time: The unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -4106,6 +4560,8 @@ async def transforms(
__query: t.Dict[str, t.Any] = {}
if allow_no_match is not None:
__query["allow_no_match"] = allow_no_match
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
diff --git a/elasticsearch/_async/client/connector.py b/elasticsearch/_async/client/connector.py
index 6c4bca043..70e468db7 100644
--- a/elasticsearch/_async/client/connector.py
+++ b/elasticsearch/_async/client/connector.py
@@ -103,7 +103,7 @@ async def delete(
:param connector_id: The unique identifier of the connector to be deleted
:param delete_sync_jobs: A flag indicating if associated sync jobs should be
- also removed. Defaults to false.
+ also removed.
:param hard: A flag indicating if the connector should be hard deleted.
"""
if connector_id in SKIP_IN_PATH:
@@ -360,7 +360,7 @@ async def list(
:param connector_name: A comma-separated list of connector names to fetch connector
documents for
- :param from_: Starting offset (default: 0)
+ :param from_: Starting offset
:param include_deleted: A flag to indicate if the desired connector should be
fetched, even if it was soft-deleted.
:param index_name: A comma-separated list of connector index names to fetch connector
@@ -955,7 +955,7 @@ async def sync_job_list(
``_
:param connector_id: A connector id to fetch connector sync jobs for
- :param from_: Starting offset (default: 0)
+ :param from_: Starting offset
:param job_type: A comma-separated list of job types to fetch the sync jobs for
:param size: Specifies a max number of results to get
:param status: A sync job status to fetch connector sync jobs for
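For both listing endpoints above, `from_` is the client-side alias for the reserved query parameter `from`. A sketch of offset paging, assuming connectors exist on the cluster:

    from elasticsearch import Elasticsearch

    es = Elasticsearch("http://localhost:9200")  # assumed local cluster

    # Fetch connectors 10 at a time; from_ is sent as ?from= on the wire
    page = es.connector.list(from_=0, size=10)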
diff --git a/elasticsearch/_async/client/indices.py b/elasticsearch/_async/client/indices.py
index c12ec00b0..13e217943 100644
--- a/elasticsearch/_async/client/indices.py
+++ b/elasticsearch/_async/client/indices.py
@@ -812,11 +812,7 @@ async def create_from(
raise ValueError("Empty value passed for parameter 'source'")
if dest in SKIP_IN_PATH:
raise ValueError("Empty value passed for parameter 'dest'")
- if create_from is None and body is None:
- raise ValueError(
- "Empty value passed for parameters 'create_from' and 'body', one of them should be set."
- )
- elif create_from is not None and body is not None:
+ if create_from is not None and body is not None:
raise ValueError("Cannot set both 'create_from' and 'body'")
__path_parts: t.Dict[str, str] = {
"source": _quote(source),
@@ -833,7 +829,11 @@ async def create_from(
if pretty is not None:
__query["pretty"] = pretty
__body = create_from if create_from is not None else body
- __headers = {"accept": "application/json", "content-type": "application/json"}
+ if not __body:
+ __body = None
+ __headers = {"accept": "application/json"}
+ if __body is not None:
+ __headers["content-type"] = "application/json"
return await self.perform_request( # type: ignore[return-value]
"PUT",
__path,
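The `create_from` change makes the request body optional: if neither `create_from` nor `body` is passed, the client now sends a bare PUT with no `content-type` header instead of raising. A sketch with hypothetical index names:

    from elasticsearch import Elasticsearch

    es = Elasticsearch("http://localhost:9200")  # assumed local cluster

    # Clone settings/mappings from an existing index into a new one without
    # supplying a CreateFrom body
    es.indices.create_from(source="src-index", dest="dest-index")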
@@ -4549,6 +4549,7 @@ async def refresh(
For data streams, the API runs the refresh operation on the stream’s backing indices.
By default, Elasticsearch periodically refreshes indices every second, but only on indices that have received one search request or more in the last 30 seconds.
You can change this default interval with the index.refresh_interval setting.
+ In Elastic Cloud Serverless, the default refresh interval is 5 seconds across all indices.
Refresh requests are synchronous and do not return a response until the refresh operation completes.
Refreshes are resource-intensive.
To ensure good cluster performance, it's recommended to wait for Elasticsearch's periodic refresh rather than performing an explicit refresh when possible.
@@ -5414,7 +5415,9 @@ async def shrink(
path_parts=__path_parts,
)
- @_rewrite_parameters()
+ @_rewrite_parameters(
+ body_name="index_template",
+ )
async def simulate_index_template(
self,
*,
@@ -5425,6 +5428,8 @@ async def simulate_index_template(
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
human: t.Optional[bool] = None,
include_defaults: t.Optional[bool] = None,
+ index_template: t.Optional[t.Mapping[str, t.Any]] = None,
+ body: t.Optional[t.Mapping[str, t.Any]] = None,
master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
pretty: t.Optional[bool] = None,
) -> ObjectApiResponse[t.Any]:
@@ -5444,12 +5449,15 @@ async def simulate_index_template(
only be dry-run added if new or can also replace an existing one
:param include_defaults: If true, returns all relevant default configurations
for the index template.
+ :param index_template: The index template definition to simulate.
:param master_timeout: Period to wait for a connection to the master node. If
no response is received before the timeout expires, the request fails and
returns an error.
"""
if name in SKIP_IN_PATH:
raise ValueError("Empty value passed for parameter 'name'")
+ if index_template is not None and body is not None:
+ raise ValueError("Cannot set both 'index_template' and 'body'")
__path_parts: t.Dict[str, str] = {"name": _quote(name)}
__path = f'/_index_template/_simulate_index/{__path_parts["name"]}'
__query: t.Dict[str, t.Any] = {}
@@ -5469,12 +5477,18 @@ async def simulate_index_template(
__query["master_timeout"] = master_timeout
if pretty is not None:
__query["pretty"] = pretty
+ __body = index_template if index_template is not None else body
+ if not __body:
+ __body = None
__headers = {"accept": "application/json"}
+ if __body is not None:
+ __headers["content-type"] = "application/json"
return await self.perform_request( # type: ignore[return-value]
"POST",
__path,
params=__query,
headers=__headers,
+ body=__body,
endpoint_id="indices.simulate_index_template",
path_parts=__path_parts,
)
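With `body_name="index_template"` wired in, `simulate_index_template` can now dry-run an inline template definition as well as the stored templates. A hedged sketch with a hypothetical template body:

    from elasticsearch import Elasticsearch

    es = Elasticsearch("http://localhost:9200")  # assumed local cluster

    # Preview the configuration "my-index" would get if this template existed;
    # omit index_template to simulate only the templates already stored
    resp = es.indices.simulate_index_template(
        name="my-index",
        index_template={
            "index_patterns": ["my-index*"],
            "template": {"settings": {"number_of_shards": 1}},
        },
    )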
@@ -5823,8 +5837,8 @@ async def stats(
are requested).
:param include_unloaded_segments: If true, the response includes information
from segments that are not loaded into memory.
- :param level: Indicates whether statistics are aggregated at the cluster, index,
- or shard level.
+ :param level: Indicates whether statistics are aggregated at the cluster, indices,
+ or shards level.
"""
__path_parts: t.Dict[str, str]
if index not in SKIP_IN_PATH and metric not in SKIP_IN_PATH:
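The reworded `level` descriptions mirror the accepted enum values. For example:

    from elasticsearch import Elasticsearch

    es = Elasticsearch("http://localhost:9200")  # assumed local cluster

    # Aggregate index statistics at the shards level; valid values are
    # "cluster", "indices", and "shards"
    resp = es.indices.stats(index="my-index", level="shards")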
diff --git a/elasticsearch/_async/client/inference.py b/elasticsearch/_async/client/inference.py
index 621156307..333d3f5f9 100644
--- a/elasticsearch/_async/client/inference.py
+++ b/elasticsearch/_async/client/inference.py
@@ -2504,7 +2504,7 @@ async def sparse_embedding(
)
@_rewrite_parameters(
- body_fields=("input", "task_settings"),
+ body_fields=("input", "input_type", "task_settings"),
)
async def text_embedding(
self,
@@ -2514,6 +2514,7 @@ async def text_embedding(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
human: t.Optional[bool] = None,
+ input_type: t.Optional[str] = None,
pretty: t.Optional[bool] = None,
task_settings: t.Optional[t.Any] = None,
timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
@@ -2529,6 +2530,13 @@ async def text_embedding(
:param inference_id: The inference Id
:param input: Inference input. Either a string or an array of strings.
+ :param input_type: The input data type for the text embedding model. Possible
+ values include `SEARCH`, `INGEST`, `CLASSIFICATION`, and `CLUSTERING`. Not
+ all services support all values; unsupported values trigger a validation
+ exception. Accepted values depend on the configured inference service; refer
+ to the relevant service-specific documentation for more info. Note that the
+ `input_type` parameter specified at the root level of the request body takes
+ precedence over the `input_type` parameter specified in `task_settings`.
:param task_settings: Optional task settings
:param timeout: Specifies the amount of time to wait for the inference request
to complete.
@@ -2554,6 +2562,8 @@ async def text_embedding(
if not __body:
if input is not None:
__body["input"] = input
+ if input_type is not None:
+ __body["input_type"] = input_type
if task_settings is not None:
__body["task_settings"] = task_settings
if not __body:
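A sketch of the new root-level `input_type`, assuming an inference endpoint named `my-e5` already exists (service support for each value varies, as the docstring notes):

    from elasticsearch import Elasticsearch

    es = Elasticsearch("http://localhost:9200")  # assumed local cluster

    resp = es.inference.text_embedding(
        inference_id="my-e5",
        input=["first passage", "second passage"],
        input_type="SEARCH",  # overrides any input_type in task_settings
    )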
diff --git a/elasticsearch/_async/client/logstash.py b/elasticsearch/_async/client/logstash.py
index c63983710..c724911dc 100644
--- a/elasticsearch/_async/client/logstash.py
+++ b/elasticsearch/_async/client/logstash.py
@@ -141,7 +141,9 @@ async def put_pipeline(
``_
- :param id: An identifier for the pipeline.
+ :param id: An identifier for the pipeline. Pipeline IDs must begin with a letter
+ or underscore and contain only letters, underscores, dashes, and numbers.
:param pipeline:
"""
if id in SKIP_IN_PATH:
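
The documented ID rule can be checked client-side. A minimal sketch; the regex is an assumption distilled from the docstring, not the server's actual validator:

    import re

    # Assumed rule: must begin with a letter or underscore, followed by
    # letters, underscores, dashes, and numbers only.
    PIPELINE_ID = re.compile(r"^[A-Za-z_][A-Za-z0-9_-]*$")

    assert PIPELINE_ID.match("main_ingest-1")
    assert not PIPELINE_ID.match("1-bad-id")
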
diff --git a/elasticsearch/_async/client/nodes.py b/elasticsearch/_async/client/nodes.py
index 1b007e7cb..1945501ee 100644
--- a/elasticsearch/_async/client/nodes.py
+++ b/elasticsearch/_async/client/nodes.py
@@ -404,8 +404,8 @@ async def stats(
are requested).
:param include_unloaded_segments: If `true`, the response includes information
from segments that are not loaded into memory.
- :param level: Indicates whether statistics are aggregated at the cluster, index,
- or shard level.
+ :param level: Indicates whether statistics are aggregated at the `node`,
+ `indices`, or `shards` level.
:param timeout: Period to wait for a response. If no response is received before
the timeout expires, the request fails and returns an error.
:param types: A comma-separated list of document types for the indexing index
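
A matching sketch for the node stats `level` values, assuming an async client in scope:

    # Sketch only: aggregate node statistics at the indices level.
    resp = await client.nodes.stats(metric="indices", level="indices")
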
diff --git a/elasticsearch/_async/client/shutdown.py b/elasticsearch/_async/client/shutdown.py
index 5dbc33e92..9502d1fe6 100644
--- a/elasticsearch/_async/client/shutdown.py
+++ b/elasticsearch/_async/client/shutdown.py
@@ -33,13 +33,9 @@ async def delete_node(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
human: t.Optional[bool] = None,
- master_timeout: t.Optional[
- t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
- ] = None,
+ master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
pretty: t.Optional[bool] = None,
- timeout: t.Optional[
- t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
- ] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
) -> ObjectApiResponse[t.Any]:
"""
.. raw:: html
@@ -97,9 +93,7 @@ async def get_node(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
human: t.Optional[bool] = None,
- master_timeout: t.Optional[
- t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
- ] = None,
+ master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
pretty: t.Optional[bool] = None,
) -> ObjectApiResponse[t.Any]:
"""
@@ -162,14 +156,10 @@ async def put_node(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
human: t.Optional[bool] = None,
- master_timeout: t.Optional[
- t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
- ] = None,
+ master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
pretty: t.Optional[bool] = None,
target_node_name: t.Optional[str] = None,
- timeout: t.Optional[
- t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
- ] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
body: t.Optional[t.Dict[str, t.Any]] = None,
) -> ObjectApiResponse[t.Any]:
"""
diff --git a/elasticsearch/_async/client/streams.py b/elasticsearch/_async/client/streams.py
new file mode 100644
index 000000000..622721bda
--- /dev/null
+++ b/elasticsearch/_async/client/streams.py
@@ -0,0 +1,186 @@
+# Licensed to Elasticsearch B.V. under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Elasticsearch B.V. licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import typing as t
+
+from elastic_transport import ObjectApiResponse, TextApiResponse
+
+from ._base import NamespacedClient
+from .utils import (
+ Stability,
+ _rewrite_parameters,
+ _stability_warning,
+)
+
+
+class StreamsClient(NamespacedClient):
+
+ @_rewrite_parameters()
+ @_stability_warning(Stability.EXPERIMENTAL)
+ async def logs_disable(
+ self,
+ *,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+ pretty: t.Optional[bool] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+ ) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
+ """
+ .. raw:: html
+
+ Disable logs stream.
+ Turn off the logs stream feature for this cluster.
+
+
+ ``_
+
+ :param master_timeout: The period to wait for a connection to the master node.
+ If no response is received before the timeout expires, the request fails
+ and returns an error.
+ :param timeout: The period to wait for a response. If no response is received
+ before the timeout expires, the request fails and returns an error.
+ """
+ __path_parts: t.Dict[str, str] = {}
+ __path = "/_streams/logs/_disable"
+ __query: t.Dict[str, t.Any] = {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if master_timeout is not None:
+ __query["master_timeout"] = master_timeout
+ if pretty is not None:
+ __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
+ __headers = {"accept": "application/json,text/plain"}
+ return await self.perform_request( # type: ignore[return-value]
+ "POST",
+ __path,
+ params=__query,
+ headers=__headers,
+ endpoint_id="streams.logs_disable",
+ path_parts=__path_parts,
+ )
+
+ @_rewrite_parameters()
+ @_stability_warning(Stability.EXPERIMENTAL)
+ async def logs_enable(
+ self,
+ *,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+ pretty: t.Optional[bool] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+ ) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
+ """
+ .. raw:: html
+
+ Enable logs stream.
+ Turn on the logs stream feature for this cluster.
+ NOTE: To protect existing data, this feature can be turned on only if the
+ cluster has no existing indices or data streams that match the pattern `logs|logs.*`.
+ If such indices or data streams exist, a `409 Conflict` error response is returned.
+
+
+ ``_
+
+ :param master_timeout: The period to wait for a connection to the master node.
+ If no response is received before the timeout expires, the request fails
+ and returns an error.
+ :param timeout: The period to wait for a response. If no response is received
+ before the timeout expires, the request fails and returns an error.
+ """
+ __path_parts: t.Dict[str, str] = {}
+ __path = "/_streams/logs/_enable"
+ __query: t.Dict[str, t.Any] = {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if master_timeout is not None:
+ __query["master_timeout"] = master_timeout
+ if pretty is not None:
+ __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
+ __headers = {"accept": "application/json,text/plain"}
+ return await self.perform_request( # type: ignore[return-value]
+ "POST",
+ __path,
+ params=__query,
+ headers=__headers,
+ endpoint_id="streams.logs_enable",
+ path_parts=__path_parts,
+ )
+
+ @_rewrite_parameters()
+ @_stability_warning(Stability.EXPERIMENTAL)
+ async def status(
+ self,
+ *,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ master_timeout: t.Optional[
+ t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
+ ] = None,
+ pretty: t.Optional[bool] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ Get the status of streams.
+ Get the current status for all types of streams.
+
+
+ ``_
+
+ :param master_timeout: Period to wait for a connection to the master node. If
+ no response is received before the timeout expires, the request fails and
+ returns an error.
+ """
+ __path_parts: t.Dict[str, str] = {}
+ __path = "/_streams/status"
+ __query: t.Dict[str, t.Any] = {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if master_timeout is not None:
+ __query["master_timeout"] = master_timeout
+ if pretty is not None:
+ __query["pretty"] = pretty
+ __headers = {"accept": "application/json"}
+ return await self.perform_request( # type: ignore[return-value]
+ "GET",
+ __path,
+ params=__query,
+ headers=__headers,
+ endpoint_id="streams.status",
+ path_parts=__path_parts,
+ )
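
A minimal usage sketch of the new experimental namespace; the cluster URL is an assumption:

    import asyncio

    from elasticsearch import AsyncElasticsearch

    async def main() -> None:
        client = AsyncElasticsearch("http://localhost:9200")  # assumed URL
        # Experimental APIs emit a stability warning when called.
        await client.streams.logs_enable(master_timeout="30s", timeout="30s")
        print(await client.streams.status())
        await client.close()

    asyncio.run(main())
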
diff --git a/elasticsearch/_async/client/watcher.py b/elasticsearch/_async/client/watcher.py
index e0b7b39ec..8558b9920 100644
--- a/elasticsearch/_async/client/watcher.py
+++ b/elasticsearch/_async/client/watcher.py
@@ -552,11 +552,7 @@ async def put_watch(
__body["transform"] = transform
if trigger is not None:
__body["trigger"] = trigger
- if not __body:
- __body = None # type: ignore[assignment]
- __headers = {"accept": "application/json"}
- if __body is not None:
- __headers["content-type"] = "application/json"
+ __headers = {"accept": "application/json", "content-type": "application/json"}
return await self.perform_request( # type: ignore[return-value]
"PUT",
__path,
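
After this simplification the request always carries a JSON body, possibly empty, so the content-type header is set unconditionally. A brief sketch, assuming an async client in scope:

    # Sketch only: even a call with no body fields now sends "{}" with a
    # JSON content-type instead of an empty request.
    await client.watcher.put_watch(id="my-watch", active=False)
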
diff --git a/elasticsearch/_sync/client/__init__.py b/elasticsearch/_sync/client/__init__.py
index b7acad907..c2662e505 100644
--- a/elasticsearch/_sync/client/__init__.py
+++ b/elasticsearch/_sync/client/__init__.py
@@ -74,6 +74,7 @@
from .snapshot import SnapshotClient
from .sql import SqlClient
from .ssl import SslClient
+from .streams import StreamsClient
from .synonyms import SynonymsClient
from .tasks import TasksClient
from .text_structure import TextStructureClient
@@ -378,6 +379,7 @@ def __init__(
self.shutdown = ShutdownClient(self)
self.sql = SqlClient(self)
self.ssl = SslClient(self)
+ self.streams = StreamsClient(self)
self.synonyms = SynonymsClient(self)
self.text_structure = TextStructureClient(self)
self.transform = TransformClient(self)
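
On the sync client the new namespace mirrors the async one; a one-line sketch, assuming a configured Elasticsearch instance:

    print(client.streams.status())  # Sketch only: sync counterpart
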
diff --git a/elasticsearch/_sync/client/cat.py b/elasticsearch/_sync/client/cat.py
index 5349a32ec..46b6820ae 100644
--- a/elasticsearch/_sync/client/cat.py
+++ b/elasticsearch/_sync/client/cat.py
@@ -36,6 +36,9 @@ def aliases(
self,
*,
name: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
expand_wildcards: t.Optional[
t.Union[
@@ -80,6 +83,9 @@ def aliases(
master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
pretty: t.Optional[bool] = None,
s: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ time: t.Optional[
+ t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
+ ] = None,
v: t.Optional[bool] = None,
) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
"""
@@ -95,6 +101,14 @@ def aliases(
:param name: A comma-separated list of aliases to retrieve. Supports wildcards
(`*`). To retrieve all aliases, omit this parameter or use `*` or `_all`.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param expand_wildcards: The type of index that wildcard patterns can match.
If the request can target data streams, this argument determines whether
wildcard expressions match hidden data streams. It supports comma-separated
@@ -112,6 +126,12 @@ def aliases(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -122,6 +142,8 @@ def aliases(
__path_parts = {}
__path = "/_cat/aliases"
__query: t.Dict[str, t.Any] = {}
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if expand_wildcards is not None:
@@ -142,6 +164,8 @@ def aliases(
__query["pretty"] = pretty
if s is not None:
__query["s"] = s
+ if time is not None:
+ __query["time"] = time
if v is not None:
__query["v"] = v
__headers = {"accept": "text/plain,application/json"}
@@ -213,6 +237,9 @@ def allocation(
master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
pretty: t.Optional[bool] = None,
s: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ time: t.Optional[
+ t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
+ ] = None,
v: t.Optional[bool] = None,
) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
"""
@@ -227,7 +254,14 @@ def allocation(
:param node_id: A comma-separated list of node identifiers or names used to limit
the returned information.
- :param bytes: The unit used to display byte values.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: A comma-separated list of column names to display. It supports simple
@@ -242,6 +276,12 @@ def allocation(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -274,6 +314,8 @@ def allocation(
__query["pretty"] = pretty
if s is not None:
__query["s"] = s
+ if time is not None:
+ __query["time"] = time
if v is not None:
__query["v"] = v
__headers = {"accept": "text/plain,application/json"}
@@ -291,6 +333,9 @@ def component_templates(
self,
*,
name: t.Optional[str] = None,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
@@ -330,6 +375,9 @@ def component_templates(
master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
pretty: t.Optional[bool] = None,
s: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ time: t.Optional[
+ t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
+ ] = None,
v: t.Optional[bool] = None,
) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
"""
@@ -346,6 +394,14 @@ def component_templates(
:param name: The name of the component template. It accepts wildcard expressions.
If it is omitted, all component templates are returned.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: A comma-separated list of column names to display. It supports simple
@@ -360,6 +416,12 @@ def component_templates(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -370,6 +432,8 @@ def component_templates(
__path_parts = {}
__path = "/_cat/component_templates"
__query: t.Dict[str, t.Any] = {}
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
@@ -390,6 +454,8 @@ def component_templates(
__query["pretty"] = pretty
if s is not None:
__query["s"] = s
+ if time is not None:
+ __query["time"] = time
if v is not None:
__query["v"] = v
__headers = {"accept": "text/plain,application/json"}
@@ -407,6 +473,9 @@ def count(
self,
*,
index: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
@@ -420,6 +489,9 @@ def count(
human: t.Optional[bool] = None,
pretty: t.Optional[bool] = None,
s: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ time: t.Optional[
+ t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
+ ] = None,
v: t.Optional[bool] = None,
) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
"""
@@ -437,6 +509,14 @@ def count(
:param index: A comma-separated list of data streams, indices, and aliases used
to limit the request. It supports wildcards (`*`). To target all data streams
and indices, omit this parameter or use `*` or `_all`.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: A comma-separated list of column names to display. It supports simple
@@ -446,6 +526,12 @@ def count(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -456,6 +542,8 @@ def count(
__path_parts = {}
__path = "/_cat/count"
__query: t.Dict[str, t.Any] = {}
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
@@ -472,6 +560,8 @@ def count(
__query["pretty"] = pretty
if s is not None:
__query["s"] = s
+ if time is not None:
+ __query["time"] = time
if v is not None:
__query["v"] = v
__headers = {"accept": "text/plain,application/json"}
@@ -507,6 +597,9 @@ def fielddata(
human: t.Optional[bool] = None,
pretty: t.Optional[bool] = None,
s: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ time: t.Optional[
+ t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
+ ] = None,
v: t.Optional[bool] = None,
) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
"""
@@ -522,7 +615,14 @@ def fielddata(
:param fields: Comma-separated list of fields used to limit returned information.
To retrieve all fields, omit this parameter.
- :param bytes: The unit used to display byte values.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: A comma-separated list of column names to display. It supports simple
@@ -532,6 +632,12 @@ def fielddata(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -560,6 +666,8 @@ def fielddata(
__query["pretty"] = pretty
if s is not None:
__query["s"] = s
+ if time is not None:
+ __query["time"] = time
if v is not None:
__query["v"] = v
__headers = {"accept": "text/plain,application/json"}
@@ -576,6 +684,9 @@ def fielddata(
def health(
self,
*,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
@@ -652,6 +763,14 @@ def health(
``_
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: A comma-separated list of column names to display. It supports simple
@@ -661,13 +780,20 @@ def health(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
- :param time: The unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param ts: If true, returns `HH:MM:SS` and Unix epoch timestamps.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str] = {}
__path = "/_cat/health"
__query: t.Dict[str, t.Any] = {}
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
@@ -1092,7 +1218,14 @@ def indices(
:param index: Comma-separated list of data streams, indices, and aliases used
to limit the request. Supports wildcards (`*`). To target all data streams
and indices, omit this parameter or use `*` or `_all`.
- :param bytes: The unit used to display byte values.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param expand_wildcards: The type of index that wildcard patterns can match.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
@@ -1109,7 +1242,12 @@ def indices(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
- :param time: The unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
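
A hedged sketch of the unit parameters on a cat API that has both byte and time columns; the index pattern is an assumption:

    # Sketch only: byte columns in whole mebibytes, time columns in seconds.
    # With bytes="mb", a store size of 5.5mb renders as 5 (1024-based units,
    # rounded down); omitting the parameter keeps the suffixed form.
    resp = client.cat.indices(index="my-*", bytes="mb", time="s", format="json", v=True)
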
@@ -1166,6 +1304,9 @@ def indices(
def master(
self,
*,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
@@ -1181,6 +1322,9 @@ def master(
master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
pretty: t.Optional[bool] = None,
s: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ time: t.Optional[
+ t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
+ ] = None,
v: t.Optional[bool] = None,
) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
"""
@@ -1193,6 +1337,14 @@ def master(
``_
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: A comma-separated list of column names to display. It supports simple
@@ -1207,11 +1359,19 @@ def master(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str] = {}
__path = "/_cat/master"
__query: t.Dict[str, t.Any] = {}
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
@@ -1232,6 +1392,8 @@ def master(
__query["pretty"] = pretty
if s is not None:
__query["s"] = s
+ if time is not None:
+ __query["time"] = time
if v is not None:
__query["v"] = v
__headers = {"accept": "text/plain,application/json"}
@@ -1374,8 +1536,15 @@ def ml_data_frame_analytics(
:param id: The ID of the data frame analytics to fetch
:param allow_no_match: Whether to ignore if a wildcard expression matches no
- configs. (This includes `_all` string or when no configs have been specified)
- :param bytes: The unit in which to display byte values
+ configs. (This includes the `_all` string or when no configs have been specified.)
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: Comma-separated list of column names to display.
@@ -1383,7 +1552,12 @@ def ml_data_frame_analytics(
be combined with any other query string option.
:param s: Comma-separated list of column names or column aliases used to sort
the response.
- :param time: Unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -1434,6 +1608,9 @@ def ml_datafeeds(
*,
datafeed_id: t.Optional[str] = None,
allow_no_match: t.Optional[bool] = None,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
@@ -1549,6 +1726,14 @@ def ml_datafeeds(
array when there are no matches and the subset of results when there are
partial matches. If `false`, the API returns a 404 status code when there
are no matches or only partial matches.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: Comma-separated list of column names to display.
@@ -1556,7 +1741,12 @@ def ml_datafeeds(
be combined with any other query string option.
:param s: Comma-separated list of column names or column aliases used to sort
the response.
- :param time: The unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -1569,6 +1759,8 @@ def ml_datafeeds(
__query: t.Dict[str, t.Any] = {}
if allow_no_match is not None:
__query["allow_no_match"] = allow_no_match
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
@@ -1914,7 +2106,14 @@ def ml_jobs(
array when there are no matches and the subset of results when there are
partial matches. If `false`, the API returns a 404 status code when there
are no matches or only partial matches.
- :param bytes: The unit used to display byte values.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: Comma-separated list of column names to display.
@@ -1922,7 +2121,12 @@ def ml_jobs(
be combined with any other query string option.
:param s: Comma-separated list of column names or column aliases used to sort
the response.
- :param time: The unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -2099,7 +2303,14 @@ def ml_trained_models(
when there are no matches and the subset of results when there are partial
matches. If `false`, the API returns a 404 status code when there are no
matches or only partial matches.
- :param bytes: The unit used to display byte values.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param from_: Skips the specified number of transforms.
@@ -2109,7 +2320,12 @@ def ml_trained_models(
:param s: A comma-separated list of column names or aliases used to sort the
response.
:param size: The maximum number of transforms to display.
- :param time: Unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -2162,6 +2378,9 @@ def ml_trained_models(
def nodeattrs(
self,
*,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
@@ -2189,6 +2408,9 @@ def nodeattrs(
master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
pretty: t.Optional[bool] = None,
s: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ time: t.Optional[
+ t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
+ ] = None,
v: t.Optional[bool] = None,
) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
"""
@@ -2201,6 +2423,14 @@ def nodeattrs(
``_
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: A comma-separated list of column names to display. It supports simple
@@ -2215,11 +2445,19 @@ def nodeattrs(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str] = {}
__path = "/_cat/nodeattrs"
__query: t.Dict[str, t.Any] = {}
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
@@ -2240,6 +2478,8 @@ def nodeattrs(
__query["pretty"] = pretty
if s is not None:
__query["s"] = s
+ if time is not None:
+ __query["time"] = time
if v is not None:
__query["v"] = v
__headers = {"accept": "text/plain,application/json"}
@@ -2478,7 +2718,14 @@ def nodes(
``_
- :param bytes: The unit used to display byte values.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param full_id: If `true`, return the full node ID. If `false`, return the shortened
@@ -2493,7 +2740,12 @@ def nodes(
:param s: A comma-separated list of column names or aliases that determines the
sort order. Sorting defaults to ascending and can be changed by setting `:asc`
or `:desc` as a suffix to the column name.
- :param time: The unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str] = {}
@@ -2541,6 +2793,9 @@ def nodes(
def pending_tasks(
self,
*,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
@@ -2578,6 +2833,14 @@ def pending_tasks(
``_
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: A comma-separated list of column names to display. It supports simple
@@ -2592,12 +2855,19 @@ def pending_tasks(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
- :param time: Unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str] = {}
__path = "/_cat/pending_tasks"
__query: t.Dict[str, t.Any] = {}
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
@@ -2636,6 +2906,9 @@ def pending_tasks(
def plugins(
self,
*,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
@@ -2659,6 +2932,9 @@ def plugins(
master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
pretty: t.Optional[bool] = None,
s: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ time: t.Optional[
+ t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
+ ] = None,
v: t.Optional[bool] = None,
) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
"""
@@ -2671,6 +2947,14 @@ def plugins(
``_
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: A comma-separated list of column names to display. It supports simple
@@ -2686,11 +2970,19 @@ def plugins(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str] = {}
__path = "/_cat/plugins"
__query: t.Dict[str, t.Any] = {}
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
@@ -2713,6 +3005,8 @@ def plugins(
__query["pretty"] = pretty
if s is not None:
__query["s"] = s
+ if time is not None:
+ __query["time"] = time
if v is not None:
__query["v"] = v
__headers = {"accept": "text/plain,application/json"}
@@ -2831,7 +3125,14 @@ def recovery(
to limit the request. Supports wildcards (`*`). To target all data streams
and indices, omit this parameter or use `*` or `_all`.
:param active_only: If `true`, the response only includes ongoing shard recoveries.
- :param bytes: The unit used to display byte values.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param detailed: If `true`, the response includes detailed information about
shard recoveries.
:param format: Specifies the format to return the columnar data in, can be set
@@ -2843,7 +3144,12 @@ def recovery(
:param s: A comma-separated list of column names or aliases that determines the
sort order. Sorting defaults to ascending and can be changed by setting `:asc`
or `:desc` as a suffix to the column name.
- :param time: The unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -2894,6 +3200,9 @@ def recovery(
def repositories(
self,
*,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
@@ -2904,6 +3213,9 @@ def repositories(
master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
pretty: t.Optional[bool] = None,
s: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ time: t.Optional[
+ t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
+ ] = None,
v: t.Optional[bool] = None,
) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
"""
@@ -2916,6 +3228,14 @@ def repositories(
``_
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: List of columns to appear in the response. Supports simple wildcards.
@@ -2929,11 +3249,19 @@ def repositories(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str] = {}
__path = "/_cat/repositories"
__query: t.Dict[str, t.Any] = {}
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
@@ -2954,6 +3282,8 @@ def repositories(
__query["pretty"] = pretty
if s is not None:
__query["s"] = s
+ if time is not None:
+ __query["time"] = time
if v is not None:
__query["v"] = v
__headers = {"accept": "text/plain,application/json"}
@@ -3029,6 +3359,9 @@ def segments(
master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
pretty: t.Optional[bool] = None,
s: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ time: t.Optional[
+ t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
+ ] = None,
v: t.Optional[bool] = None,
) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
"""
@@ -3045,7 +3378,14 @@ def segments(
:param index: A comma-separated list of data streams, indices, and aliases used
to limit the request. Supports wildcards (`*`). To target all data streams
and indices, omit this parameter or use `*` or `_all`.
- :param bytes: The unit used to display byte values.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: A comma-separated list of column names to display. It supports simple
@@ -3060,6 +3400,12 @@ def segments(
:param s: A comma-separated list of column names or aliases that determines the
sort order. Sorting defaults to ascending and can be changed by setting `:asc`
or `:desc` as a suffix to the column name.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -3092,6 +3438,8 @@ def segments(
__query["pretty"] = pretty
if s is not None:
__query["s"] = s
+ if time is not None:
+ __query["time"] = time
if v is not None:
__query["v"] = v
__headers = {"accept": "text/plain,application/json"}
@@ -3295,7 +3643,14 @@ def shards(
:param index: A comma-separated list of data streams, indices, and aliases used
to limit the request. Supports wildcards (`*`). To target all data streams
and indices, omit this parameter or use `*` or `_all`.
- :param bytes: The unit used to display byte values.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: List of columns to appear in the response. Supports simple wildcards.
@@ -3305,7 +3660,12 @@ def shards(
:param s: A comma-separated list of column names or aliases that determines the
sort order. Sorting defaults to ascending and can be changed by setting `:asc`
or `:desc` as a suffix to the column name.
- :param time: The unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -3355,6 +3715,9 @@ def snapshots(
self,
*,
repository: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
@@ -3425,6 +3788,14 @@ def snapshots(
:param repository: A comma-separated list of snapshot repositories used to limit
the request. Accepts wildcard expressions. `_all` returns all repositories.
If any repository fails during the request, Elasticsearch returns an error.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
    :param h: A comma-separated list of column names to display. It supports simple
@@ -3437,7 +3808,12 @@ def snapshots(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
- :param time: Unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -3448,6 +3824,8 @@ def snapshots(
__path_parts = {}
__path = "/_cat/snapshots"
__query: t.Dict[str, t.Any] = {}
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
@@ -3488,6 +3866,9 @@ def tasks(
self,
*,
actions: t.Optional[t.Sequence[str]] = None,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
detailed: t.Optional[bool] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
@@ -3562,6 +3943,14 @@ def tasks(
``_
:param actions: The task action names, which are used to limit the response.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param detailed: If `true`, the response includes detailed information about
shard recoveries.
:param format: Specifies the format to return the columnar data in, can be set
@@ -3576,7 +3965,12 @@ def tasks(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
- :param time: Unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param timeout: Period to wait for a response. If no response is received before
the timeout expires, the request fails and returns an error.
:param v: When set to `true` will enable verbose output.
@@ -3588,6 +3982,8 @@ def tasks(
__query: t.Dict[str, t.Any] = {}
if actions is not None:
__query["actions"] = actions
+ if bytes is not None:
+ __query["bytes"] = bytes
if detailed is not None:
__query["detailed"] = detailed
if error_trace is not None:
@@ -3633,6 +4029,9 @@ def templates(
self,
*,
name: t.Optional[str] = None,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
@@ -3660,6 +4059,9 @@ def templates(
master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
pretty: t.Optional[bool] = None,
s: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ time: t.Optional[
+ t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
+ ] = None,
v: t.Optional[bool] = None,
) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
"""
@@ -3675,6 +4077,14 @@ def templates(
:param name: The name of the template to return. Accepts wildcard expressions.
If omitted, all templates are returned.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
    :param h: A comma-separated list of column names to display. It supports simple
@@ -3689,6 +4099,12 @@ def templates(
:param s: List of columns that determine how the table should be sorted. Sorting
defaults to ascending and can be changed by setting `:asc` or `:desc` as
a suffix to the column name.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -3699,6 +4115,8 @@ def templates(
__path_parts = {}
__path = "/_cat/templates"
__query: t.Dict[str, t.Any] = {}
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
@@ -3719,6 +4137,8 @@ def templates(
__query["pretty"] = pretty
if s is not None:
__query["s"] = s
+ if time is not None:
+ __query["time"] = time
if v is not None:
__query["v"] = v
__headers = {"accept": "text/plain,application/json"}
@@ -3736,6 +4156,9 @@ def thread_pool(
self,
*,
thread_pool_patterns: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
@@ -3819,6 +4242,14 @@ def thread_pool(
:param thread_pool_patterns: A comma-separated list of thread pool names used
to limit the request. Accepts wildcard expressions.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param h: List of columns to appear in the response. Supports simple wildcards.
@@ -3832,7 +4263,12 @@ def thread_pool(
:param s: A comma-separated list of column names or aliases that determines the
sort order. Sorting defaults to ascending and can be changed by setting `:asc`
or `:desc` as a suffix to the column name.
- :param time: The unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -3843,6 +4279,8 @@ def thread_pool(
__path_parts = {}
__path = "/_cat/thread_pool"
__query: t.Dict[str, t.Any] = {}
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
@@ -3885,6 +4323,9 @@ def transforms(
*,
transform_id: t.Optional[str] = None,
allow_no_match: t.Optional[bool] = None,
+ bytes: t.Optional[
+ t.Union[str, t.Literal["b", "gb", "kb", "mb", "pb", "tb"]]
+ ] = None,
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
format: t.Optional[str] = None,
@@ -4084,6 +4525,14 @@ def transforms(
array when there are no matches and the subset of results when there are
partial matches. If `false`, the request returns a 404 status code when there
are no matches or only partial matches.
+ :param bytes: Sets the units for columns that contain a byte-size value. Note
+ that byte-size value units work in terms of powers of 1024. For instance
+ `1kb` means 1024 bytes, not 1000 bytes. If omitted, byte-size values are
+ rendered with a suffix such as `kb`, `mb`, or `gb`, chosen such that the
+ numeric value of the column is as small as possible whilst still being at
+ least `1.0`. If given, byte-size values are rendered as an integer with no
+ suffix, representing the value of the column in the chosen unit. Values that
+ are not an exact multiple of the chosen unit are rounded down.
:param format: Specifies the format to return the columnar data in, can be set
to `text`, `json`, `cbor`, `yaml`, or `smile`.
:param from_: Skips the specified number of transforms.
@@ -4093,7 +4542,12 @@ def transforms(
:param s: Comma-separated list of column names or column aliases used to sort
the response.
:param size: The maximum number of transforms to obtain.
- :param time: The unit used to display time values.
+ :param time: Sets the units for columns that contain a time duration. If omitted,
+ time duration values are rendered with a suffix such as `ms`, `s`, `m` or
+ `h`, chosen such that the numeric value of the column is as small as possible
+ whilst still being at least `1.0`. If given, time duration values are rendered
+ as an integer with no suffix. Values that are not an exact multiple of the
+ chosen unit are rounded down.
:param v: When set to `true` will enable verbose output.
"""
__path_parts: t.Dict[str, str]
@@ -4106,6 +4560,8 @@ def transforms(
__query: t.Dict[str, t.Any] = {}
if allow_no_match is not None:
__query["allow_no_match"] = allow_no_match
+ if bytes is not None:
+ __query["bytes"] = bytes
if error_trace is not None:
__query["error_trace"] = error_trace
if filter_path is not None:
diff --git a/elasticsearch/_sync/client/connector.py b/elasticsearch/_sync/client/connector.py
index 58771108a..c28a4c823 100644
--- a/elasticsearch/_sync/client/connector.py
+++ b/elasticsearch/_sync/client/connector.py
@@ -103,7 +103,7 @@ def delete(
:param connector_id: The unique identifier of the connector to be deleted
:param delete_sync_jobs: A flag indicating if associated sync jobs should be
- also removed. Defaults to false.
+ also removed.
:param hard: A flag indicating if the connector should be hard deleted.
"""
if connector_id in SKIP_IN_PATH:
@@ -360,7 +360,7 @@ def list(
:param connector_name: A comma-separated list of connector names to fetch connector
documents for
- :param from_: Starting offset (default: 0)
+ :param from_: Starting offset
:param include_deleted: A flag to indicate if the desired connector should be
fetched, even if it was soft-deleted.
:param index_name: A comma-separated list of connector index names to fetch connector
@@ -955,7 +955,7 @@ def sync_job_list(
``_
:param connector_id: A connector id to fetch connector sync jobs for
- :param from_: Starting offset (default: 0)
+ :param from_: Starting offset
:param job_type: A comma-separated list of job types to fetch the sync jobs for
:param size: Specifies a max number of results to get
:param status: A sync job status to fetch connector sync jobs for
diff --git a/elasticsearch/_sync/client/indices.py b/elasticsearch/_sync/client/indices.py
index 26ca08f14..e40be2cd7 100644
--- a/elasticsearch/_sync/client/indices.py
+++ b/elasticsearch/_sync/client/indices.py
@@ -812,11 +812,7 @@ def create_from(
raise ValueError("Empty value passed for parameter 'source'")
if dest in SKIP_IN_PATH:
raise ValueError("Empty value passed for parameter 'dest'")
- if create_from is None and body is None:
- raise ValueError(
- "Empty value passed for parameters 'create_from' and 'body', one of them should be set."
- )
- elif create_from is not None and body is not None:
+ if create_from is not None and body is not None:
raise ValueError("Cannot set both 'create_from' and 'body'")
__path_parts: t.Dict[str, str] = {
"source": _quote(source),
@@ -833,7 +829,11 @@ def create_from(
if pretty is not None:
__query["pretty"] = pretty
__body = create_from if create_from is not None else body
- __headers = {"accept": "application/json", "content-type": "application/json"}
+ if not __body:
+ __body = None
+ __headers = {"accept": "application/json"}
+ if __body is not None:
+ __headers["content-type"] = "application/json"
return self.perform_request( # type: ignore[return-value]
"PUT",
__path,
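
With the hunk above, `create_from` no longer insists on a request body and only sends a `content-type` header when a body is present. A minimal sketch, assuming a placeholder client and index names; the `settings_override` field is illustrative of the create_from request schema, not confirmed by this patch:

```python
from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200/")  # placeholder endpoint

# Copy an index without overrides; the request is now sent with no body.
es.indices.create_from(source="source-index", dest="dest-index")

# Overrides can still be supplied as before.
es.indices.create_from(
    source="source-index",
    dest="dest-index-2",
    create_from={"settings_override": {"index": {"number_of_replicas": 0}}},
)
```
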
@@ -4549,6 +4549,7 @@ def refresh(
For data streams, the API runs the refresh operation on the stream’s backing indices.
By default, Elasticsearch periodically refreshes indices every second, but only on indices that have received one search request or more in the last 30 seconds.
You can change this default interval with the index.refresh_interval setting.
+ In Elastic Cloud Serverless, the default refresh interval is 5 seconds across all indices.
Refresh requests are synchronous and do not return a response until the refresh operation completes.
Refreshes are resource-intensive.
To ensure good cluster performance, it's recommended to wait for Elasticsearch's periodic refresh rather than performing an explicit refresh when possible.
@@ -5414,7 +5415,9 @@ def shrink(
path_parts=__path_parts,
)
- @_rewrite_parameters()
+ @_rewrite_parameters(
+ body_name="index_template",
+ )
def simulate_index_template(
self,
*,
@@ -5425,6 +5428,8 @@ def simulate_index_template(
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
human: t.Optional[bool] = None,
include_defaults: t.Optional[bool] = None,
+ index_template: t.Optional[t.Mapping[str, t.Any]] = None,
+ body: t.Optional[t.Mapping[str, t.Any]] = None,
master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
pretty: t.Optional[bool] = None,
) -> ObjectApiResponse[t.Any]:
@@ -5444,12 +5449,15 @@ def simulate_index_template(
only be dry-run added if new or can also replace an existing one
:param include_defaults: If true, returns all relevant default configurations
for the index template.
+ :param index_template:
:param master_timeout: Period to wait for a connection to the master node. If
no response is received before the timeout expires, the request fails and
returns an error.
"""
if name in SKIP_IN_PATH:
raise ValueError("Empty value passed for parameter 'name'")
+ if index_template is not None and body is not None:
+ raise ValueError("Cannot set both 'index_template' and 'body'")
__path_parts: t.Dict[str, str] = {"name": _quote(name)}
__path = f'/_index_template/_simulate_index/{__path_parts["name"]}'
__query: t.Dict[str, t.Any] = {}
@@ -5469,12 +5477,18 @@ def simulate_index_template(
__query["master_timeout"] = master_timeout
if pretty is not None:
__query["pretty"] = pretty
+ __body = index_template if index_template is not None else body
+ if not __body:
+ __body = None
__headers = {"accept": "application/json"}
+ if __body is not None:
+ __headers["content-type"] = "application/json"
return self.perform_request( # type: ignore[return-value]
"POST",
__path,
params=__query,
headers=__headers,
+ body=__body,
endpoint_id="indices.simulate_index_template",
path_parts=__path_parts,
)
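
The docstring leaves `index_template` undescribed; it mirrors the index template schema. A minimal, hypothetical call exercising the new body handling:

```python
from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200/")  # placeholder endpoint

# Simulate how `my-index` would resolve if this template definition were
# dry-run added; the template fields shown are illustrative.
resp = es.indices.simulate_index_template(
    name="my-index",
    index_template={
        "index_patterns": ["my-index*"],
        "template": {"settings": {"index": {"number_of_shards": 1}}},
    },
)
print(resp)
```
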
@@ -5823,8 +5837,8 @@ def stats(
are requested).
:param include_unloaded_segments: If true, the response includes information
from segments that are not loaded into memory.
- :param level: Indicates whether statistics are aggregated at the cluster, index,
- or shard level.
+ :param level: Indicates whether statistics are aggregated at the cluster, indices,
+ or shards level.
"""
__path_parts: t.Dict[str, str]
if index not in SKIP_IN_PATH and metric not in SKIP_IN_PATH:
diff --git a/elasticsearch/_sync/client/inference.py b/elasticsearch/_sync/client/inference.py
index eb430506c..448147133 100644
--- a/elasticsearch/_sync/client/inference.py
+++ b/elasticsearch/_sync/client/inference.py
@@ -2504,7 +2504,7 @@ def sparse_embedding(
)
@_rewrite_parameters(
- body_fields=("input", "task_settings"),
+ body_fields=("input", "input_type", "task_settings"),
)
def text_embedding(
self,
@@ -2514,6 +2514,7 @@ def text_embedding(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
human: t.Optional[bool] = None,
+ input_type: t.Optional[str] = None,
pretty: t.Optional[bool] = None,
task_settings: t.Optional[t.Any] = None,
timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
@@ -2529,6 +2530,13 @@ def text_embedding(
:param inference_id: The inference Id
:param input: Inference input. Either a string or an array of strings.
+        :param input_type: The input data type for the text embedding model. Possible
+            values include `SEARCH`, `INGEST`, `CLASSIFICATION`, and `CLUSTERING`. Not
+            all services support all values; unsupported values trigger a validation
+            exception. Accepted values depend on the configured inference service, so
+            refer to the relevant service-specific documentation for more info. Note
+            that an `input_type` given at the root level of the request body takes
+            precedence over an `input_type` given in `task_settings`.
:param task_settings: Optional task settings
:param timeout: Specifies the amount of time to wait for the inference request
to complete.
@@ -2554,6 +2562,8 @@ def text_embedding(
if not __body:
if input is not None:
__body["input"] = input
+ if input_type is not None:
+ __body["input_type"] = input_type
if task_settings is not None:
__body["task_settings"] = task_settings
if not __body:
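
A minimal sketch of the new top-level `input_type` field, assuming a pre-created inference endpoint (the endpoint id is a placeholder):

```python
from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200/")  # placeholder endpoint

resp = es.inference.text_embedding(
    inference_id="my-embedding-endpoint",     # hypothetical endpoint id
    input=["first passage", "second passage"],
    input_type="SEARCH",  # overrides any input_type in task_settings
)
print(resp)
```
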
diff --git a/elasticsearch/_sync/client/logstash.py b/elasticsearch/_sync/client/logstash.py
index f8abefa14..ae8e2a1dc 100644
--- a/elasticsearch/_sync/client/logstash.py
+++ b/elasticsearch/_sync/client/logstash.py
@@ -141,7 +141,9 @@ def put_pipeline(
``_
- :param id: An identifier for the pipeline.
+ :param id: An identifier for the pipeline. Pipeline IDs must begin with a letter
+        or underscore and contain only letters, underscores, hyphens, and
+        numbers.
:param pipeline:
"""
if id in SKIP_IN_PATH:
diff --git a/elasticsearch/_sync/client/nodes.py b/elasticsearch/_sync/client/nodes.py
index ef6c67b10..242c443a4 100644
--- a/elasticsearch/_sync/client/nodes.py
+++ b/elasticsearch/_sync/client/nodes.py
@@ -404,8 +404,8 @@ def stats(
are requested).
:param include_unloaded_segments: If `true`, the response includes information
from segments that are not loaded into memory.
- :param level: Indicates whether statistics are aggregated at the cluster, index,
- or shard level.
+ :param level: Indicates whether statistics are aggregated at the node, indices,
+ or shards level.
:param timeout: Period to wait for a response. If no response is received before
the timeout expires, the request fails and returns an error.
:param types: A comma-separated list of document types for the indexing index
diff --git a/elasticsearch/_sync/client/shutdown.py b/elasticsearch/_sync/client/shutdown.py
index d7ec41511..28b360ca3 100644
--- a/elasticsearch/_sync/client/shutdown.py
+++ b/elasticsearch/_sync/client/shutdown.py
@@ -33,13 +33,9 @@ def delete_node(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
human: t.Optional[bool] = None,
- master_timeout: t.Optional[
- t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
- ] = None,
+ master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
pretty: t.Optional[bool] = None,
- timeout: t.Optional[
- t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
- ] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
) -> ObjectApiResponse[t.Any]:
"""
.. raw:: html
@@ -97,9 +93,7 @@ def get_node(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
human: t.Optional[bool] = None,
- master_timeout: t.Optional[
- t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
- ] = None,
+ master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
pretty: t.Optional[bool] = None,
) -> ObjectApiResponse[t.Any]:
"""
@@ -162,14 +156,10 @@ def put_node(
error_trace: t.Optional[bool] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
human: t.Optional[bool] = None,
- master_timeout: t.Optional[
- t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
- ] = None,
+ master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
pretty: t.Optional[bool] = None,
target_node_name: t.Optional[str] = None,
- timeout: t.Optional[
- t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
- ] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
body: t.Optional[t.Dict[str, t.Any]] = None,
) -> ObjectApiResponse[t.Any]:
"""
diff --git a/elasticsearch/_sync/client/streams.py b/elasticsearch/_sync/client/streams.py
new file mode 100644
index 000000000..720f5f1e3
--- /dev/null
+++ b/elasticsearch/_sync/client/streams.py
@@ -0,0 +1,186 @@
+# Licensed to Elasticsearch B.V. under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Elasticsearch B.V. licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import typing as t
+
+from elastic_transport import ObjectApiResponse, TextApiResponse
+
+from ._base import NamespacedClient
+from .utils import (
+ Stability,
+ _rewrite_parameters,
+ _stability_warning,
+)
+
+
+class StreamsClient(NamespacedClient):
+
+ @_rewrite_parameters()
+ @_stability_warning(Stability.EXPERIMENTAL)
+ def logs_disable(
+ self,
+ *,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+ pretty: t.Optional[bool] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+ ) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
+ """
+ .. raw:: html
+
+ Disable logs stream.
+ Turn off the logs stream feature for this cluster.
+
+
+ ``_
+
+ :param master_timeout: The period to wait for a connection to the master node.
+ If no response is received before the timeout expires, the request fails
+ and returns an error.
+ :param timeout: The period to wait for a response. If no response is received
+ before the timeout expires, the request fails and returns an error.
+ """
+ __path_parts: t.Dict[str, str] = {}
+ __path = "/_streams/logs/_disable"
+ __query: t.Dict[str, t.Any] = {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if master_timeout is not None:
+ __query["master_timeout"] = master_timeout
+ if pretty is not None:
+ __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
+ __headers = {"accept": "application/json,text/plain"}
+ return self.perform_request( # type: ignore[return-value]
+ "POST",
+ __path,
+ params=__query,
+ headers=__headers,
+ endpoint_id="streams.logs_disable",
+ path_parts=__path_parts,
+ )
+
+ @_rewrite_parameters()
+ @_stability_warning(Stability.EXPERIMENTAL)
+ def logs_enable(
+ self,
+ *,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+ pretty: t.Optional[bool] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+ ) -> t.Union[ObjectApiResponse[t.Any], TextApiResponse]:
+ """
+ .. raw:: html
+
+ Enable logs stream.
+ Turn on the logs stream feature for this cluster.
+ NOTE: To protect existing data, this feature can be turned on only if the
+    cluster does not have existing indices or data streams that match the pattern `logs|logs.*`.
+ If those indices or data streams exist, a 409 - Conflict response and error is returned.
+
+
+ ``_
+
+ :param master_timeout: The period to wait for a connection to the master node.
+ If no response is received before the timeout expires, the request fails
+ and returns an error.
+ :param timeout: The period to wait for a response. If no response is received
+ before the timeout expires, the request fails and returns an error.
+ """
+ __path_parts: t.Dict[str, str] = {}
+ __path = "/_streams/logs/_enable"
+ __query: t.Dict[str, t.Any] = {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if master_timeout is not None:
+ __query["master_timeout"] = master_timeout
+ if pretty is not None:
+ __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
+ __headers = {"accept": "application/json,text/plain"}
+ return self.perform_request( # type: ignore[return-value]
+ "POST",
+ __path,
+ params=__query,
+ headers=__headers,
+ endpoint_id="streams.logs_enable",
+ path_parts=__path_parts,
+ )
+
+ @_rewrite_parameters()
+ @_stability_warning(Stability.EXPERIMENTAL)
+ def status(
+ self,
+ *,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ master_timeout: t.Optional[
+ t.Union[str, t.Literal["d", "h", "m", "micros", "ms", "nanos", "s"]]
+ ] = None,
+ pretty: t.Optional[bool] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ Get the status of streams.
+ Get the current status for all types of streams.
+
+
+ ``_
+
+ :param master_timeout: Period to wait for a connection to the master node. If
+ no response is received before the timeout expires, the request fails and
+ returns an error.
+ """
+ __path_parts: t.Dict[str, str] = {}
+ __path = "/_streams/status"
+ __query: t.Dict[str, t.Any] = {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if master_timeout is not None:
+ __query["master_timeout"] = master_timeout
+ if pretty is not None:
+ __query["pretty"] = pretty
+ __headers = {"accept": "application/json"}
+ return self.perform_request( # type: ignore[return-value]
+ "GET",
+ __path,
+ params=__query,
+ headers=__headers,
+ endpoint_id="streams.status",
+ path_parts=__path_parts,
+ )
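
The new namespace is wired into the client below; a minimal sketch of the three endpoints, which take no body and emit an experimental-API warning via `@_stability_warning`:

```python
from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200/")  # placeholder endpoint

es.streams.logs_enable(timeout="30s")  # 409 Conflict if logs* indices exist
print(es.streams.status())             # current status of all stream types
es.streams.logs_disable()
```
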
diff --git a/elasticsearch/_sync/client/watcher.py b/elasticsearch/_sync/client/watcher.py
index 9839cb80b..d14f8481d 100644
--- a/elasticsearch/_sync/client/watcher.py
+++ b/elasticsearch/_sync/client/watcher.py
@@ -552,11 +552,7 @@ def put_watch(
__body["transform"] = transform
if trigger is not None:
__body["trigger"] = trigger
- if not __body:
- __body = None # type: ignore[assignment]
- __headers = {"accept": "application/json"}
- if __body is not None:
- __headers["content-type"] = "application/json"
+ __headers = {"accept": "application/json", "content-type": "application/json"}
return self.perform_request( # type: ignore[return-value]
"PUT",
__path,
diff --git a/elasticsearch/_version.py b/elasticsearch/_version.py
index 8e8baef62..d24853de6 100644
--- a/elasticsearch/_version.py
+++ b/elasticsearch/_version.py
@@ -16,3 +16,4 @@
# under the License.
__versionstr__ = "9.1.1"
+__es_specification_commit__ = "cc623e3b52dd3dfd85848ee992713d37da020bfb"
diff --git a/elasticsearch/client.py b/elasticsearch/client.py
index 926ed5fe3..d2ee717c8 100644
--- a/elasticsearch/client.py
+++ b/elasticsearch/client.py
@@ -62,6 +62,7 @@
from ._sync.client.snapshot import SnapshotClient as SnapshotClient # noqa: F401
from ._sync.client.sql import SqlClient as SqlClient # noqa: F401
from ._sync.client.ssl import SslClient as SslClient # noqa: F401
+from ._sync.client.streams import StreamsClient as StreamsClient # noqa: F401
from ._sync.client.synonyms import SynonymsClient as SynonymsClient # noqa: F401
from ._sync.client.tasks import TasksClient as TasksClient # noqa: F401
from ._sync.client.text_structure import ( # noqa: F401
@@ -115,6 +116,7 @@
"SnapshotClient",
"SqlClient",
"SslClient",
+ "StreamsClient",
"TasksClient",
"TextStructureClient",
"TransformClient",
diff --git a/elasticsearch/dsl/aggs.py b/elasticsearch/dsl/aggs.py
index 802d6eca0..2a6b2ff91 100644
--- a/elasticsearch/dsl/aggs.py
+++ b/elasticsearch/dsl/aggs.py
@@ -653,6 +653,54 @@ def __init__(
)
+class CartesianBounds(Agg[_R]):
+ """
+ A metric aggregation that computes the spatial bounding box containing
+ all values for a Point or Shape field.
+
+ :arg field: The field on which to run the aggregation.
+ :arg missing: The value to apply to documents that do not have a
+ value. By default, documents without a value are ignored.
+ :arg script:
+ """
+
+ name = "cartesian_bounds"
+
+ def __init__(
+ self,
+ *,
+ field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
+ missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT,
+ script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT,
+ **kwargs: Any,
+ ):
+ super().__init__(field=field, missing=missing, script=script, **kwargs)
+
+
+class CartesianCentroid(Agg[_R]):
+ """
+ A metric aggregation that computes the weighted centroid from all
+ coordinate values for point and shape fields.
+
+ :arg field: The field on which to run the aggregation.
+ :arg missing: The value to apply to documents that do not have a
+ value. By default, documents without a value are ignored.
+ :arg script:
+ """
+
+ name = "cartesian_centroid"
+
+ def __init__(
+ self,
+ *,
+ field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
+ missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT,
+ script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT,
+ **kwargs: Any,
+ ):
+ super().__init__(field=field, missing=missing, script=script, **kwargs)
+
+
class CategorizeText(Bucket[_R]):
"""
A multi-bucket aggregation that groups semi-structured text into
@@ -735,6 +783,43 @@ def __init__(
)
+class ChangePoint(Pipeline[_R]):
+ """
+    A sibling pipeline aggregation that detects spikes, dips, and change
+    points in a metric. Given a distribution of values provided by the
+    sibling multi-bucket aggregation, this aggregation indicates the
+    bucket of any spike or dip and/or the bucket at which the largest
+    change in the distribution of values occurred, if the changes are
+    statistically significant. There must be at least 22 bucketed
+    values. Fewer than 1,000 is preferred.
+
+ :arg format: `DecimalFormat` pattern for the output value. If
+ specified, the formatted value is returned in the aggregation’s
+ `value_as_string` property.
+ :arg gap_policy: Policy to apply when gaps are found in the data.
+ Defaults to `skip` if omitted.
+ :arg buckets_path: Path to the buckets that contain one set of values
+ to correlate.
+ """
+
+ name = "change_point"
+
+ def __init__(
+ self,
+ *,
+ format: Union[str, "DefaultType"] = DEFAULT,
+ gap_policy: Union[
+ Literal["skip", "insert_zeros", "keep_values"], "DefaultType"
+ ] = DEFAULT,
+ buckets_path: Union[
+ str, Sequence[str], Mapping[str, str], "DefaultType"
+ ] = DEFAULT,
+ **kwargs: Any,
+ ):
+ super().__init__(
+ format=format, gap_policy=gap_policy, buckets_path=buckets_path, **kwargs
+ )
+
+
class Children(Bucket[_R]):
"""
A single bucket aggregation that selects child documents that have the
@@ -2980,6 +3065,14 @@ class SignificantTerms(Bucket[_R]):
the foreground sample with a term divided by the number of
documents in the background with the term.
:arg script_heuristic: Customized score, implemented via a script.
+ :arg p_value: Significant terms heuristic that calculates the p-value
+ between the term existing in foreground and background sets. The
+ p-value is the probability of obtaining test results at least as
+ extreme as the results actually observed, under the assumption
+ that the null hypothesis is correct. The p-value is calculated
+        assuming that the foreground set and the background set are
+        independent Bernoulli trials (https://en.wikipedia.org/wiki/Bernoulli_trial),
+        with the null hypothesis that the probabilities are the same.
:arg shard_min_doc_count: Regulates the certainty a shard has if the
term should actually be added to the candidate list or not with
respect to the `min_doc_count`. Terms will only be considered if
@@ -3033,6 +3126,9 @@ def __init__(
script_heuristic: Union[
"types.ScriptedHeuristic", Dict[str, Any], "DefaultType"
] = DEFAULT,
+ p_value: Union[
+ "types.PValueHeuristic", Dict[str, Any], "DefaultType"
+ ] = DEFAULT,
shard_min_doc_count: Union[int, "DefaultType"] = DEFAULT,
shard_size: Union[int, "DefaultType"] = DEFAULT,
size: Union[int, "DefaultType"] = DEFAULT,
@@ -3051,6 +3147,7 @@ def __init__(
mutual_information=mutual_information,
percentage=percentage,
script_heuristic=script_heuristic,
+ p_value=p_value,
shard_min_doc_count=shard_min_doc_count,
shard_size=shard_size,
size=size,
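
A minimal DSL sketch combining the new aggregation classes; the index and field names are hypothetical, and `change_point` references its sibling metric through `buckets_path`:

```python
from elasticsearch.dsl import Search, aggs

s = Search(index="factory-metrics")
s.aggs.bucket(
    "per_hour", "date_histogram", field="@timestamp", fixed_interval="1h"
).metric("avg_load", "avg", field="load")
# Sibling pipeline: detect spikes/dips/change points in the hourly averages.
s.aggs.pipeline("load_shift", aggs.ChangePoint(buckets_path="per_hour>avg_load"))
# Spatial metrics over a point/shape field.
s.aggs.metric("extent", aggs.CartesianBounds(field="position"))
s.aggs.metric("center", aggs.CartesianCentroid(field="position"))
print(s.to_dict())
```
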
diff --git a/elasticsearch/dsl/field.py b/elasticsearch/dsl/field.py
index 3f71f76eb..3b5075287 100644
--- a/elasticsearch/dsl/field.py
+++ b/elasticsearch/dsl/field.py
@@ -3866,14 +3866,21 @@ class SemanticText(Field):
by using the Update mapping API. Use the Create inference API to
create the endpoint. If not specified, the inference endpoint
defined by inference_id will be used at both index and query time.
+ :arg index_options: Settings for index_options that override any
+ defaults used by semantic_text, for example specific quantization
+ settings.
:arg chunking_settings: Settings for chunking text into smaller
passages. If specified, these will override the chunking settings
sent in the inference endpoint associated with inference_id. If
chunking settings are updated, they will not be applied to
existing documents until they are reindexed.
+ :arg fields:
"""
name = "semantic_text"
+ _param_defs = {
+ "fields": {"type": "field", "hash": True},
+ }
def __init__(
self,
@@ -3881,9 +3888,13 @@ def __init__(
meta: Union[Mapping[str, str], "DefaultType"] = DEFAULT,
inference_id: Union[str, "DefaultType"] = DEFAULT,
search_inference_id: Union[str, "DefaultType"] = DEFAULT,
+ index_options: Union[
+ "types.SemanticTextIndexOptions", Dict[str, Any], "DefaultType"
+ ] = DEFAULT,
chunking_settings: Union[
- "types.ChunkingSettings", Dict[str, Any], "DefaultType"
+ "types.ChunkingSettings", None, Dict[str, Any], "DefaultType"
] = DEFAULT,
+ fields: Union[Mapping[str, Field], "DefaultType"] = DEFAULT,
**kwargs: Any,
):
if meta is not DEFAULT:
@@ -3892,8 +3903,12 @@ def __init__(
kwargs["inference_id"] = inference_id
if search_inference_id is not DEFAULT:
kwargs["search_inference_id"] = search_inference_id
+ if index_options is not DEFAULT:
+ kwargs["index_options"] = index_options
if chunking_settings is not DEFAULT:
kwargs["chunking_settings"] = chunking_settings
+ if fields is not DEFAULT:
+ kwargs["fields"] = fields
super().__init__(*args, **kwargs)
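
A minimal mapping sketch using the new options; the inference endpoint id and quantization choice are illustrative, and the top-level `SemanticText` import assumes the usual `elasticsearch.dsl` re-exports:

```python
from elasticsearch.dsl import Document, SemanticText, types

class Article(Document):
    body = SemanticText(
        inference_id="my-embedding-endpoint",  # hypothetical endpoint id
        index_options=types.SemanticTextIndexOptions(
            dense_vector=types.DenseVectorIndexOptions(type="bbq_hnsw"),
        ),
    )

    class Index:
        name = "articles"

# Inspect the generated mapping without touching a cluster.
print(Article._index.to_dict())
```
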
diff --git a/elasticsearch/dsl/query.py b/elasticsearch/dsl/query.py
index 0a2cef032..927af6ad4 100644
--- a/elasticsearch/dsl/query.py
+++ b/elasticsearch/dsl/query.py
@@ -1079,6 +1079,8 @@ class Knn(Query):
a query_vector_builder or query_vector, but not both.
:arg num_candidates: The number of nearest neighbor candidates to
consider per shard
+ :arg visit_percentage: The percentage of vectors to explore per shard
+        while doing knn search with `bbq_disk`.
:arg k: The final number of nearest neighbors to return as top hits
:arg filter: Filters for the kNN search query
:arg similarity: The minimum similarity for a vector to be considered
@@ -1107,6 +1109,7 @@ def __init__(
"types.QueryVectorBuilder", Dict[str, Any], "DefaultType"
] = DEFAULT,
num_candidates: Union[int, "DefaultType"] = DEFAULT,
+ visit_percentage: Union[float, "DefaultType"] = DEFAULT,
k: Union[int, "DefaultType"] = DEFAULT,
filter: Union[Query, Sequence[Query], "DefaultType"] = DEFAULT,
similarity: Union[float, "DefaultType"] = DEFAULT,
@@ -1122,6 +1125,7 @@ def __init__(
query_vector=query_vector,
query_vector_builder=query_vector_builder,
num_candidates=num_candidates,
+ visit_percentage=visit_percentage,
k=k,
filter=filter,
similarity=similarity,
@@ -1433,7 +1437,7 @@ def __init__(
] = DEFAULT,
version: Union[int, "DefaultType"] = DEFAULT,
version_type: Union[
- Literal["internal", "external", "external_gte", "force"], "DefaultType"
+ Literal["internal", "external", "external_gte"], "DefaultType"
] = DEFAULT,
boost: Union[float, "DefaultType"] = DEFAULT,
_name: Union[str, "DefaultType"] = DEFAULT,
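
A minimal query sketch with the new parameter; the field name, vector, and `visit_percentage` value are illustrative and only meaningful for `bbq_disk` indices:

```python
from elasticsearch.dsl import Search
from elasticsearch.dsl.query import Knn

q = Knn(
    field="embedding",
    query_vector=[0.12, -0.53, 0.91],
    k=10,
    num_candidates=100,
    visit_percentage=1.5,  # percent of vectors to explore per shard
)
s = Search(index="vectors").query(q)
print(s.to_dict())
```
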
diff --git a/elasticsearch/dsl/response/__init__.py b/elasticsearch/dsl/response/__init__.py
index 712cda27b..b58464e16 100644
--- a/elasticsearch/dsl/response/__init__.py
+++ b/elasticsearch/dsl/response/__init__.py
@@ -233,10 +233,13 @@ def search_after(self) -> "SearchBase[_R]":
"types.SimpleValueAggregate",
"types.DerivativeAggregate",
"types.BucketMetricValueAggregate",
+ "types.ChangePointAggregate",
"types.StatsAggregate",
"types.StatsBucketAggregate",
"types.ExtendedStatsAggregate",
"types.ExtendedStatsBucketAggregate",
+ "types.CartesianBoundsAggregate",
+ "types.CartesianCentroidAggregate",
"types.GeoBoundsAggregate",
"types.GeoCentroidAggregate",
"types.HistogramAggregate",
diff --git a/elasticsearch/dsl/types.py b/elasticsearch/dsl/types.py
index 534521437..b62fad025 100644
--- a/elasticsearch/dsl/types.py
+++ b/elasticsearch/dsl/types.py
@@ -151,9 +151,10 @@ class ChunkingSettings(AttrDict[Any]):
strategies in the linked documentation. Defaults to `sentence` if
omitted.
:arg max_chunk_size: (required) The maximum size of a chunk in words.
- This value cannot be higher than `300` or lower than `20` (for
- `sentence` strategy) or `10` (for `word` strategy). Defaults to
- `250` if omitted.
+ This value cannot be lower than `20` (for `sentence` strategy) or
+ `10` (for `word` strategy). This value should not exceed the
+ window size for the associated model. Defaults to `250` if
+ omitted.
:arg separator_group: Only applicable to the `recursive` strategy and
required when using it. Sets a predefined list of separators in
the saved chunking settings based on the selected text type.
@@ -397,14 +398,17 @@ class DenseVectorIndexOptions(AttrDict[Any]):
HNSW graph. Only applicable to `hnsw`, `int8_hnsw`, `bbq_hnsw`,
and `int4_hnsw` index types. Defaults to `16` if omitted.
:arg rescore_vector: The rescore vector options. This is only
- applicable to `bbq_hnsw`, `int4_hnsw`, `int8_hnsw`, `bbq_flat`,
- `int4_flat`, and `int8_flat` index types.
+ applicable to `bbq_disk`, `bbq_hnsw`, `int4_hnsw`, `int8_hnsw`,
+ `bbq_flat`, `int4_flat`, and `int8_flat` index types.
+    :arg on_disk_rescore: `true` if vector rescoring should be done
+        on-disk. Only applicable to `bbq_hnsw`.
"""
type: Union[
Literal[
"bbq_flat",
"bbq_hnsw",
+ "bbq_disk",
"flat",
"hnsw",
"int4_flat",
@@ -420,6 +424,7 @@ class DenseVectorIndexOptions(AttrDict[Any]):
rescore_vector: Union[
"DenseVectorIndexOptionsRescoreVector", Dict[str, Any], DefaultType
]
+ on_disk_rescore: Union[bool, DefaultType]
def __init__(
self,
@@ -428,6 +433,7 @@ def __init__(
Literal[
"bbq_flat",
"bbq_hnsw",
+ "bbq_disk",
"flat",
"hnsw",
"int4_flat",
@@ -443,6 +449,7 @@ def __init__(
rescore_vector: Union[
"DenseVectorIndexOptionsRescoreVector", Dict[str, Any], DefaultType
] = DEFAULT,
+ on_disk_rescore: Union[bool, DefaultType] = DEFAULT,
**kwargs: Any,
):
if type is not DEFAULT:
@@ -455,6 +462,8 @@ def __init__(
kwargs["m"] = m
if rescore_vector is not DEFAULT:
kwargs["rescore_vector"] = rescore_vector
+ if on_disk_rescore is not DEFAULT:
+ kwargs["on_disk_rescore"] = on_disk_rescore
super().__init__(kwargs)
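
A minimal sketch of the two additions, using the `DenseVectorIndexOptionsRescoreVector` helper referenced in the annotations above; the `oversample` value is illustrative:

```python
from elasticsearch.dsl import types

# bbq_hnsw with the new on-disk rescoring flag.
hnsw_opts = types.DenseVectorIndexOptions(type="bbq_hnsw", on_disk_rescore=True)

# The new bbq_disk type, combined with rescore options.
disk_opts = types.DenseVectorIndexOptions(
    type="bbq_disk",
    rescore_vector=types.DenseVectorIndexOptionsRescoreVector(oversample=3.0),
)
```
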
@@ -2326,9 +2335,7 @@ class LikeDocument(AttrDict[Any]):
per_field_analyzer: Union[Mapping[Union[str, InstrumentedField], str], DefaultType]
routing: Union[str, DefaultType]
version: Union[int, DefaultType]
- version_type: Union[
- Literal["internal", "external", "external_gte", "force"], DefaultType
- ]
+ version_type: Union[Literal["internal", "external", "external_gte"], DefaultType]
def __init__(
self,
@@ -2343,7 +2350,7 @@ def __init__(
routing: Union[str, DefaultType] = DEFAULT,
version: Union[int, DefaultType] = DEFAULT,
version_type: Union[
- Literal["internal", "external", "external_gte", "force"], DefaultType
+ Literal["internal", "external", "external_gte"], DefaultType
] = DEFAULT,
**kwargs: Any,
):
@@ -2774,6 +2781,31 @@ def __init__(
super().__init__(kwargs)
+class PValueHeuristic(AttrDict[Any]):
+ """
+ :arg background_is_superset:
+    :arg normalize_above: Whether the results should be normalized when
+        above the given value. This allows for consistent significance
+        results at various scales. Note: `0` is a special value which
+        means no normalization.
+ """
+
+ background_is_superset: Union[bool, DefaultType]
+ normalize_above: Union[int, DefaultType]
+
+ def __init__(
+ self,
+ *,
+ background_is_superset: Union[bool, DefaultType] = DEFAULT,
+ normalize_above: Union[int, DefaultType] = DEFAULT,
+ **kwargs: Any,
+ ):
+ if background_is_superset is not DEFAULT:
+ kwargs["background_is_superset"] = background_is_superset
+ if normalize_above is not DEFAULT:
+ kwargs["normalize_above"] = normalize_above
+ super().__init__(kwargs)
+
+
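
A minimal sketch wiring the new heuristic into a `significant_terms` aggregation (the field name is hypothetical):

```python
from elasticsearch.dsl import aggs, types

sig = aggs.SignificantTerms(
    field="tags",
    p_value=types.PValueHeuristic(
        background_is_superset=True,
        normalize_above=1000,
    ),
)
print(sig.to_dict())
```
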
class PercentageScoreHeuristic(AttrDict[Any]):
pass
@@ -3164,6 +3196,33 @@ def __init__(
super().__init__(kwargs)
+class SemanticTextIndexOptions(AttrDict[Any]):
+ """
+ :arg dense_vector:
+ :arg sparse_vector:
+ """
+
+ dense_vector: Union["DenseVectorIndexOptions", Dict[str, Any], DefaultType]
+ sparse_vector: Union["SparseVectorIndexOptions", Dict[str, Any], DefaultType]
+
+ def __init__(
+ self,
+ *,
+ dense_vector: Union[
+ "DenseVectorIndexOptions", Dict[str, Any], DefaultType
+ ] = DEFAULT,
+ sparse_vector: Union[
+ "SparseVectorIndexOptions", Dict[str, Any], DefaultType
+ ] = DEFAULT,
+ **kwargs: Any,
+ ):
+ if dense_vector is not DEFAULT:
+ kwargs["dense_vector"] = dense_vector
+ if sparse_vector is not DEFAULT:
+ kwargs["sparse_vector"] = sparse_vector
+ super().__init__(kwargs)
+
+
class ShapeFieldQuery(AttrDict[Any]):
"""
:arg indexed_shape: Queries using a pre-indexed shape.
@@ -4009,24 +4068,25 @@ def __init__(
class TextEmbedding(AttrDict[Any]):
"""
- :arg model_id: (required)
:arg model_text: (required)
+    :arg model_id: The model ID is required for all dense_vector fields but
+        may be inferred for semantic_text fields.
"""
- model_id: Union[str, DefaultType]
model_text: Union[str, DefaultType]
+ model_id: Union[str, DefaultType]
def __init__(
self,
*,
- model_id: Union[str, DefaultType] = DEFAULT,
model_text: Union[str, DefaultType] = DEFAULT,
+ model_id: Union[str, DefaultType] = DEFAULT,
**kwargs: Any,
):
- if model_id is not DEFAULT:
- kwargs["model_id"] = model_id
if model_text is not DEFAULT:
kwargs["model_text"] = model_text
+ if model_id is not DEFAULT:
+ kwargs["model_id"] = model_id
super().__init__(kwargs)
@@ -4659,6 +4719,82 @@ class CardinalityAggregate(AttrDict[Any]):
meta: Mapping[str, Any]
+class CartesianBoundsAggregate(AttrDict[Any]):
+ """
+ :arg bounds:
+ :arg meta:
+ """
+
+ bounds: "TopLeftBottomRightGeoBounds"
+ meta: Mapping[str, Any]
+
+
+class CartesianCentroidAggregate(AttrDict[Any]):
+ """
+ :arg count: (required)
+ :arg location:
+ :arg meta:
+ """
+
+ count: int
+ location: "CartesianPoint"
+ meta: Mapping[str, Any]
+
+
+class CartesianPoint(AttrDict[Any]):
+ """
+ :arg x: (required)
+ :arg y: (required)
+ """
+
+ x: float
+ y: float
+
+
+class ChangePointAggregate(AttrDict[Any]):
+ """
+ :arg type: (required)
+ :arg bucket:
+ :arg meta:
+ """
+
+ type: "ChangeType"
+ bucket: "ChangePointBucket"
+ meta: Mapping[str, Any]
+
+
+class ChangePointBucket(AttrDict[Any]):
+ """
+ :arg key: (required)
+ :arg doc_count: (required)
+ """
+
+ key: Union[int, float, str, bool, None]
+ doc_count: int
+
+
+class ChangeType(AttrDict[Any]):
+ """
+ :arg dip:
+ :arg distribution_change:
+ :arg indeterminable:
+ :arg non_stationary:
+ :arg spike:
+ :arg stationary:
+ :arg step_change:
+ :arg trend_change:
+ """
+
+ dip: "Dip"
+ distribution_change: "DistributionChange"
+ indeterminable: "Indeterminable"
+ non_stationary: "NonStationary"
+ spike: "Spike"
+ stationary: "Stationary"
+ step_change: "StepChange"
+ trend_change: "TrendChange"
+
+
class ChildrenAggregate(AttrDict[Any]):
"""
:arg doc_count: (required)
@@ -4936,6 +5072,26 @@ class DfsStatisticsProfile(AttrDict[Any]):
children: Sequence["DfsStatisticsProfile"]
+class Dip(AttrDict[Any]):
+ """
+ :arg p_value: (required)
+ :arg change_point: (required)
+ """
+
+ p_value: float
+ change_point: int
+
+
+class DistributionChange(AttrDict[Any]):
+ """
+ :arg p_value: (required)
+ :arg change_point: (required)
+ """
+
+ p_value: float
+ change_point: int
+
+
class DoubleTermsAggregate(AttrDict[Any]):
"""
Result of a `terms` aggregation when the field is some kind of decimal
@@ -5497,6 +5653,14 @@ class HitsMetadata(AttrDict[Any]):
max_score: Union[float, None]
+class Indeterminable(AttrDict[Any]):
+ """
+ :arg reason: (required)
+ """
+
+ reason: str
+
+
class InferenceAggregate(AttrDict[Any]):
"""
:arg value:
@@ -5899,6 +6063,18 @@ class NestedIdentity(AttrDict[Any]):
_nested: "NestedIdentity"
+class NonStationary(AttrDict[Any]):
+ """
+ :arg p_value: (required)
+ :arg r_value: (required)
+ :arg trend: (required)
+ """
+
+ p_value: float
+ r_value: float
+ trend: str
+
+
class ParentAggregate(AttrDict[Any]):
"""
:arg doc_count: (required)
@@ -6256,6 +6432,16 @@ class SimpleValueAggregate(AttrDict[Any]):
meta: Mapping[str, Any]
+class Spike(AttrDict[Any]):
+ """
+ :arg p_value: (required)
+ :arg change_point: (required)
+ """
+
+ p_value: float
+ change_point: int
+
+
class StandardDeviationBounds(AttrDict[Any]):
"""
:arg upper: (required)
@@ -6292,6 +6478,10 @@ class StandardDeviationBoundsAsString(AttrDict[Any]):
lower_sampling: str
+class Stationary(AttrDict[Any]):
+ pass
+
+
class StatsAggregate(AttrDict[Any]):
"""
Statistics aggregation result. `min`, `max` and `avg` are missing if
@@ -6347,6 +6537,16 @@ class StatsBucketAggregate(AttrDict[Any]):
meta: Mapping[str, Any]
+class StepChange(AttrDict[Any]):
+ """
+ :arg p_value: (required)
+ :arg change_point: (required)
+ """
+
+ p_value: float
+ change_point: int
+
+
class StringRareTermsAggregate(AttrDict[Any]):
"""
Result of the `rare_terms` aggregation when the field is a string.
@@ -6578,6 +6778,18 @@ class TotalHits(AttrDict[Any]):
value: int
+class TrendChange(AttrDict[Any]):
+ """
+ :arg p_value: (required)
+ :arg r_value: (required)
+ :arg change_point: (required)
+ """
+
+ p_value: float
+ r_value: float
+ change_point: int
+
+
class UnmappedRareTermsAggregate(AttrDict[Any]):
"""
Result of a `rare_terms` aggregation when the field is unmapped.
diff --git a/pyproject.toml b/pyproject.toml
index 6b4915106..19307fe84 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -97,7 +97,7 @@ Homepage = "https://github.com/elastic/elasticsearch-py"
[tool.hatch.version]
path = "elasticsearch/_version.py"
-pattern = "__versionstr__ = \"(?P[^']+)\""
+pattern = "__versionstr__ = \"(?P[^']+?)\""
[tool.hatch.build.targets.sdist]
include = [