diff --git a/.ci/Dockerfile.elasticsearch b/.ci/Dockerfile.elasticsearch new file mode 100644 index 00000000..fb9f58cf --- /dev/null +++ b/.ci/Dockerfile.elasticsearch @@ -0,0 +1,24 @@ +ARG ELASTIC_STACK_VERSION +FROM docker.elastic.co/elasticsearch/elasticsearch$distribution_suffix:$ELASTIC_STACK_VERSION + +ARG plugin_path=/usr/share/plugins/plugin +ARG es_path=/usr/share/elasticsearch +ARG es_yml=$es_path/config/elasticsearch.yml +ARG SECURE_INTEGRATION +ARG ES_SSL_SUPPORTED_PROTOCOLS + +RUN rm -f $es_path/config/scripts + +COPY --chown=elasticsearch:elasticsearch spec/fixtures/test_certs/* $es_path/config/test_certs/ + +RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then echo "xpack.security.http.ssl.enabled: true" >> $es_yml; fi +RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then echo "xpack.security.http.ssl.key: $es_path/config/test_certs/es.key" >> $es_yml; fi +RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then echo "xpack.security.http.ssl.certificate: $es_path/config/test_certs/es.chain.crt" >> $es_yml; fi +RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then echo "xpack.security.http.ssl.certificate_authorities: [ '$es_path/config/test_certs/ca.crt' ]" >> $es_yml; fi +RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then echo "xpack.security.http.ssl.verification_mode: certificate" >> $es_yml; fi +RUN if [ "$SECURE_INTEGRATION" = "true" ] && [ -n "$ES_SSL_SUPPORTED_PROTOCOLS" ] ; then echo "xpack.security.http.ssl.supported_protocols: ${ES_SSL_SUPPORTED_PROTOCOLS}" >> $es_yml; fi + +RUN cat $es_yml + +RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then $es_path/bin/elasticsearch-users useradd simpleuser -p abc123 -r superuser; fi +RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then $es_path/bin/elasticsearch-users useradd 'f@ncyuser' -p 'ab%12#' -r superuser; fi diff --git a/.ci/docker-compose.override.yml b/.ci/docker-compose.override.yml new file mode 100644 index 00000000..eb8377c7 --- /dev/null +++ b/.ci/docker-compose.override.yml @@ -0,0 +1,35 @@ +version: '3' + +services: + + logstash: + command: /usr/share/plugins/plugin/.ci/logstash-run.sh + environment: + - ELASTIC_STACK_VERSION=$ELASTIC_STACK_VERSION + - INTEGRATION=${INTEGRATION:-false} + - SECURE_INTEGRATION=${SECURE_INTEGRATION:-false} + - ES_SSL_SUPPORTED_PROTOCOLS=$ES_SSL_SUPPORTED_PROTOCOLS + elasticsearch: + build: + context: ../ + dockerfile: .ci/Dockerfile.elasticsearch + args: + - ELASTIC_STACK_VERSION=$ELASTIC_STACK_VERSION + - INTEGRATION=${INTEGRATION:-false} + - SECURE_INTEGRATION=${SECURE_INTEGRATION:-false} + - ES_SSL_SUPPORTED_PROTOCOLS=$ES_SSL_SUPPORTED_PROTOCOLS + environment: + #- ELASTIC_PASSWORD=$ELASTIC_PASSWORD a user is setup manually + - xpack.security.enabled=${SECURE_INTEGRATION:-false} + - bootstrap.memory_lock=true + - discovery.type=single-node + - ES_JAVA_OPTS=-Xms640m -Xmx640m + tty: true + ulimits: + memlock: + soft: -1 + hard: -1 + ports: + - "9200:9200" + user: elasticsearch + diff --git a/.ci/docker-run.sh b/.ci/docker-run.sh new file mode 100755 index 00000000..02f0c42c --- /dev/null +++ b/.ci/docker-run.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +# This is intended to be run inside the docker container as the command of the docker-compose. 
+set -ex + +cd .ci + +if [ "$INTEGRATION" == "true" ]; then + docker-compose up --exit-code-from logstash +else + docker-compose up --exit-code-from logstash logstash +fi diff --git a/.ci/logstash-run.sh b/.ci/logstash-run.sh new file mode 100755 index 00000000..61892fbe --- /dev/null +++ b/.ci/logstash-run.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +env + +set -ex + +export PATH=$BUILD_DIR/gradle/bin:$PATH + +# CentOS 7 using curl defaults does not enable TLSv1.3 +CURL_OPTS="-k --tlsv1.2 --tls-max 1.3" + +wait_for_es() { + es_url="http://elasticsearch:9200/" + if [[ "$SECURE_INTEGRATION" == "true" ]]; then + es_url="https://elasticsearch:9200/" + fi + count=120 + while ! curl $CURL_OPTS $es_url && [[ $count -ne 0 ]]; do + count=$(( $count - 1 )) + [[ $count -eq 0 ]] && return 1 + sleep 1 + done + echo $(curl $CURL_OPTS -v $ES_URL) + + return 0 +} + +if [[ "$INTEGRATION" != "true" ]]; then + bundle exec rspec --format=documentation spec/inputs --tag ~integration --tag ~secure_integration +else + if [[ "$SECURE_INTEGRATION" == "true" ]]; then + extra_tag_args="--tag secure_integration" + else + extra_tag_args="--tag ~secure_integration --tag integration" + fi + + echo "Waiting for elasticsearch to respond..." + wait_for_es + echo "Elasticsearch is Up !" + bundle exec rspec --format=documentation $extra_tag_args --tag update_tests:painless --tag es_version:$ELASTIC_STACK_VERSION spec/inputs/integration +fi diff --git a/.ci/setup.sh b/.ci/setup.sh new file mode 100755 index 00000000..54a42db2 --- /dev/null +++ b/.ci/setup.sh @@ -0,0 +1,11 @@ +# user_agent requires /etc/protocols, which is provided by netbase. +# https://github.com/jruby/jruby/issues/3955 +if [ ! -f "/etc/protocols" ]; then + if [ $(command -v apt-get) ]; then + echo "installing netbase with apt-get" + sudo apt-get install -y netbase + else + echo "installing netbase with yum" + sudo yum install -y netbase + fi +fi \ No newline at end of file diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 00000000..d3253288 --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,65 @@ +# Contributing to Logstash + +All contributions are welcome: ideas, patches, documentation, bug reports, +complaints, etc! + +Programming is not a required skill, and there are many ways to help out! +It is more important to us that you are able to contribute. + +That said, some basic guidelines, which you are free to ignore :) + +## Want to learn? + +Want to lurk about and see what others are doing with Logstash? + +* The irc channel (#logstash on irc.freenode.org) is a good place for this +* The [forum](https://discuss.elastic.co/c/logstash) is also + great for learning from others. + +## Got Questions? + +Have a problem you want Logstash to solve for you? + +* You can ask a question in the [forum](https://discuss.elastic.co/c/logstash) +* Alternately, you are welcome to join the IRC channel #logstash on +irc.freenode.org and ask for help there! + +## Have an Idea or Feature Request? + +* File a ticket on [GitHub](https://github.com/elastic/logstash/issues). Please remember that GitHub is used only for issues and feature requests. If you have a general question, the [forum](https://discuss.elastic.co/c/logstash) or IRC would be the best place to ask. + +## Something Not Working? Found a Bug? + +If you think you found a bug, it probably is a bug. 
+ +* If it is a general Logstash or a pipeline issue, file it in [Logstash GitHub](https://github.com/elasticsearch/logstash/issues) +* If it is specific to a plugin, please file it in the respective repository under [logstash-plugins](https://github.com/logstash-plugins) +* or ask the [forum](https://discuss.elastic.co/c/logstash). + +# Contributing Documentation and Code Changes + +If you have a bugfix or new feature that you would like to contribute to +logstash, and you think it will take more than a few minutes to produce the fix +(ie; write code), it is worth discussing the change with the Logstash users and developers first! You can reach us via [GitHub](https://github.com/elastic/logstash/issues), the [forum](https://discuss.elastic.co/c/logstash), or via IRC (#logstash on freenode irc) +Please note that Pull Requests without tests will not be merged. If you would like to contribute but do not have experience with writing tests, please ping us on IRC/forum or create a PR and ask our help. + +## Contributing to plugins + +Check our [documentation](https://www.elastic.co/guide/en/logstash/current/contributing-to-logstash.html) on how to contribute to plugins or write your own! It is super easy! + +## Contribution Steps + +1. Test your changes! [Run](https://github.com/elastic/logstash#testing) the test suite +2. Please make sure you have signed our [Contributor License + Agreement](https://www.elastic.co/contributor-agreement/). We are not + asking you to assign copyright to us, but to give us the right to distribute + your code without restriction. We ask this of all contributors in order to + assure our users of the origin and continuing existence of the code. You + only need to sign the CLA once. +3. Send a pull request! Push your changes to your fork of the repository and + [submit a pull + request](https://help.github.com/articles/using-pull-requests). In the pull + request, describe what your changes do and mention any bugs/issues related + to the pull request. + + diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md new file mode 100644 index 00000000..c3cc91df --- /dev/null +++ b/.github/ISSUE_TEMPLATE.md @@ -0,0 +1,9 @@ +Please post all product and debugging questions on our [forum](https://discuss.elastic.co/c/logstash). Your questions will reach our wider community members there, and if we confirm that there is a bug, then we can open a new issue here. + +For all general issues, please provide the following details for fast resolution: + +- Version: +- Operating System: +- Config File (if you have sensitive info, please remove it): +- Sample Data: +- Steps to Reproduce: diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 00000000..a1538275 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1 @@ +Thanks for contributing to Logstash! 
If you haven't already signed our CLA, here's a handy link: https://www.elastic.co/contributor-agreement/ diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..f2356fc9 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,18 @@ +import: +- logstash-plugins/.ci:travis/travis.yml@1.x + +before_install: +- sudo sysctl -w vm.max_map_count=262144 # due ES bootstrap requirements + +jobs: + include: + - stage: "Integration Tests" + env: INTEGRATION=true SNAPSHOT=true LOG_LEVEL=info ELASTIC_STACK_VERSION=7.current + - env: INTEGRATION=true SNAPSHOT=true LOG_LEVEL=info ELASTIC_STACK_VERSION=8.previous + - env: INTEGRATION=true SNAPSHOT=true LOG_LEVEL=info ELASTIC_STACK_VERSION=8.current + - env: INTEGRATION=true SNAPSHOT=false LOG_LEVEL=info ELASTIC_STACK_VERSION=9.current + - env: INTEGRATION=true SNAPSHOT=true LOG_LEVEL=info ELASTIC_STACK_VERSION=9.next + - stage: "Secure Integration Tests" + env: SECURE_INTEGRATION=true INTEGRATION=true LOG_LEVEL=info ELASTIC_STACK_VERSION=8.current SNAPSHOT=true + - env: SECURE_INTEGRATION=true INTEGRATION=true LOG_LEVEL=info ELASTIC_STACK_VERSION=7.current + - env: SECURE_INTEGRATION=true INTEGRATION=true LOG_LEVEL=info ELASTIC_STACK_VERSION=7.current ES_SSL_SUPPORTED_PROTOCOLS=TLSv1.3 diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..cd025e52 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,231 @@ +## 5.2.1 + - Added support for encoded and non encoded api-key formats on plugin configuration [#237](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/237) + +## 5.2.0 + - ES|QL support [#233](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/233) + +## 5.1.0 + - Add "cursor"-like index tracking [#205](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/205) + +## 5.0.2 + - Add elastic-transport client support used in elasticsearch-ruby 8.x [#223](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/223) + +## 5.0.1 + - Fix: prevent plugin crash when hits contain illegal structure [#218](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/218) + - When a hit cannot be converted to an event, the input now emits an event tagged with `_elasticsearch_input_failure` with an `[event][original]` containing a JSON-encoded string representation of the entire hit. + +## 5.0.0 + - SSL settings that were marked deprecated in version `4.17.0` are now marked obsolete, and will prevent the plugin from starting. 
+   - These settings are:
+     - `ssl`, which should be replaced by `ssl_enabled`
+     - `ca_file`, which should be replaced by `ssl_certificate_authorities`
+     - `ssl_certificate_verification`, which should be replaced by `ssl_verification_mode`
+   - [#213](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/213)
+ - Add support for custom headers [#207](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/207)
+
+## 4.20.5
+ - Add `x-elastic-product-origin` header to Elasticsearch requests [#211](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/211)
+
+## 4.20.4
+ - Fix issue where the `index` parameter was being ignored when using `response_type => aggregations` [#209](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/209)
+
+## 4.20.3
+ - [DOC] Update link to bypass redirect, resolving directly to correct content [#206](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/206)
+
+## 4.20.2
+ - fix case when aggregation returns an error [#204](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/204)
+
+## 4.20.1
+ - Fix license header [#203](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/203)
+
+## 4.20.0
+ - Added `response_type` configuration option to allow processing result of aggregations [#202](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/202)
+
+## 4.19.1
+ - Plugin version bump to pick up docs fix in [#199](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/199) required to clear build error in docgen. [#200](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/200)
+
+## 4.19.0
+ - Added `search_api` option to support `search_after` and `scroll` [#198](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/198)
+   - The default value `auto` uses `search_after` for Elasticsearch >= 8; otherwise it falls back to `scroll`
+
+## 4.18.0
+ - Added request header `Elastic-Api-Version` for serverless [#195](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/195)
+
+## 4.17.2
+ - Fixes a regression introduced in 4.17.0 which could prevent a connection from being established to Elasticsearch in some SSL configurations [#193](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/193)
+
+## 4.17.1
+ - Fix: scroll slice high memory consumption [#189](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/189)
+
+## 4.17.0
+ - Added SSL settings for: [#185](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/185)
+   - `ssl_enabled`: Enable/disable the SSL settings.
If not provided, the value is inferred from the hosts scheme + - `ssl_certificate`: OpenSSL-style X.509 certificate file to authenticate the client + - `ssl_key`: OpenSSL-style RSA private key that corresponds to the `ssl_certificate` + - `ssl_truststore_path`: The JKS truststore to validate the server's certificate + - `ssl_truststore_type`: The format of the truststore file + - `ssl_truststore_password`: The truststore password + - `ssl_keystore_path`: The keystore used to present a certificate to the server + - `ssl_keystore_type`: The format of the keystore file + - `ssl_keystore_password`: The keystore password + - `ssl_cipher_suites`: The list of cipher suites to use + - `ssl_supported_protocols`: Supported protocols with versions + - Reviewed and deprecated SSL settings to comply with Logstash's naming convention + - Deprecated `ssl` in favor of `ssl_enabled` + - Deprecated `ca_file` in favor of `ssl_certificate_authorities` + - Deprecated `ssl_certificate_verification` in favor of `ssl_verification_mode` + +## 4.16.0 + - Added `ssl_certificate_verification` option to control SSL certificate verification [#180](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/180) + +## 4.15.0 + - Feat: add `retries` option. allow retry for failing query [#179](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/179) + +## 4.14.0 + - Refactor: switch to using scheduler mixin [#177](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/177) + +## 4.13.0 + - Added support for `ca_trusted_fingerprint` when run on Logstash 8.3+ [#178](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/178) + +## 4.12.3 + - Fix: update Elasticsearch Ruby client to correctly customize 'user-agent' header [#171](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/171) + +## 4.12.2 + - Fix: hosts => "es_host:port" regression [#168](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/168) + +## 4.12.1 + - Fixed too_long_frame_exception by passing scroll_id in the body [#159](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/159) + +## 4.12.0 + - Feat: Update Elasticsearch client to 7.14.0 [#157](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/157) + +## 4.11.0 + - Feat: add user-agent header passed to the Elasticsearch HTTP connection [#158](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/158) + +## 4.10.0 + - Feat: added ecs_compatibility + event_factory support [#149](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/149) + +## 4.9.3 + - Fixed SSL handshake hang indefinitely with proxy setup [#156](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/156) + +## 4.9.2 + - Fix: a regression (in LS 7.14.0) where due the elasticsearch client update (from 5.0.5 to 7.5.0) the `Authorization` + header isn't passed, this leads to the plugin not being able to leverage `user`/`password` credentials set by the user. + [#153](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/153) + +## 4.9.1 + - [DOC] Replaced hard-coded links with shared attributes [#143](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/143) + - [DOC] Added missing quote to docinfo_fields example [#145](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/145) + +## 4.9.0 + - Added `target` option, allowing the hit's source to target a specific field instead of being expanded at the root of the event. 
This allows the input to play nicer with the Elastic Common Schema when the input does not follow the schema. [#117](https://github.com/logstash-plugins/logstash-input-elasticsearch/issues/117) + +## 4.8.3 + - [DOC] Fixed links to restructured Logstash-to-cloud docs [#139](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/139) + +## 4.8.2 + - [DOC] Document the permissions required in secured clusters [#137](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/137) + +## 4.8.1 + - Fixed connection error when using multiple `slices`. [#133](https://github.com/logstash-plugins/logstash-input-elasticsearch/issues/133) + +## 4.8.0 + - Added the ability to configure connection-, request-, and socket-timeouts with `connect_timeout_seconds`, `request_timeout_seconds`, and `socket_timeout_seconds` [#121](https://github.com/logstash-plugins/logstash-input-elasticsearch/issues/121) + +## 4.7.1 + - [DOC] Updated sliced scroll link to resolve to correct location after doc structure change [#135](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/135) + - [DOC] Added usage example of docinfo metadata [#98](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/98) + +## 4.7.0 + - Added api_key support [#131](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/131) + +## 4.6.2 + - Added scroll clearing and better handling of scroll expiration [#128](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/128) + +## 4.6.1 + - [DOC] Removed outdated compatibility notice [#124](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/124) + +## 4.6.0 + - Feat: added option to specify proxy for ES [#114](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/114) + +## 4.5.0 + - Feat: Added support for cloud_id / cloud_auth configuration [#112](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/112) + +## 4.4.0 + - Changed Elasticsearch Client transport to use Manticore [#111](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/111) + +## 4.3.3 + - Loosen restrictions on Elasticsearch gem [#110](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/110) + +## 4.3.2 + - Fixed broken link to Elasticsearch Reference [#106](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/106) + +## 4.3.1 + - Fixed deeplink to Elasticsearch Reference [#103](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/103) + +## 4.3.0 + - Added managed slice scrolling with `slices` option + +## 4.2.1 + - Docs: Set the default_codec doc attribute. + +## 4.2.0 + - Docs: Deprecate `document_type` + - Add support for scheduling periodic execution of the query #81 + +## 4.1.1 + - Update gemspec summary + +## 4.1.0 + - Enable use of docinfo (@metadata) fields in `add_field` decorations + +## 4.0.6 + - Docs: Fix link syntax + +## 4.0.5 + - Fix some documentation issues + +## 4.0.3 + - Docs: Add requirement to use version 4.0.2 or higher to support sending Content-Type headers + - Fix scrolling to use json bodies in the requests (this makes scrolling not work in ES 1.x) + +## 4.0.2 + - Bump ES client to 5.0.2 to get content-type: json behavior + - Revert unneeded manticore change + +## 4.0.1 + - Switch internal HTTP client to support TLSv1.2 + - Upgrade ES client internally to better support ES 5.x + +## 4.0.0 + - Remove `scan` from list of options as this is no longer allowed in + Elasticsearch 5.0. 
+ - Change default query to sort by \_doc, as this replicates the `scan` + behavior + - Improve documentation to show sort by \_doc, and how to add it to custom + queries. + +## 3.0.2 + - Relax constraint on logstash-core-plugin-api to >= 1.60 <= 2.99 + +## 3.0.1 + - Republish all the gems under jruby. +## 3.0.0 + - Update the plugin to the version 2.0 of the plugin api, this change is required for Logstash 5.0 compatibility. See https://github.com/elastic/logstash/issues/5141 +# 2.0.5 + - Depend on logstash-core-plugin-api instead of logstash-core, removing the need to mass update plugins on major releases of logstash +# 2.0.4 + - New dependency requirements for logstash-core for the 5.0 release +## 2.0.3 + - Refactored field references and cleanups + +## 2.0.0 + - Plugins were updated to follow the new shutdown semantic, this mainly allows Logstash to instruct input plugins to terminate gracefully, + instead of using Thread.raise on the plugins' threads. Ref: https://github.com/elastic/logstash/pull/3895 + - Dependency on logstash-core update to 2.0 + +## 1.0.2 (September 3 - 2015) + - fix scan/scroll response handling + +## 1.0.1 + - refactor request logic into own method (better memory gc perf) diff --git a/CONTRIBUTORS b/CONTRIBUTORS new file mode 100644 index 00000000..673b50ad --- /dev/null +++ b/CONTRIBUTORS @@ -0,0 +1,18 @@ +The following is a list of people who have contributed ideas, code, bug +reports, or in general have helped logstash along its way. + +Contributors: +* Colin Surprenant (colinsurprenant) +* Jonathan Van Eenwyk (jdve) +* Jordan Sissel (jordansissel) +* João Duarte (jsvd) +* Kurt Hurtado (kurtado) +* Luca Belluccini (lucabelluccini) +* Pier-Hugues Pellerin (ph) +* Richard Pijnenburg (electrical) +* Suyog Rao (suyograo) + +Note: If you've sent us patches, bug reports, or otherwise contributed to +Logstash, and you aren't on the list above and want to be, please let us know +and we'll make sure you're here. Contributions from folks like you are what make +open source awesome. diff --git a/Gemfile b/Gemfile index ccb90122..85bf0375 100644 --- a/Gemfile +++ b/Gemfile @@ -1,4 +1,14 @@ source 'https://rubygems.org/' -gem 'rake' -gem 'gem_publisher' -gem 'archive-tar-minitar' + +gemspec + +logstash_path = ENV["LOGSTASH_PATH"] || "../../logstash" +use_logstash_source = ENV["LOGSTASH_SOURCE"] && ENV["LOGSTASH_SOURCE"].to_s == "1" + +if Dir.exist?(logstash_path) && use_logstash_source + gem 'logstash-core', :path => "#{logstash_path}/logstash-core" + gem 'logstash-core-plugin-api', :path => "#{logstash_path}/logstash-core-plugin-api" +end + +gem 'manticore', ENV['MANTICORE_VERSION'] if ENV['MANTICORE_VERSION'] +gem 'elasticsearch', ENV['ELASTICSEARCH_VERSION'] if ENV['ELASTICSEARCH_VERSION'] diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..a80a3fd5 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2020 Elastic and contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/NOTICE.TXT b/NOTICE.TXT new file mode 100644 index 00000000..0b8a9475 --- /dev/null +++ b/NOTICE.TXT @@ -0,0 +1,5 @@ +Elasticsearch +Copyright 2012-2015 Elasticsearch + +This product includes software developed by The Apache Software +Foundation (http://www.apache.org/). \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 00000000..c6a17d4d --- /dev/null +++ b/README.md @@ -0,0 +1,99 @@ +# Logstash Plugin + +[![Gem Version](https://badge.fury.io/rb/logstash-input-elasticsearch.svg)](https://badge.fury.io/rb/logstash-input-elasticsearch) +[![Travis Build Status](https://travis-ci.com/logstash-plugins/logstash-input-elasticsearch.svg)](https://travis-ci.com/logstash-plugins/logstash-input-elasticsearch) + +This is a plugin for [Logstash](https://github.com/elastic/logstash). + +It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way. + +## Documentation + +Logstash provides infrastructure to automatically generate documentation for this plugin. We use the asciidoc format to write documentation so any comments in the source code will be first converted into asciidoc and then into html. All plugin documentation are placed under one [central location](http://www.elastic.co/guide/en/logstash/current/). + +- For formatting code or config example, you can use the asciidoc `[source,ruby]` directive +- For more asciidoc formatting tips, see the excellent reference here https://github.com/elastic/docs#asciidoc-guide + +## Need Help? + +Need help? Try #logstash on freenode IRC or the https://discuss.elastic.co/c/logstash discussion forum. + +## Developing + +### 1. Plugin Developement and Testing + +#### Code +- To get started, you'll need JRuby with the Bundler gem installed. + +- Create a new plugin or clone and existing from the GitHub [logstash-plugins](https://github.com/logstash-plugins) organization. We also provide [example plugins](https://github.com/logstash-plugins?query=example). + +- Install dependencies +```sh +bundle install +``` + +#### Test + +- Update your dependencies + +```sh +bundle install +``` + +- Run tests + +```sh +bundle exec rspec +``` + +### 2. Running your unpublished Plugin in Logstash + +#### 2.1 Run in a local Logstash clone + +- Edit Logstash `Gemfile` and add the local plugin path, for example: +```ruby +gem "logstash-filter-awesome", :path => "/your/local/logstash-filter-awesome" +``` +- Install plugin +```sh +# Logstash 2.3 and higher +bin/logstash-plugin install --no-verify + +# Prior to Logstash 2.3 +bin/plugin install --no-verify + +``` +- Run Logstash with your plugin +```sh +bin/logstash -e 'filter {awesome {}}' +``` +At this point any modifications to the plugin code will be applied to this local Logstash setup. After modifying the plugin, simply rerun Logstash. 
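+
+For this plugin specifically, a quick way to exercise local changes is a minimal pipeline that reads from a test index and prints the documents. This is only a sketch: it assumes an Elasticsearch instance reachable at `localhost:9200`, and the index name and query are illustrative.
+
+```ruby
+# test.conf -- run with: bin/logstash -f test.conf
+input {
+  elasticsearch {
+    hosts => "localhost:9200"
+    index => "logstash-*"
+    query => '{ "query": { "match_all": {} }, "sort": [ "_doc" ] }'
+  }
+}
+output {
+  stdout { codec => rubydebug }   # print each retrieved document for inspection
+}
+```
+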
+ +#### 2.2 Run in an installed Logstash + +You can use the same **2.1** method to run your plugin in an installed Logstash by editing its `Gemfile` and pointing the `:path` to your local plugin development directory or you can build the gem and install it using: + +- Build your plugin gem +```sh +gem build logstash-filter-awesome.gemspec +``` +- Install the plugin from the Logstash home +```sh +# Logstash 2.3 and higher +bin/logstash-plugin install --no-verify + +# Prior to Logstash 2.3 +bin/plugin install --no-verify + +``` +- Start Logstash and proceed to test the plugin + +## Contributing + +All contributions are welcome: ideas, patches, documentation, bug reports, complaints, and even something you drew up on a napkin. + +Programming is not a required skill. Whatever you've seen about open source and maintainers or community members saying "send patches or die" - you will not see that here. + +It is more important to the community that you are able to contribute. + +For more information about contributing, see the [CONTRIBUTING](https://github.com/elastic/logstash/blob/master/CONTRIBUTING.md) file. diff --git a/Rakefile b/Rakefile index da6b220e..4f4b8586 100644 --- a/Rakefile +++ b/Rakefile @@ -4,3 +4,4 @@ task :default do system("rake -T") end +require "logstash/devutils/rake" diff --git a/docs/index.asciidoc b/docs/index.asciidoc new file mode 100644 index 00000000..677b11ec --- /dev/null +++ b/docs/index.asciidoc @@ -0,0 +1,950 @@ +:plugin: elasticsearch +:type: input +:default_codec: json + +/////////////////////////////////////////// +START - GENERATED VARIABLES, DO NOT EDIT! +/////////////////////////////////////////// +:version: %VERSION% +:release_date: %RELEASE_DATE% +:changelog_url: %CHANGELOG_URL% +:include_path: ../../../../logstash/docs/include +/////////////////////////////////////////// +END - GENERATED VARIABLES, DO NOT EDIT! +/////////////////////////////////////////// + +[id="plugins-{type}s-{plugin}"] + +=== Elasticsearch input plugin + +include::{include_path}/plugin_header.asciidoc[] + +==== Description + +Read from an Elasticsearch cluster, based on search query results. +This is useful for replaying test logs, reindexing, etc. +You can periodically schedule ingestion using a cron syntax +(see `schedule` setting) or run the query one time to load +data into Logstash. + +Example: +[source,ruby] + input { + # Read all documents from Elasticsearch matching the given query + elasticsearch { + hosts => "localhost" + query => '{ "query": { "match": { "statuscode": 200 } }, "sort": [ "_doc" ] }' + } + } + +This would create an Elasticsearch query with the following format: +[source,json] + curl 'http://localhost:9200/logstash-*/_search?&scroll=1m&size=1000' -d '{ + "query": { + "match": { + "statuscode": 200 + } + }, + "sort": [ "_doc" ] + }' + +[id="plugins-{type}s-{plugin}-scheduling"] +==== Scheduling + +Input from this plugin can be scheduled to run periodically according to a specific +schedule. This scheduling syntax is powered by https://github.com/jmettraux/rufus-scheduler[rufus-scheduler]. +The syntax is cron-like with some extensions specific to Rufus (e.g. timezone support ). + +Examples: + +|========================================================== +| `* 5 * 1-3 *` | will execute every minute of 5am every day of January through March. +| `0 * * * *` | will execute on the 0th minute of every hour every day. +| `0 6 * * * America/Chicago` | will execute at 6:00am (UTC/GMT -5) every day. 
+|========================================================== + + +Further documentation describing this syntax can be found +https://github.com/jmettraux/rufus-scheduler#parsing-cronlines-and-time-strings[here]. + + +[id="plugins-{type}s-{plugin}-auth"] +==== Authentication + +Authentication to a secure Elasticsearch cluster is possible using _one_ of the following options: + +* <> AND <> +* <> +* <> + +[id="plugins-{type}s-{plugin}-autz"] +==== Authorization + +Authorization to a secure Elasticsearch cluster requires `read` permission at index level and `monitoring` permissions at cluster level. +The `monitoring` permission at cluster level is necessary to perform periodic connectivity checks. + +[id="plugins-{type}s-{plugin}-ecs"] +==== Compatibility with the Elastic Common Schema (ECS) + +When ECS compatibility is disabled, `docinfo_target` uses the `"@metadata"` field as a default, with ECS enabled the plugin +uses a naming convention `"[@metadata][input][elasticsearch]"` as a default target for placing document information. + +The plugin logs a warning when ECS is enabled and `target` isn't set. + +TIP: Set the `target` option to avoid potential schema conflicts. + +[id="plugins-{type}s-{plugin}-failure-handling"] +==== Failure handling + +When this input plugin cannot create a structured `Event` from a hit result, it will instead create an `Event` that is tagged with `_elasticsearch_input_failure` whose `[event][original]` is a JSON-encoded string representation of the entire hit. + +Common causes are: + + - When the hit result contains top-level fields that are {logstash-ref}/processing.html#reserved-fields[reserved in Logstash] but do not have the expected shape. Use the <> directive to avoid conflicts with the top-level namespace. + - When <> is enabled and the docinfo fields cannot be merged into the hit result. Combine <> and <> to avoid conflict. + +[id="plugins-{type}s-{plugin}-cursor"] +==== Tracking a field's value across runs + +.Technical Preview: Tracking a field's value +**** +The feature that allows tracking a field's value across runs is in _Technical Preview_. +Configuration options and implementation details are subject to change in minor releases without being preceded by deprecation warnings. +**** + +Some uses cases require tracking the value of a particular field between two jobs. +Examples include: + +* avoiding the need to re-process the entire result set of a long query after an unplanned restart +* grabbing only new data from an index instead of processing the entire set on each job. + +The Elasticsearch input plugin provides the <> and <> options. +When <> is set, the plugin records the value of that field for the last document retrieved in a run into +a file. +(The file location defaults to <>.) + +You can then inject this value in the query using the placeholder `:last_value`. +The value will be injected into the query before execution, and then updated after the query completes if new data was found. + +This feature works best when: + +* the query sorts by the tracking field, +* the timestamp field is added by {es}, and +* the field type has enough resolution so that two events are unlikely to have the same value. + +Consider using a tracking field whose type is https://www.elastic.co/guide/en/elasticsearch/reference/current/date_nanos.html[date nanoseconds]. +If the tracking field is of this data type, you can use an extra placeholder called `:present` to inject the nano-second based value of "now-30s". 
+This placeholder is useful as the right-hand side of a range filter, allowing the collection of +new data but leaving partially-searchable bulk request data to the next scheduled job. + +[id="plugins-{type}s-{plugin}-tracking-sample"] +===== Sample configuration: Track field value across runs + +This section contains a series of steps to help you set up the "tailing" of data being written to a set of indices, using a date nanosecond field added by an Elasticsearch ingest pipeline and the `tracking_field` capability of this plugin. + +. Create ingest pipeline that adds Elasticsearch's `_ingest.timestamp` field to the documents as `event.ingested`: ++ +[source, json] + PUT _ingest/pipeline/my-pipeline + { + "processors": [ + { + "script": { + "lang": "painless", + "source": "ctx.putIfAbsent(\"event\", [:]); ctx.event.ingested = metadata().now.format(DateTimeFormatter.ISO_INSTANT);" + } + } + ] + } + +[start=2] +. Create an index mapping where the tracking field is of date nanosecond type and invokes the defined pipeline: ++ +[source, json] + PUT /_template/my_template + { + "index_patterns": ["test-*"], + "settings": { + "index.default_pipeline": "my-pipeline", + }, + "mappings": { + "properties": { + "event": { + "properties": { + "ingested": { + "type": "date_nanos", + "format": "strict_date_optional_time_nanos" + } + } + } + } + } + } + +[start=3] +. Define a query that looks at all data of the indices, sorted by the tracking field, and with a range filter since the last value seen until present: ++ +[source,json] +{ + "query": { + "range": { + "event.ingested": { + "gt": ":last_value", + "lt": ":present" + } + } + }, + "sort": [ + { + "event.ingested": { + "order": "asc", + "format": "strict_date_optional_time_nanos", + "numeric_type": "date_nanos" + } + } + ] +} + +[start=4] +. Configure the Elasticsearch input to query the indices with the query defined above, every minute, and track the `event.ingested` field: ++ +[source, ruby] + input { + elasticsearch { + id => tail_test_index + hosts => [ 'https://../'] + api_key => '....' + index => 'test-*' + query => '{ "query": { "range": { "event.ingested": { "gt": ":last_value", "lt": ":present"}}}, "sort": [ { "event.ingested": {"order": "asc", "format": "strict_date_optional_time_nanos", "numeric_type" : "date_nanos" } } ] }' + tracking_field => "[event][ingested]" + slices => 5 # optional use of slices to speed data processing, should be equal to or less than number of primary shards + schedule => '* * * * *' # every minute + schedule_overlap => false # don't accumulate jobs if one takes longer than 1 minute + } + } + +With this sample setup, new documents are indexed into a `test-*` index. +The next scheduled run: + +* selects all new documents since the last observed value of the tracking field, +* uses {ref}/point-in-time-api.html#point-in-time-api[Point in time (PIT)] + {ref}/paginate-search-results.html#search-after[Search after] to paginate through all the data, and +* updates the value of the field at the end of the pagination. + +[id="plugins-{type}s-{plugin}-esql"] +==== {esql} support + +.Technical Preview +**** +The {esql} feature that allows using ES|QL queries with this plugin is in Technical Preview. +Configuration options and implementation details are subject to change in minor releases without being preceded by deprecation warnings. +**** + +{es} Query Language ({esql}) provides a SQL-like interface for querying your {es} data. 
+To use {esql}, this plugin needs to be installed in {ls} 8.17.4 or newer, and must be connected to {es} 8.11 or newer.
+
+To configure an {esql} query in the plugin, set the `query_type` to `esql` and provide your {esql} query in the `query` parameter.
+
+IMPORTANT: {esql} is evolving and may still have limitations with regard to result size or supported field types. We recommend understanding https://www.elastic.co/guide/en/elasticsearch/reference/current/esql-limitations.html[ES|QL current limitations] before using it in production environments.
+
+The following is a basic scheduled {esql} query that runs hourly:
+[source, ruby]
+    input {
+      elasticsearch {
+        id => hourly_cron_job
+        hosts => [ 'https://../']
+        api_key => '....'
+        query_type => 'esql'
+        query => '
+          FROM food-index
+            | WHERE spicy_level = "hot" AND @timestamp > NOW() - 1 hour
+            | LIMIT 500
+        '
+        schedule => '0 * * * *' # every hour at min 0
+      }
+    }
+
+Set `config.support_escapes: true` in `logstash.yml` if you need to escape special characters in the query.
+
+NOTE: With an {esql} query, {ls} doesn't generate `event.original`.
+
+[id="plugins-{type}s-{plugin}-esql-event-mapping"]
+===== Mapping {esql} result to {ls} event
+{esql} returns query results in a structured tabular format, where data is organized into _columns_ (fields) and _values_ (entries).
+The plugin maps each value entry to an event, populating corresponding fields.
+For example, a query might produce a table like:
+
+[cols="2,1,1,1,2",options="header"]
+|===
+|`timestamp` |`user_id` | `action` | `status.code` | `status.desc`
+
+|2025-04-10T12:00:00 |123 |login |200 | Success
+|2025-04-10T12:05:00 |456 |purchase |403 | Forbidden (unauthorized user)
+|===
+
+In this case, the plugin emits two events that look like this:
+[source, json]
+[
+  {
+    "timestamp": "2025-04-10T12:00:00",
+    "user_id": 123,
+    "action": "login",
+    "status": {
+      "code": 200,
+      "desc": "Success"
+    }
+  },
+  {
+    "timestamp": "2025-04-10T12:05:00",
+    "user_id": 456,
+    "action": "purchase",
+    "status": {
+      "code": 403,
+      "desc": "Forbidden (unauthorized user)"
+    }
+  }
+]
+
+NOTE: If your index has a mapping with sub-objects where `status.code` and `status.desc` are actually dotted fields, they appear in {ls} events as a nested structure.
+
+[id="plugins-{type}s-{plugin}-esql-multifields"]
+===== Conflict on multi-fields
+
+An {esql} query fetches all parent fields and sub-fields if your {es} index has https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/multi-fields[multi-fields] or https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/subobjects[subobjects].
+Since {ls} events cannot contain a parent field's concrete value and sub-field values together, the plugin ignores the sub-fields with a warning and includes the parent.
+We recommend using the `RENAME` (or `DROP`, to avoid warnings) keyword in your {esql} query to explicitly rename the fields if you want sub-fields included in the event.
+
+This is a common occurrence if your template or mapping follows the pattern of always indexing strings as a "text" (`field`) + "keyword" (`field.keyword`) multi-field.
+In this case it's recommended to do `KEEP field` if the string is identical and there is only one sub-field, as the engine will optimize and retrieve the keyword; otherwise you can do `KEEP field.keyword | RENAME field.keyword as field`.
+ +To illustrate the situation with example, assuming your mapping has a time `time` field with `time.min` and `time.max` sub-fields as following: +[source, ruby] + "properties": { + "time": { "type": "long" }, + "time.min": { "type": "long" }, + "time.max": { "type": "long" } + } + +The {esql} result will contain all three fields but the plugin cannot map them into {ls} event. +To avoid this, you can use the `RENAME` keyword to rename the `time` parent field to get all three fields with unique fields. +[source, ruby] + ... + query => 'FROM my-index | RENAME time AS time.current' + ... + +For comprehensive {esql} syntax reference and best practices, see the https://www.elastic.co/guide/en/elasticsearch/reference/current/esql-syntax.html[{esql} documentation]. + +[id="plugins-{type}s-{plugin}-options"] +==== Elasticsearch Input configuration options + +This plugin supports these configuration options plus the <> described later. + +NOTE: As of version `5.0.0` of this plugin, a number of previously deprecated settings related to SSL have been removed. +Please check out <> for details. + +[cols="<,<,<",options="header",] +|======================================================================= +|Setting |Input type|Required +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> | <>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>, one of `["dsl","esql"]`|No +| <> |<>, one of `["hits","aggregations"]`|No +| <> | <>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>, one of `["auto", "search_after", "scroll"]`|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |list of <>|No +| <> |list of <>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>, one of `["full", "none"]`|No +| <> | <>|No +| <> | {logstash-ref}/field-references-deepdive.html[field reference] | No +| <> |<>|No +| <> |<>|No +| <> | <>|No +| <> |<>|No +|======================================================================= + +Also see <> for a list of options supported by all +input plugins. + +  + +[id="plugins-{type}s-{plugin}-api_key"] +===== `api_key` + + * Value type is <> + * There is no default value for this setting. + +Authenticate using Elasticsearch API key. Note that this option also requires enabling the <> option. + +Format is `id:api_key` where `id` and `api_key` are as returned by the +Elasticsearch +{ref}/security-api-create-api-key.html[Create +API key API]. + +[id="plugins-{type}s-{plugin}-ca_trusted_fingerprint"] +===== `ca_trusted_fingerprint` + +* Value type is <>, and must contain exactly 64 hexadecimal characters. +* There is no default value for this setting. +* Use of this option _requires_ Logstash 8.3+ + +The SHA-256 fingerprint of an SSL Certificate Authority to trust, such as the autogenerated self-signed CA for an Elasticsearch cluster. + +[id="plugins-{type}s-{plugin}-cloud_auth"] +===== `cloud_auth` + + * Value type is <> + * There is no default value for this setting. + +Cloud authentication string (":" format) is an alternative for the `user`/`password` pair. + +For more info, check out the +{logstash-ref}/connecting-to-cloud.html[Logstash-to-Cloud documentation]. + +[id="plugins-{type}s-{plugin}-cloud_id"] +===== `cloud_id` + + * Value type is <> + * There is no default value for this setting. + +Cloud ID, from the Elastic Cloud web console. If set `hosts` should not be used. 
+ +For more info, check out the +{logstash-ref}/connecting-to-cloud.html[Logstash-to-Cloud documentation]. + +[id="plugins-{type}s-{plugin}-connect_timeout_seconds"] +===== `connect_timeout_seconds` + + * Value type is <> + * Default value is `10` + +The maximum amount of time, in seconds, to wait while establishing a connection to Elasticsearch. +Connect timeouts tend to occur when Elasticsearch or an intermediate proxy is overloaded with requests and has exhausted its connection pool. + +[id="plugins-{type}s-{plugin}-custom_headers"] +===== `custom_headers` + + * Value type is <> + * Default value is empty + +Pass a set of key value pairs as the headers sent in each request to an elasticsearch node. +The headers will be used for any kind of request. +These custom headers will override any headers previously set by the plugin such as the User Agent or Authorization headers. + +[id="plugins-{type}s-{plugin}-docinfo"] +===== `docinfo` + + * Value type is <> + * Default value is `false` + +If set, include Elasticsearch document information such as index, type, and +the id in the event. + +It might be important to note, with regards to metadata, that if you're +ingesting documents with the intent to re-index them (or just update them) +that the `action` option in the elasticsearch output wants to know how to +handle those things. It can be dynamically assigned with a field +added to the metadata. + +Example +[source, ruby] + input { + elasticsearch { + hosts => "es.production.mysite.org" + index => "mydata-2018.09.*" + query => '{ "query": { "query_string": { "query": "*" } } }' + size => 500 + scroll => "5m" + docinfo => true + docinfo_target => "[@metadata][doc]" + } + } + output { + elasticsearch { + index => "copy-of-production.%{[@metadata][doc][_index]}" + document_type => "%{[@metadata][doc][_type]}" + document_id => "%{[@metadata][doc][_id]}" + } + } + +If set, you can use metadata information in the <> common option. + +Example +[source, ruby] + input { + elasticsearch { + docinfo => true + docinfo_target => "[@metadata][doc]" + add_field => { + identifier => "%{[@metadata][doc][_index]}:%{[@metadata][doc][_type]}:%{[@metadata][doc][_id]}" + } + } + } + + +[id="plugins-{type}s-{plugin}-docinfo_fields"] +===== `docinfo_fields` + + * Value type is <> + * Default value is `["_index", "_type", "_id"]` + +If document metadata storage is requested by enabling the `docinfo` option, this +option lists the metadata fields to save in the current event. See +{ref}/mapping-fields.html[Meta-Fields] in the Elasticsearch documentation for +more information. + +[id="plugins-{type}s-{plugin}-docinfo_target"] +===== `docinfo_target` + + * Value type is <> + * Default value depends on whether <> is enabled: + ** ECS Compatibility disabled: `"@metadata"` + ** ECS Compatibility enabled: `"[@metadata][input][elasticsearch]"` + +If document metadata storage is requested by enabling the `docinfo` option, +this option names the field under which to store the metadata fields as subfields. 
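+
+For example, with ECS compatibility enabled (and therefore the default target shown above), a downstream output can reference the stored metadata like this. This is a minimal sketch; the output index pattern is illustrative and not part of the plugin's defaults:
+
+[source, ruby]
+    input {
+      elasticsearch {
+        docinfo => true
+        # default target when ECS compatibility is enabled
+        docinfo_target => "[@metadata][input][elasticsearch]"
+      }
+    }
+    output {
+      elasticsearch {
+        # re-use the original index and id captured by docinfo
+        index => "copy-of-%{[@metadata][input][elasticsearch][_index]}"
+        document_id => "%{[@metadata][input][elasticsearch][_id]}"
+      }
+    }
+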
+
+[id="plugins-{type}s-{plugin}-ecs_compatibility"]
+===== `ecs_compatibility`
+
+ * Value type is <>
+ * Supported values are:
+ ** `disabled`: uses backwards-compatible field names (for example, document metadata is stored under `@metadata` by default)
+ ** `v1`,`v8`: Elastic Common Schema compliant behavior (for example, document metadata is stored under `[@metadata][input][elasticsearch]` by default)
+ * Default value depends on which version of Logstash is running:
+ ** When Logstash provides a `pipeline.ecs_compatibility` setting, its value is used as the default
+ ** Otherwise, the default value is `disabled`
+
+Controls this plugin's compatibility with the {ecs-ref}[Elastic Common Schema (ECS)].
+
+[id="plugins-{type}s-{plugin}-hosts"]
+===== `hosts`
+
+ * Value type is <>
+ * There is no default value for this setting.
+
+List of one or more Elasticsearch hosts to use for querying. Each host
+can be either IP, HOST, IP:port, or HOST:port. The port defaults to
+9200.
+
+[id="plugins-{type}s-{plugin}-index"]
+===== `index`
+
+ * Value type is <>
+ * Default value is `"logstash-*"`
+
+The index or alias to search.
+Check out {ref}/api-conventions.html#api-multi-index[Multi Indices
+documentation] in the Elasticsearch documentation for info on
+referencing multiple indices.
+
+[id="plugins-{type}s-{plugin}-last_run_metadata_path"]
+===== `last_run_metadata_path`
+
+ * Value type is <>
+ * There is no default value for this setting.
+
+The path to store the last observed value of the tracking field, when used.
+By default this file is stored as `/plugins/inputs/elasticsearch//last_run_value`.
+
+This setting should point to a file, not a directory, and Logstash must have read+write access to this file.
+
+[id="plugins-{type}s-{plugin}-password"]
+===== `password`
+
+ * Value type is <>
+ * There is no default value for this setting.
+
+The password to use together with the username in the `user` option
+when authenticating to the Elasticsearch server. If set to an empty
+string, authentication will be disabled.
+
+[id="plugins-{type}s-{plugin}-proxy"]
+===== `proxy`
+
+* Value type is <>
+* There is no default value for this setting.
+
+Set the address of a forward HTTP proxy.
+An empty string is treated as if the proxy was not set. This is useful when using
+environment variables, e.g. `proxy => '${LS_PROXY:}'`.
+
+[id="plugins-{type}s-{plugin}-query"]
+===== `query`
+
+ * Value type is <>
+ * Default value is `'{ "sort": [ "_doc" ] }'`
+
+The query to be executed.
+The accepted query shape is DSL or {esql} (when `query_type => 'esql'`).
+Read the {ref}/query-dsl.html[{es} query DSL documentation] or {ref}/esql.html[{esql} documentation] for more information.
+
+When <> resolves to `search_after` and the query does not specify `sort`,
+the default sort `'{ "sort": { "_shard_doc": "asc" } }'` will be added to the query. Please refer to the {ref}/paginate-search-results.html#search-after[Elasticsearch search_after] parameter for more information.
+
+[id="plugins-{type}s-{plugin}-query_type"]
+===== `query_type`
+
+* Value can be `dsl` or `esql`
+* Default value is `dsl`
+
+Defines the <> shape.
+When `dsl`, the query shape must be a valid {es} JSON-style string.
+When `esql`, the query shape must be a valid {esql} string, and the `index`, `size`, `slices`, `search_api`, `docinfo`, `docinfo_target`, `docinfo_fields`, `response_type` and `tracking_field` parameters are not allowed.
+
+[id="plugins-{type}s-{plugin}-response_type"]
+===== `response_type`
+
+ * Value can be any of: `hits`, `aggregations`
+ * Default value is `hits`
+
+Which part of the result to transform into Logstash events when processing the
+response from the query.
+
+The default `hits` will generate one event per returned document (i.e. "hit").
+
+When set to `aggregations`, a single {ls} event will be generated with the
+contents of the `aggregations` object of the query's response. In this case the
+`hits` object will be ignored. The parameter `size` will always be set to
+0 regardless of the default or user-defined value set in this plugin.
+
+[id="plugins-{type}s-{plugin}-request_timeout_seconds"]
+===== `request_timeout_seconds`
+
+ * Value type is <>
+ * Default value is `60`
+
+The maximum amount of time, in seconds, for a single request to Elasticsearch.
+Request timeouts tend to occur when an individual page of data is very large, such as when it contains large-payload
+documents and/or the <> has been specified as a large value.
+
+
+[id="plugins-{type}s-{plugin}-retries"]
+===== `retries`
+
+* Value type is <>
+* Default value is `0`
+
+The number of times to re-run the query after the first failure. If the query still fails after all retries, an error message is logged.
+The default is 0 (no retry). This value should be equal to or greater than zero.
+
+NOTE: Partial failures - such as errors in a subset of all slices - can result in the entire query being retried, which can lead to duplication of data. Avoiding this would require Logstash to store the entire result set of a query in memory, which is often not possible.
+
+[id="plugins-{type}s-{plugin}-schedule"]
+===== `schedule`
+
+ * Value type is <>
+ * There is no default value for this setting.
+
+Schedule of when to periodically run the statement, in Cron format,
+for example: "* * * * *" (execute query every minute, on the minute).
+
+There is no schedule by default. If no schedule is given, then the statement is run
+exactly once.
+
+[id="plugins-{type}s-{plugin}-schedule_overlap"]
+===== `schedule_overlap`
+
+ * Value type is <>
+ * Default value is `true`
+
+Whether to allow queuing of a scheduled run if a run is already in progress.
+Queuing ensures that a new run starts immediately after the previous one finishes when there
+is a lot of work to do, but because the queue is unbounded it may lead to an out-of-memory error over long periods of time
+if the queue grows continuously.
+
+When in doubt, set `schedule_overlap` to false (it may become the default value in the future).
+
+[id="plugins-{type}s-{plugin}-scroll"]
+===== `scroll`
+
+ * Value type is <>
+ * Default value is `"1m"`
+
+This parameter controls the keepalive time of the scrolling
+request and initiates the scrolling process. The timeout applies per
+round trip (i.e. between one scroll request and the next).
+
+[id="plugins-{type}s-{plugin}-search_api"]
+===== `search_api`
+
+* Value can be any of: `auto`, `search_after`, `scroll`
+* Default value is `auto`
+
+With `auto`, the plugin uses the `search_after` parameter for Elasticsearch version `8.0.0` or higher, otherwise the `scroll` API is used instead.
+
+`search_after` uses a {ref}/point-in-time-api.html#point-in-time-api[point in time] and sort values to search.
+The query requires at least one `sort` field, as described in the <> parameter.
+
+`scroll` uses the {ref}/paginate-search-results.html#scroll-search-results[scroll] API to search, which is no longer recommended.
+
+[id="plugins-{type}s-{plugin}-size"]
+===== `size`
+
+ * Value type is <>
+ * Default value is `1000`
+
+This allows you to set the maximum number of hits returned per scroll.
+
+[id="plugins-{type}s-{plugin}-slices"]
+===== `slices`
+
+ * Value type is <>
+ * There is no default value.
+ * Sensible values range from 2 to about 8. + +In some cases, it is possible to improve overall throughput by consuming multiple +distinct slices of a query simultaneously using +{ref}/paginate-search-results.html#slice-scroll[sliced scrolls], +especially if the pipeline is spending significant time waiting on Elasticsearch +to provide results. + +If set, the `slices` parameter tells the plugin how many slices to divide the work +into, and will produce events from the slices in parallel until all of them are done +scrolling. + +NOTE: The Elasticsearch manual indicates that there can be _negative_ performance + implications to both the query and the Elasticsearch cluster when a scrolling + query uses more slices than shards in the index. + +If the `slices` parameter is left unset, the plugin will _not_ inject slice +instructions into the query. + +[id="plugins-{type}s-{plugin}-ssl_certificate"] +===== `ssl_certificate` + * Value type is <> + * There is no default value for this setting. + +SSL certificate to use to authenticate the client. This certificate should be an OpenSSL-style X.509 certificate file. + +NOTE: This setting can be used only if <> is set. + +[id="plugins-{type}s-{plugin}-ssl_certificate_authorities"] +===== `ssl_certificate_authorities` + + * Value type is a list of <> + * There is no default value for this setting + +The `.cer` or `.pem` files to validate the server's certificate. + +NOTE: You cannot use this setting and <> at the same time. + +[id="plugins-{type}s-{plugin}-ssl_cipher_suites"] +===== `ssl_cipher_suites` + * Value type is a list of <> + * There is no default value for this setting + +The list of cipher suites to use, listed by priorities. +Supported cipher suites vary depending on the Java and protocol versions. + +[id="plugins-{type}s-{plugin}-ssl_enabled"] +===== `ssl_enabled` + + * Value type is <> + * There is no default value for this setting. + +Enable SSL/TLS secured communication to Elasticsearch cluster. +Leaving this unspecified will use whatever scheme is specified in the URLs listed in <> or extracted from the <>. +If no explicit protocol is specified plain HTTP will be used. + +When not explicitly set, SSL will be automatically enabled if any of the specified hosts use HTTPS. + +[id="plugins-{type}s-{plugin}-ssl_key"] +===== `ssl_key` + * Value type is <> + * There is no default value for this setting. + +OpenSSL-style RSA private key that corresponds to the <>. + +NOTE: This setting can be used only if <> is set. + +[id="plugins-{type}s-{plugin}-ssl_keystore_password"] +===== `ssl_keystore_password` + + * Value type is <> + * There is no default value for this setting. + +Set the keystore password + +[id="plugins-{type}s-{plugin}-ssl_keystore_path"] +===== `ssl_keystore_path` + + * Value type is <> + * There is no default value for this setting. + +The keystore used to present a certificate to the server. +It can be either `.jks` or `.p12` + +NOTE: You cannot use this setting and <> at the same time. + +[id="plugins-{type}s-{plugin}-ssl_keystore_type"] +===== `ssl_keystore_type` + + * Value can be any of: `jks`, `pkcs12` + * If not provided, the value will be inferred from the keystore filename. + +The format of the keystore file. It must be either `jks` or `pkcs12`. + +[id="plugins-{type}s-{plugin}-ssl_supported_protocols"] +===== `ssl_supported_protocols` + + * Value type is <> + * Allowed values are: `'TLSv1.1'`, `'TLSv1.2'`, `'TLSv1.3'` + * Default depends on the JDK being used. 
With up-to-date Logstash, the default is `['TLSv1.2', 'TLSv1.3']`. + `'TLSv1.1'` is not considered secure and is only provided for legacy applications. + +List of allowed SSL/TLS versions to use when establishing a connection to the Elasticsearch cluster. + +For Java 8 `'TLSv1.3'` is supported only since **8u262** (AdoptOpenJDK), but requires that you set the +`LS_JAVA_OPTS="-Djdk.tls.client.protocols=TLSv1.3"` system property in Logstash. + +NOTE: If you configure the plugin to use `'TLSv1.1'` on any recent JVM, such as the one packaged with Logstash, +the protocol is disabled by default and needs to be enabled manually by changing `jdk.tls.disabledAlgorithms` in +the *$JDK_HOME/conf/security/java.security* configuration file. That is, `TLSv1.1` needs to be removed from the list. + +[id="plugins-{type}s-{plugin}-ssl_truststore_password"] +===== `ssl_truststore_password` + + * Value type is <> + * There is no default value for this setting. + +Set the truststore password. + +[id="plugins-{type}s-{plugin}-ssl_truststore_path"] +===== `ssl_truststore_path` + + * Value type is <> + * There is no default value for this setting. + +The truststore to validate the server's certificate. +It can be either .jks or .p12. + +NOTE: You cannot use this setting and <> at the same time. + +[id="plugins-{type}s-{plugin}-ssl_truststore_type"] +===== `ssl_truststore_type` + + * Value can be any of: `jks`, `pkcs12` + * If not provided, the value will be inferred from the truststore filename. + +The format of the truststore file. It must be either `jks` or `pkcs12`. + +[id="plugins-{type}s-{plugin}-ssl_verification_mode"] +===== `ssl_verification_mode` + + * Value can be any of: `full`, `none` + * Default value is `full` + +Defines how to verify the certificates presented by another party in the TLS connection: + +`full` validates that the server certificate has an issue date that’s within +the not_before and not_after dates; chains to a trusted Certificate Authority (CA), and +has a hostname or IP address that matches the names within the certificate. + +`none` performs no certificate validation. + +WARNING: Setting certificate verification to `none` disables many security benefits of SSL/TLS, which is very dangerous. For more information on disabling certificate verification please read https://www.cs.utexas.edu/~shmat/shmat_ccs12.pdf + +[id="plugins-{type}s-{plugin}-socket_timeout_seconds"] +===== `socket_timeout_seconds` + + * Value type is <> + * Default value is `60` + +The maximum amount of time, in seconds, to wait on an incomplete response from Elasticsearch while no additional data has been appended. +Socket timeouts usually occur while waiting for the first byte of a response, such as when executing a particularly complex query. + + +[id="plugins-{type}s-{plugin}-target"] +===== `target` + +* Value type is {logstash-ref}/field-references-deepdive.html[field reference] +* There is no default value for this setting. + +Without a `target`, events are created from each hit's `_source` at the root level. +When the `target` is set to a field reference, the `_source` of the hit is placed in the target field instead. + +This option can be useful to avoid populating unknown fields when a downstream schema such as ECS is enforced. +It is also possible to target an entry in the event's metadata, which will be available during event processing but not exported to your outputs (e.g., `target \=> "[@metadata][_source]"`). 
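+
+A minimal sketch of using `target` (the host, index name, and target field are illustrative only):
+[source, ruby]
+    input {
+      elasticsearch {
+        hosts  => "localhost"
+        index  => "logstash-*"
+        query  => '{ "query": { "match_all": {} }, "sort": [ "_doc" ] }'
+        target => "[document]"
+      }
+    }
+
+With this configuration, each hit's `_source` is placed under the `document` field instead of at the root of the event.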
+ +[id="plugins-{type}s-{plugin}-tracking_field"] +===== `tracking_field` + +* Value type is <> +* There is no default value for this setting. + +Which field from the last event of a previous run will be used a cursor value for the following run. +The value of this field is injected into each query if the query uses the placeholder `:last_value`. +For the first query after a pipeline is started, the value used is either read from <> file, +or taken from <> setting. + +Note: The tracking value is updated after each page is read and at the end of each Point in Time. In case of a crash the last saved value will be used so some duplication of data can occur. For this reason the use of unique document IDs for each event is recommended in the downstream destination. + +[id="plugins-{type}s-{plugin}-tracking_field_seed"] +===== `tracking_field_seed` + +* Value type is <> +* Default value is `"1970-01-01T00:00:00.000000000Z"` + +The starting value for the <> if there is no <> already. +This field defaults to the nanosecond precision ISO8601 representation of `epoch`, or "1970-01-01T00:00:00.000000000Z", given nano-second precision timestamps are the +most reliable data format to use for this feature. + +[id="plugins-{type}s-{plugin}-user"] +===== `user` + + * Value type is <> + * There is no default value for this setting. + +The username to use together with the password in the `password` +option when authenticating to the Elasticsearch server. If set to an +empty string authentication will be disabled. + + +[id="plugins-{type}s-{plugin}-obsolete-options"] +==== Elasticsearch Input Obsolete Configuration Options + +WARNING: As of version `5.0.0` of this plugin, some configuration options have been replaced. +The plugin will fail to start if it contains any of these obsolete options. + + +[cols="<,<",options="header",] +|======================================================================= +|Setting|Replaced by +| ca_file | <> +| ssl | <> +| ssl_certificate_verification | <> +|======================================================================= + +[id="plugins-{type}s-{plugin}-common-options"] +include::{include_path}/{type}.asciidoc[] + +:no_codec!: diff --git a/lib/logstash/helpers/loggable_try.rb b/lib/logstash/helpers/loggable_try.rb new file mode 100644 index 00000000..07e5cea5 --- /dev/null +++ b/lib/logstash/helpers/loggable_try.rb @@ -0,0 +1,14 @@ +require 'stud/try' + +module LogStash module Helpers + class LoggableTry < Stud::Try + def initialize(logger, name) + @logger = logger + @name = name + end + + def log_failure(exception, fail_count, message) + @logger.warn("Attempt to #{@name} but failed. 
#{message}", fail_count: fail_count, exception: exception.message) + end + end +end end \ No newline at end of file diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index d34630ea..c16b714d 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -1,134 +1,803 @@ # encoding: utf-8 require "logstash/inputs/base" require "logstash/namespace" -require "logstash/util/socket_peer" require "logstash/json" +require "logstash/util/safe_uri" +require 'logstash/plugin_mixins/validator_support/field_reference_validation_adapter' +require 'logstash/plugin_mixins/event_support/event_factory_adapter' +require 'logstash/plugin_mixins/ecs_compatibility_support' +require 'logstash/plugin_mixins/ecs_compatibility_support/target_check' +require 'logstash/plugin_mixins/ca_trusted_fingerprint_support' +require "logstash/plugin_mixins/scheduler" +require "logstash/plugin_mixins/normalize_config_support" +require "base64" +require "elasticsearch" +require "manticore" + +# .Compatibility Note +# [NOTE] +# ================================================================================ +# Starting with Elasticsearch 5.3, there's an {ref}modules-http.html[HTTP setting] +# called `http.content_type.required`. If this option is set to `true`, and you +# are using Logstash 2.4 through 5.2, you need to update the Elasticsearch input +# plugin to version 4.0.2 or higher. +# +# ================================================================================ +# # Read from an Elasticsearch cluster, based on search query results. # This is useful for replaying test logs, reindexing, etc. +# It also supports periodically scheduling lookup enrichments +# using a cron syntax (see `schedule` setting). # # Example: -# +# [source,ruby] # input { # # Read all documents from Elasticsearch matching the given query # elasticsearch { -# host => "localhost" -# query => "ERROR" +# hosts => "localhost" +# query => '{ "query": { "match": { "statuscode": 200 } }, "sort": [ "_doc" ] }' # } # } # # This would create an Elasticsearch query with the following format: +# [source,json] +# curl 'http://localhost:9200/logstash-*/_search?&scroll=1m&size=1000' -d '{ +# "query": { +# "match": { +# "statuscode": 200 +# } +# }, +# "sort": [ "_doc" ] +# }' +# +# ==== Scheduling +# +# Input from this plugin can be scheduled to run periodically according to a specific +# schedule. This scheduling syntax is powered by https://github.com/jmettraux/rufus-scheduler[rufus-scheduler]. +# The syntax is cron-like with some extensions specific to Rufus (e.g. timezone support ). +# +# Examples: +# +# |========================================================== +# | `* 5 * 1-3 *` | will execute every minute of 5am every day of January through March. +# | `0 * * * *` | will execute on the 0th minute of every hour every day. +# | `0 6 * * * America/Chicago` | will execute at 6:00am (UTC/GMT -5) every day. +# |========================================================== +# +# +# Further documentation describing this syntax can be found https://github.com/jmettraux/rufus-scheduler#parsing-cronlines-and-time-strings[here]. # -# http://localhost:9200/logstash-*/_search?q=ERROR&scroll=1m&size=1000 # -# * TODO(sissel): Option to keep the index, type, and doc id so we can do reindexing? 
class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base - config_name "elasticsearch" - milestone 1 - default :codec, "json" + require 'logstash/inputs/elasticsearch/paginated_search' + require 'logstash/inputs/elasticsearch/aggregation' + require 'logstash/inputs/elasticsearch/cursor_tracker' + require 'logstash/inputs/elasticsearch/esql' + + include LogStash::PluginMixins::ECSCompatibilitySupport(:disabled, :v1, :v8 => :v1) + include LogStash::PluginMixins::ECSCompatibilitySupport::TargetCheck + + include LogStash::PluginMixins::EventSupport::EventFactoryAdapter + + extend LogStash::PluginMixins::ValidatorSupport::FieldReferenceValidationAdapter - # The IP address or hostname of your Elasticsearch server. - config :host, :validate => :string, :required => true + include LogStash::PluginMixins::Scheduler - # The HTTP port of your Elasticsearch server's REST interface. - config :port, :validate => :number, :default => 9200 + include LogStash::PluginMixins::NormalizeConfigSupport + + config_name "elasticsearch" + + # List of elasticsearch hosts to use for querying. + # Each host can be either IP, HOST, IP:port or HOST:port. + # Port defaults to 9200 + config :hosts, :validate => :array # The index or alias to search. config :index, :validate => :string, :default => "logstash-*" - # The query to be executed. - config :query, :validate => :string, :default => "*" + # A type of Elasticsearch query, provided by @query. This will validate query shape and other params. + config :query_type, :validate => %w[dsl esql], :default => 'dsl' + + # The query to be executed. DSL or ES|QL (when `query_type => 'esql'`) query shape is accepted. + # Read the following documentations for more info + # Query DSL: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html + # ES|QL: https://www.elastic.co/guide/en/elasticsearch/reference/current/esql.html + config :query, :validate => :string, :default => '{ "sort": [ "_doc" ] }' - # Enable the Elasticsearch "scan" search type. This will disable - # sorting but increase speed and performance. - config :scan, :validate => :boolean, :default => true + # This allows you to specify the DSL response type: one of [hits, aggregations] + # where + # hits: normal search request + # aggregations: aggregation request + # Note that this param is invalid when `query_type => 'esql'`, ES|QL response shape is always a tabular format + config :response_type, :validate => %w[hits aggregations], :default => 'hits' # This allows you to set the maximum number of hits returned per scroll. config :size, :validate => :number, :default => 1000 + # The number of retries to run the query. If the query fails after all retries, it logs an error message. + config :retries, :validate => :number, :default => 0 + + # Default `auto` will use `search_after` api for Elasticsearch 8 and use `scroll` api for 7 + # Set to scroll to fallback to previous version + config :search_api, :validate => %w[auto search_after scroll], :default => "auto" + # This parameter controls the keepalive time in seconds of the scrolling # request and initiates the scrolling process. The timeout applies per - # round trip (i.e. between the previous scan scroll request, to the next). + # round trip (i.e. between the previous scroll request, to the next). config :scroll, :validate => :string, :default => "1m" - public + # This parameter controls the number of parallel slices to be consumed simultaneously + # by this pipeline input. 
+ config :slices, :validate => :number + + # Enable tracking the value of a given field to be used as a cursor + # Main concerns: + # * using anything other than _event.timestamp easily leads to data loss + # * the first "synchronization run can take a long time" + config :tracking_field, :validate => :string + + # Define the initial seed value of the tracking_field + config :tracking_field_seed, :validate => :string, :default => "1970-01-01T00:00:00.000000000Z" + + # The location of where the tracking field value will be stored + # The value is persisted after each scheduled run (and not per result) + # If it's not set it defaults to '${path.data}/plugins/inputs/elasticsearch//last_run_value' + config :last_run_metadata_path, :validate => :string + + # If set, include Elasticsearch document information such as index, type, and + # the id in the event. + # + # It might be important to note, with regards to metadata, that if you're + # ingesting documents with the intent to re-index them (or just update them) + # that the `action` option in the elasticsearch output wants to know how to + # handle those things. It can be dynamically assigned with a field + # added to the metadata. + # + # Example + # [source, ruby] + # input { + # elasticsearch { + # hosts => "es.production.mysite.org" + # index => "mydata-2018.09.*" + # query => "*" + # size => 500 + # scroll => "5m" + # docinfo => true + # } + # } + # output { + # elasticsearch { + # index => "copy-of-production.%{[@metadata][_index]}" + # document_type => "%{[@metadata][_type]}" + # document_id => "%{[@metadata][_id]}" + # } + # } + # + config :docinfo, :validate => :boolean, :default => false + + # Where to move the Elasticsearch document information. + # default: [@metadata][input][elasticsearch] in ECS mode, @metadata field otherwise + config :docinfo_target, :validate=> :field_reference + + # List of document metadata to move to the `docinfo_target` field. + # To learn more about Elasticsearch metadata fields read + # http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/_document_metadata.html + config :docinfo_fields, :validate => :array, :default => ['_index', '_type', '_id'] + + # Custom headers for Elasticsearch requests + config :custom_headers, :validate => :hash, :default => {} + + # Basic Auth - username + config :user, :validate => :string + + # Basic Auth - password + config :password, :validate => :password + + # Connection Timeout, in Seconds + config :connect_timeout_seconds, :validate => :positive_whole_number, :default => 10 + + # Request Timeout, in Seconds + config :request_timeout_seconds, :validate => :positive_whole_number, :default => 60 + + # Socket Timeout, in Seconds + config :socket_timeout_seconds, :validate => :positive_whole_number, :default => 60 + + # Cloud ID, from the Elastic Cloud web console. If set `hosts` should not be used. + # + # For more info, check out the https://www.elastic.co/guide/en/logstash/current/connecting-to-cloud.html#_cloud_id[Logstash-to-Cloud documentation] + config :cloud_id, :validate => :string + + # Cloud authentication string (":" format) is an alternative for the `user`/`password` configuration. + # + # For more info, check out the https://www.elastic.co/guide/en/logstash/current/connecting-to-cloud.html#_cloud_auth[Logstash-to-Cloud documentation] + config :cloud_auth, :validate => :password + + # Authenticate using Elasticsearch API key. 
+ # format is id:api_key (as returned by https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-create-api-key.html[Create API key]) + config :api_key, :validate => :password + + # Set the address of a forward HTTP proxy. + config :proxy, :validate => :uri_or_empty + + # OpenSSL-style X.509 certificate certificate to authenticate the client + config :ssl_certificate, :validate => :path + + # SSL Certificate Authority files in PEM encoded format, must also include any chain certificates as necessary + config :ssl_certificate_authorities, :validate => :path, :list => true + + # The list of cipher suites to use, listed by priorities. + # Supported cipher suites vary depending on which version of Java is used. + config :ssl_cipher_suites, :validate => :string, :list => true + + # SSL + config :ssl_enabled, :validate => :boolean + + # OpenSSL-style RSA private key to authenticate the client + config :ssl_key, :validate => :path + + # Set the keystore password + config :ssl_keystore_password, :validate => :password + + # The keystore used to present a certificate to the server. + # It can be either .jks or .p12 + config :ssl_keystore_path, :validate => :path + + # The format of the keystore file. It must be either jks or pkcs12 + config :ssl_keystore_type, :validate => %w[pkcs12 jks] + + # Supported protocols with versions. + config :ssl_supported_protocols, :validate => %w[TLSv1.1 TLSv1.2 TLSv1.3], :default => [], :list => true + + # Set the truststore password + config :ssl_truststore_password, :validate => :password + + # The JKS truststore to validate the server's certificate. + config :ssl_truststore_path, :validate => :path + + # The format of the truststore file. It must be either jks or pkcs12 + config :ssl_truststore_type, :validate => %w[pkcs12 jks] + + # Options to verify the server's certificate. + # "full": validates that the provided certificate has an issue date that’s within the not_before and not_after dates; + # chains to a trusted Certificate Authority (CA); has a hostname or IP address that matches the names within the certificate. + # "none": performs no certificate validation. Disabling this severely compromises security (https://www.cs.utexas.edu/~shmat/shmat_ccs12.pdf) + config :ssl_verification_mode, :validate => %w[full none], :default => 'full' + + # Schedule of when to periodically run statement, in Cron format + # for example: "* * * * *" (execute query every minute, on the minute) + # + # There is no schedule by default. If no schedule is given, then the statement is run + # exactly once. + config :schedule, :validate => :string + + # Allow scheduled runs to overlap (enabled by default). Setting to false will + # only start a new scheduled run after the previous one completes. + config :schedule_overlap, :validate => :boolean + + # If set, the _source of each hit will be added nested under the target instead of at the top-level + config :target, :validate => :field_reference + + # Obsolete Settings + config :ssl, :obsolete => "Set 'ssl_enabled' instead." + config :ca_file, :obsolete => "Set 'ssl_certificate_authorities' instead." + config :ssl_certificate_verification, :obsolete => "Set 'ssl_verification_mode' instead." 
+ + # config :ca_trusted_fingerprint, :validate => :sha_256_hex + include LogStash::PluginMixins::CATrustedFingerprintSupport + + attr_reader :pipeline_id + + BUILD_FLAVOR_SERVERLESS = 'serverless'.freeze + DEFAULT_EAV_HEADER = { "Elastic-Api-Version" => "2023-10-31" }.freeze + INTERNAL_ORIGIN_HEADER = { 'x-elastic-product-origin' => 'logstash-input-elasticsearch'}.freeze + + LS_ESQL_SUPPORT_VERSION = "8.17.4" # the version started using elasticsearch-ruby v8 + ES_ESQL_SUPPORT_VERSION = "8.11.0" + + def initialize(params={}) + super(params) + + if docinfo_target.nil? + @docinfo_target = ecs_select[disabled: '@metadata', v1: '[@metadata][input][elasticsearch]'] + end + end + def register - require "ftw" - @agent = FTW::Agent.new + require "rufus/scheduler" + + @pipeline_id = execution_context&.pipeline_id || 'main' + + fill_hosts_from_cloud_id + setup_ssl_params! + + if @query_type == 'esql' + validate_ls_version_for_esql_support! + validate_esql_query! + not_allowed_options = original_params.keys & %w(index size slices search_api docinfo docinfo_target docinfo_fields response_type tracking_field) + raise(LogStash::ConfigurationError, "Configured #{not_allowed_options} params are not allowed while using ES|QL query") if not_allowed_options&.size > 1 + else + @base_query = LogStash::Json.load(@query) + if @slices + @base_query.include?('slice') && fail(LogStash::ConfigurationError, "Elasticsearch Input Plugin's `query` option cannot specify specific `slice` when configured to manage parallel slices with `slices` option") + @slices < 1 && fail(LogStash::ConfigurationError, "Elasticsearch Input Plugin's `slices` option must be greater than zero, got `#{@slices}`") + end + end + + @retries < 0 && fail(LogStash::ConfigurationError, "Elasticsearch Input Plugin's `retries` option must be equal or greater than zero, got `#{@retries}`") + + validate_authentication + fill_user_password_from_cloud_auth - params = { - "q" => @query, - "scroll" => @scroll, - "size" => "#{@size}", + transport_options = {:headers => {}} + transport_options[:headers].merge!(INTERNAL_ORIGIN_HEADER) + transport_options[:headers].merge!(setup_basic_auth(user, password)) + transport_options[:headers].merge!(setup_api_key(api_key)) + transport_options[:headers].merge!({'user-agent' => prepare_user_agent()}) + transport_options[:headers].merge!(@custom_headers) unless @custom_headers.empty? + transport_options[:request_timeout] = @request_timeout_seconds unless @request_timeout_seconds.nil? + transport_options[:connect_timeout] = @connect_timeout_seconds unless @connect_timeout_seconds.nil? + transport_options[:socket_timeout] = @socket_timeout_seconds unless @socket_timeout_seconds.nil? + + hosts = setup_hosts + ssl_options = setup_client_ssl + + @logger.warn "Supplied proxy setting (proxy => '') has no effect" if @proxy.eql?('') + + transport_options[:proxy] = @proxy.to_s if @proxy && !@proxy.eql?('') + + @client_options = { + :hosts => hosts, + :transport_options => transport_options, + :transport_class => get_transport_client_class, + :ssl => ssl_options } - params['search_type'] = "scan" if @scan - @search_url = "http://#{@host}:#{@port}/#{@index}/_search?#{encode(params)}" - @scroll_url = "http://#{@host}:#{@port}/_search/scroll?#{encode({"scroll" => @scroll})}" - end # def register + @client = Elasticsearch::Client.new(@client_options) - private - def encode(hash) - return hash.collect do |key, value| - CGI.escape(key) + "=" + CGI.escape(value) - end.join("&") - end # def encode + test_connection! 
- private - def execute_search_request - response = @agent.get!(@search_url) - json = "" - response.read_body { |c| json << c } - json + validate_es_for_esql_support! + + setup_serverless + + setup_search_api + + @query_executor = create_query_executor + + setup_cursor_tracker + + @client end - private - def execute_scroll_request(scroll_id) - response = @agent.post!(@scroll_url, :body => scroll_id) - json = "" - response.read_body { |c| json << c } - json + def run(output_queue) + if @schedule + scheduler.cron(@schedule, :overlap => @schedule_overlap) do + @query_executor.do_run(output_queue, get_query_object()) + end + scheduler.join + else + @query_executor.do_run(output_queue, get_query_object()) + end end + ## + # This can be called externally from the query_executor public - def run(output_queue) - result = LogStash::Json.load(execute_search_request) - scroll_id = result["_scroll_id"] - - # When using the search_type=scan we don't get an initial result set. - # So we do it here. - if @scan - result = LogStash::Json.load(execute_scroll_request(scroll_id)) - end - - loop do - break if result.nil? - hits = result["hits"]["hits"] - break if hits.empty? - - hits.each do |hit| - # Hack to make codecs work - @codec.decode(LogStash::Json.dump(hit["_source"])) do |event| - decorate(event) - output_queue << event - end + def push_hit(hit, output_queue, root_field = '_source') + event = event_from_hit(hit, root_field) + decorate(event) + output_queue << event + record_last_value(event) + end + + def decorate_event(event) + decorate(event) + end + + private + + def get_query_object + return @query if @query_type == 'esql' + if @cursor_tracker + query = @cursor_tracker.inject_cursor(@query) + @logger.debug("new query is #{query}") + else + query = @query + end + LogStash::Json.load(query) + end + + def record_last_value(event) + @cursor_tracker.record_last_value(event) if @tracking_field + end + + def event_from_hit(hit, root_field) + event = targeted_event_factory.new_event hit[root_field] + set_docinfo_fields(hit, event) if @docinfo + + event + rescue => e + serialized_hit = hit.to_json + logger.warn("Event creation error, original data now in [event][original] field", message: e.message, exception: e.class, data: serialized_hit) + return event_factory.new_event('event' => { 'original' => serialized_hit }, 'tags' => ['_elasticsearch_input_failure']) + end + + def set_docinfo_fields(hit, event) + # do not assume event[@docinfo_target] to be in-place updatable. first get it, update it, then at the end set it in the event. + docinfo_target = event.get(@docinfo_target) || {} + + unless docinfo_target.is_a?(Hash) + # expect error to be handled by `#event_from_hit` + fail RuntimeError, "Incompatible event; unable to merge docinfo fields into docinfo_target=`#{@docinfo_target}`" + end + + @docinfo_fields.each do |field| + docinfo_target[field] = hit[field] + end + + event.set(@docinfo_target, docinfo_target) + end + + def hosts_default?(hosts) + hosts.nil? || ( hosts.is_a?(Array) && hosts.empty? ) + end + + def effectively_ssl? + return true if @ssl_enabled + + hosts = Array(@hosts) + return false if hosts.nil? || hosts.empty? + + hosts.all? 
{ |host| host && host.to_s.start_with?("https") } + end + + def validate_authentication + authn_options = 0 + authn_options += 1 if @cloud_auth + authn_options += 1 if (@api_key && @api_key.value) + authn_options += 1 if (@user || (@password && @password.value)) + + if authn_options > 1 + raise LogStash::ConfigurationError, 'Multiple authentication options are specified, please only use one of user/password, cloud_auth or api_key' + end + + if @api_key && @api_key.value && @ssl_enabled != true + raise(LogStash::ConfigurationError, "Using api_key authentication requires SSL/TLS secured communication using the `ssl_enabled => true` option") + end + end + + def setup_client_ssl + ssl_options = {} + ssl_options[:ssl] = true if @ssl_enabled + + unless @ssl_enabled + return ssl_options + end + + ssl_certificate_authorities, ssl_truststore_path, ssl_certificate, ssl_keystore_path = params.values_at('ssl_certificate_authorities', 'ssl_truststore_path', 'ssl_certificate', 'ssl_keystore_path') + + if ssl_certificate_authorities && ssl_truststore_path + raise LogStash::ConfigurationError, 'Use either "ssl_certificate_authorities/ca_file" or "ssl_truststore_path" when configuring the CA certificate' + end + + if ssl_certificate && ssl_keystore_path + raise LogStash::ConfigurationError, 'Use either "ssl_certificate" or "ssl_keystore_path/keystore" when configuring client certificates' + end + + if ssl_certificate_authorities&.any? + raise LogStash::ConfigurationError, 'Multiple values on "ssl_certificate_authorities" are not supported by this plugin' if ssl_certificate_authorities.size > 1 + ssl_options[:ca_file] = ssl_certificate_authorities.first + end + + if ssl_truststore_path + ssl_options[:truststore] = ssl_truststore_path + ssl_options[:truststore_type] = params["ssl_truststore_type"] if params.include?("ssl_truststore_type") + ssl_options[:truststore_password] = params["ssl_truststore_password"].value if params.include?("ssl_truststore_password") + end + + if ssl_keystore_path + ssl_options[:keystore] = ssl_keystore_path + ssl_options[:keystore_type] = params["ssl_keystore_type"] if params.include?("ssl_keystore_type") + ssl_options[:keystore_password] = params["ssl_keystore_password"].value if params.include?("ssl_keystore_password") + end + + ssl_key = params["ssl_key"] + if ssl_certificate + raise LogStash::ConfigurationError, 'Using an "ssl_certificate" requires an "ssl_key"' unless ssl_key + ssl_options[:client_cert] = ssl_certificate + ssl_options[:client_key] = ssl_key + elsif !ssl_key.nil? + raise LogStash::ConfigurationError, 'An "ssl_certificate" is required when using an "ssl_key"' + end + + ssl_verification_mode = params["ssl_verification_mode"] + unless ssl_verification_mode.nil? 
+ case ssl_verification_mode + when 'none' + logger.warn "You have enabled encryption but DISABLED certificate verification, " + + "to make sure your data is secure set `ssl_verification_mode => full`" + ssl_options[:verify] = :disable + else + # Manticore's :default maps to Apache HTTP Client's DefaultHostnameVerifier, + # which is the modern STRICT verifier that replaces the deprecated StrictHostnameVerifier + ssl_options[:verify] = :default end + end - # Get the scroll id from the previous result set and use it for getting the next data set - scroll_id = result["_scroll_id"] + ssl_options[:cipher_suites] = params["ssl_cipher_suites"] if params.include?("ssl_cipher_suites") - # Fetch the next result set - result = LogStash::Json.load(execute_scroll_request(scroll_id)) + protocols = params['ssl_supported_protocols'] + ssl_options[:protocols] = protocols if protocols&.any? + ssl_options[:trust_strategy] = trust_strategy_for_ca_trusted_fingerprint - if result["error"] - @logger.warn(result["error"], :request => scroll_url) - # TODO(sissel): raise an error instead of breaking - break + ssl_options + end + + def setup_ssl_params! + # Only infer ssl_enabled if it wasn't explicitly set + unless original_params.include?('ssl_enabled') + @ssl_enabled = effectively_ssl? + params['ssl_enabled'] = @ssl_enabled + end + end + + def setup_hosts + @hosts = Array(@hosts).map { |host| host.to_s } # potential SafeURI#to_s + @hosts.map do |h| + if h.start_with?('http:', 'https:') + h + else + host, port = h.split(':') + { host: host, port: port, scheme: (@ssl_enabled ? 'https' : 'http') } end + end + end + + def setup_basic_auth(user, password) + return {} unless user && password && password.value + + token = ::Base64.strict_encode64("#{user}:#{password.value}") + { 'Authorization' => "Basic #{token}" } + end + + def setup_api_key(api_key) + return {} unless (api_key&.value) + + token = base64?(api_key.value) ? api_key.value : Base64.strict_encode64(api_key.value) + { 'Authorization' => "ApiKey #{token}" } + end + + def base64?(string) + string == Base64.strict_encode64(Base64.strict_decode64(string)) + rescue ArgumentError + false + end + + def prepare_user_agent + os_name = java.lang.System.getProperty('os.name') + os_version = java.lang.System.getProperty('os.version') + os_arch = java.lang.System.getProperty('os.arch') + jvm_vendor = java.lang.System.getProperty('java.vendor') + jvm_version = java.lang.System.getProperty('java.version') + + plugin_version = Gem.loaded_specs["logstash-input-elasticsearch"].version + # example: logstash/7.14.1 (OS=Linux-5.4.0-84-generic-amd64; JVM=AdoptOpenJDK-11.0.11) logstash-input-elasticsearch/4.10.0 + "logstash/#{LOGSTASH_VERSION} (OS=#{os_name}-#{os_version}-#{os_arch}; JVM=#{jvm_vendor}-#{jvm_version}) logstash-#{@plugin_type}-#{config_name}/#{plugin_version}" + end + + def fill_user_password_from_cloud_auth + return unless @cloud_auth + + @user, @password = parse_user_password_from_cloud_auth(@cloud_auth) + params['user'], params['password'] = @user, @password + end + + def fill_hosts_from_cloud_id + return unless @cloud_id + + if @hosts && !hosts_default?(@hosts) + raise LogStash::ConfigurationError, 'Both cloud_id and hosts specified, please only use one of those.' 
+ end + @hosts = parse_host_uri_from_cloud_id(@cloud_id) + end + + def parse_host_uri_from_cloud_id(cloud_id) + begin # might not be available on older LS + require 'logstash/util/cloud_setting_id' + rescue LoadError + raise LogStash::ConfigurationError, 'The cloud_id setting is not supported by your version of Logstash, ' + + 'please upgrade your installation (or set hosts instead).' + end + + begin + cloud_id = LogStash::Util::CloudSettingId.new(cloud_id) # already does append ':{port}' to host + rescue ArgumentError => e + raise LogStash::ConfigurationError, e.message.to_s.sub(/Cloud Id/i, 'cloud_id') + end + cloud_uri = "#{cloud_id.elasticsearch_scheme}://#{cloud_id.elasticsearch_host}" + LogStash::Util::SafeURI.new(cloud_uri) + end + def parse_user_password_from_cloud_auth(cloud_auth) + begin # might not be available on older LS + require 'logstash/util/cloud_setting_auth' + rescue LoadError + raise LogStash::ConfigurationError, 'The cloud_auth setting is not supported by your version of Logstash, ' + + 'please upgrade your installation (or set user/password instead).' end - rescue LogStash::ShutdownSignal - # Do nothing, let us quit. - end # def run -end # class LogStash::Inputs::Elasticsearch + + cloud_auth = cloud_auth.value if cloud_auth.is_a?(LogStash::Util::Password) + begin + cloud_auth = LogStash::Util::CloudSettingAuth.new(cloud_auth) + rescue ArgumentError => e + raise LogStash::ConfigurationError, e.message.to_s.sub(/Cloud Auth/i, 'cloud_auth') + end + [ cloud_auth.username, cloud_auth.password ] + end + + # @private used by unit specs + attr_reader :client + + def test_connection! + @client.ping + rescue Elasticsearch::UnsupportedProductError + raise LogStash::ConfigurationError, "Could not connect to a compatible version of Elasticsearch" + end + + def es_info + @es_info ||= @client.info + end + + def es_version + @es_version ||= es_info&.dig('version', 'number') + end + + def es_major_version + @es_major_version ||= es_version.split('.').first.to_i + end + + # recreate client with default header when it is serverless + # verify the header by sending GET / + def setup_serverless + if serverless? + @client_options[:transport_options][:headers].merge!(DEFAULT_EAV_HEADER) + @client = Elasticsearch::Client.new(@client_options) + @client.info + end + rescue => e + @logger.error("Failed to retrieve Elasticsearch info", message: e.message, exception: e.class, backtrace: e.backtrace) + raise LogStash::ConfigurationError, "Could not connect to a compatible version of Elasticsearch" + end + + def build_flavor + @build_flavor ||= es_info&.dig('version', 'build_flavor') + end + + def serverless? + @is_serverless ||= (build_flavor == BUILD_FLAVOR_SERVERLESS) + end + + def setup_search_api + @resolved_search_api = if @search_api == "auto" + api = if es_major_version >= 8 + "search_after" + else + "scroll" + end + logger.info("`search_api => auto` resolved to `#{api}`", :elasticsearch => es_version) + api + else + @search_api + end + + end + + def create_query_executor + return LogStash::Inputs::Elasticsearch::Esql.new(@client, self) if @query_type == 'esql' + + # DSL query executor + return LogStash::Inputs::Elasticsearch::Aggregation.new(@client, self) if @response_type == 'aggregations' + # response_type is hits, executor can be search_after or scroll type + return LogStash::Inputs::Elasticsearch::SearchAfter.new(@client, self) if @resolved_search_api == "search_after" + + logger.warn("scroll API is no longer recommended for pagination. 
Consider using search_after instead.") if es_major_version >= 8 + LogStash::Inputs::Elasticsearch::Scroll.new(@client, self) + end + + def setup_cursor_tracker + return unless @tracking_field + return unless @query_executor.is_a?(LogStash::Inputs::Elasticsearch::SearchAfter) + + if @resolved_search_api != "search_after" || @response_type != "hits" + raise ConfigurationError.new("The `tracking_field` feature can only be used with `search_after` non-aggregation queries") + end + + @cursor_tracker = CursorTracker.new(last_run_metadata_path: last_run_metadata_path, + tracking_field: @tracking_field, + tracking_field_seed: @tracking_field_seed) + @query_executor.cursor_tracker = @cursor_tracker + end + + def last_run_metadata_path + return @last_run_metadata_path if @last_run_metadata_path + + last_run_metadata_path = ::File.join(LogStash::SETTINGS.get_value("path.data"), "plugins", "inputs", "elasticsearch", pipeline_id, "last_run_value") + FileUtils.mkdir_p ::File.dirname(last_run_metadata_path) + last_run_metadata_path + end + + def get_transport_client_class + # LS-core includes `elasticsearch` gem. The gem is composed of two separate gems: `elasticsearch-api` and `elasticsearch-transport` + # And now `elasticsearch-transport` is old, instead we have `elastic-transport`. + # LS-core updated `elasticsearch` > 8: https://github.com/elastic/logstash/pull/17161 + # Following source bits are for the compatibility to support both `elasticsearch-transport` and `elastic-transport` gems + require "elasticsearch/transport/transport/http/manticore" + require_relative "elasticsearch/patches/_elasticsearch_transport_http_manticore" + require_relative "elasticsearch/patches/_elasticsearch_transport_connections_selector" + ::Elasticsearch::Transport::Transport::HTTP::Manticore + rescue ::LoadError + require "elastic/transport/transport/http/manticore" + ::Elastic::Transport::Transport::HTTP::Manticore + end + + def validate_ls_version_for_esql_support! + if Gem::Version.create(LOGSTASH_VERSION) < Gem::Version.create(LS_ESQL_SUPPORT_VERSION) + fail("Current version of Logstash does not include Elasticsearch client which supports ES|QL. Please upgrade Logstash to at least #{LS_ESQL_SUPPORT_VERSION}") + end + end + + def validate_esql_query! + fail(LogStash::ConfigurationError, "`query` cannot be empty") if @query.strip.empty? + source_commands = %w[FROM ROW SHOW] + contains_source_command = source_commands.any? { |source_command| @query.strip.start_with?(source_command) } + fail(LogStash::ConfigurationError, "`query` needs to start with any of #{source_commands}") unless contains_source_command + end + + def validate_es_for_esql_support! + return unless @query_type == 'esql' + # make sure connected ES supports ES|QL (8.11+) + es_supports_esql = Gem::Version.create(es_version) >= Gem::Version.create(ES_ESQL_SUPPORT_VERSION) + fail("Connected Elasticsearch #{es_version} version does not supports ES|QL. ES|QL feature requires at least Elasticsearch #{ES_ESQL_SUPPORT_VERSION} version.") unless es_supports_esql + end + + module URIOrEmptyValidator + ## + # @override to provide :uri_or_empty validator + # @param value [Array] + # @param validator [nil,Array,Symbol] + # @return [Array(true,Object)]: if validation is a success, a tuple containing `true` and the coerced value + # @return [Array(false,String)]: if validation is a failure, a tuple containing `false` and the failure reason. 
+ def validate_value(value, validator) + return super unless validator == :uri_or_empty + + value = deep_replace(value) + value = hash_or_array(value) + + return true, value.first if value.size == 1 && value.first.empty? + + return super(value, :uri) + end + end + extend(URIOrEmptyValidator) + + module PositiveWholeNumberValidator + ## + # @override to provide :positive_whole_number validator + # @param value [Array] + # @param validator [nil,Array,Symbol] + # @return [Array(true,Object)]: if validation is a success, a tuple containing `true` and the coerced value + # @return [Array(false,String)]: if validation is a failure, a tuple containing `false` and the failure reason. + def validate_value(value, validator) + return super unless validator == :positive_whole_number + + is_number, coerced_number = super(value, :number) + + return [true, coerced_number.to_i] if is_number && coerced_number.denominator == 1 && coerced_number > 0 + + return [false, "Expected positive whole number, got `#{value.inspect}`"] + end + end + extend(PositiveWholeNumberValidator) +end diff --git a/lib/logstash/inputs/elasticsearch/aggregation.rb b/lib/logstash/inputs/elasticsearch/aggregation.rb new file mode 100644 index 00000000..91c12443 --- /dev/null +++ b/lib/logstash/inputs/elasticsearch/aggregation.rb @@ -0,0 +1,49 @@ +require 'logstash/helpers/loggable_try' + +module LogStash + module Inputs + class Elasticsearch + class Aggregation + include LogStash::Util::Loggable + + AGGREGATION_JOB = "aggregation" + + def initialize(client, plugin) + @client = client + @plugin_params = plugin.params + + @index = @plugin_params["index"] + @size = @plugin_params["size"] + @retries = @plugin_params["retries"] + @plugin = plugin + end + + def retryable(job_name, &block) + stud_try = ::LogStash::Helpers::LoggableTry.new(logger, job_name) + stud_try.try((@retries + 1).times) { yield } + rescue => e + error_details = {:message => e.message, :cause => e.cause} + error_details[:backtrace] = e.backtrace if logger.debug? + logger.error("Tried #{job_name} unsuccessfully", error_details) + false + end + + def aggregation_options(query_object) + { + :index => @index, + :size => 0, + :body => query_object + } + end + + def do_run(output_queue, query_object) + logger.info("Aggregation starting") + r = retryable(AGGREGATION_JOB) do + @client.search(aggregation_options(query_object)) + end + @plugin.push_hit(r, output_queue, 'aggregations') if r + end + end + end + end +end diff --git a/lib/logstash/inputs/elasticsearch/cursor_tracker.rb b/lib/logstash/inputs/elasticsearch/cursor_tracker.rb new file mode 100644 index 00000000..d43b1fd8 --- /dev/null +++ b/lib/logstash/inputs/elasticsearch/cursor_tracker.rb @@ -0,0 +1,58 @@ +require 'fileutils' + +module LogStash; module Inputs; class Elasticsearch + class CursorTracker + include LogStash::Util::Loggable + + attr_reader :last_value + + def initialize(last_run_metadata_path:, tracking_field:, tracking_field_seed:) + @last_run_metadata_path = last_run_metadata_path + @last_value_hashmap = Java::java.util.concurrent.ConcurrentHashMap.new + @last_value = IO.read(@last_run_metadata_path) rescue nil || tracking_field_seed + @tracking_field = tracking_field + logger.info "Starting value for cursor field \"#{@tracking_field}\": #{@last_value}" + @mutex = Mutex.new + end + + def checkpoint_cursor(intermediate: true) + @mutex.synchronize do + if intermediate + # in intermediate checkpoints pick the smallest + converge_last_value {|v1, v2| v1 < v2 ? 
v1 : v2} + else + # in the last search of a PIT choose the largest + converge_last_value {|v1, v2| v1 > v2 ? v1 : v2} + @last_value_hashmap.clear + end + IO.write(@last_run_metadata_path, @last_value) + end + end + + def converge_last_value(&block) + return if @last_value_hashmap.empty? + new_last_value = @last_value_hashmap.reduceValues(1000, &block) + logger.debug? && logger.debug("converge_last_value: got #{@last_value_hashmap.values.inspect}. won: #{new_last_value}") + return if new_last_value == @last_value + @last_value = new_last_value + logger.info "New cursor value for field \"#{@tracking_field}\" is: #{new_last_value}" + end + + def record_last_value(event) + value = event.get(@tracking_field) + logger.trace? && logger.trace("storing last_value if #{@tracking_field} for #{Thread.current.object_id}: #{value}") + @last_value_hashmap.put(Thread.current.object_id, value) + end + + def inject_cursor(query_json) + # ":present" means "now - 30s" to avoid grabbing partially visible data in the PIT + result = query_json.gsub(":last_value", @last_value.to_s).gsub(":present", now_minus_30s) + logger.debug("inject_cursor: injected values for ':last_value' and ':present'", :query => result) + result + end + + def now_minus_30s + Java::java.time.Instant.now.minusSeconds(30).to_s + end + end +end; end; end diff --git a/lib/logstash/inputs/elasticsearch/esql.rb b/lib/logstash/inputs/elasticsearch/esql.rb new file mode 100644 index 00000000..30afb723 --- /dev/null +++ b/lib/logstash/inputs/elasticsearch/esql.rb @@ -0,0 +1,153 @@ +require 'logstash/helpers/loggable_try' + +module LogStash + module Inputs + class Elasticsearch + class Esql + include LogStash::Util::Loggable + + ESQL_JOB = "ES|QL job" + + ESQL_PARSERS_BY_TYPE = Hash.new(lambda { |x| x }).merge( + 'date' => ->(value) { value && LogStash::Timestamp.new(value) }, + ) + + # Initialize the ESQL query executor + # @param client [Elasticsearch::Client] The Elasticsearch client instance + # @param plugin [LogStash::Inputs::Elasticsearch] The parent plugin instance + def initialize(client, plugin) + @client = client + @event_decorator = plugin.method(:decorate_event) + @retries = plugin.params["retries"] + + target_field = plugin.params["target"] + if target_field + def self.apply_target(path); "[#{target_field}][#{path}]"; end + else + def self.apply_target(path); path; end + end + + @query = plugin.params["query"] + unless @query.include?('METADATA') + logger.info("`METADATA` not found the query. 
`_id`, `_version` and `_index` will not be available in the result", {:query => @query}) + end + logger.debug("ES|QL executor initialized with", {:query => @query}) + end + + # Execute the ESQL query and process results + # @param output_queue [Queue] The queue to push processed events to + # @param query A query (to obey interface definition) + def do_run(output_queue, query) + logger.info("ES|QL executor has started") + response = retryable(ESQL_JOB) do + @client.esql.query({ body: { query: @query }, format: 'json', drop_null_columns: true }) + end + # retriable already printed error details + return if response == false + + if response&.headers&.dig("warning") + logger.warn("ES|QL executor received warning", {:warning_message => response.headers["warning"]}) + end + columns = response['columns']&.freeze + values = response['values']&.freeze + logger.debug("ES|QL query response size: #{values&.size}") + + process_response(columns, values, output_queue) if columns && values + end + + # Execute a retryable operation with proper error handling + # @param job_name [String] Name of the job for logging purposes + # @yield The block to execute + # @return [Boolean] true if successful, false otherwise + def retryable(job_name, &block) + stud_try = ::LogStash::Helpers::LoggableTry.new(logger, job_name) + stud_try.try((@retries + 1).times) { yield } + rescue => e + error_details = {:message => e.message, :cause => e.cause} + error_details[:backtrace] = e.backtrace if logger.debug? + logger.error("#{job_name} failed with ", error_details) + false + end + + private + + # Process the ESQL response and push events to the output queue + # @param columns [Array[Hash]] The ESQL query response columns + # @param values [Array[Array]] The ESQL query response hits + # @param output_queue [Queue] The queue to push processed events to + def process_response(columns, values, output_queue) + column_specs = columns.map { |column| ColumnSpec.new(column) } + sub_element_mark_map = mark_sub_elements(column_specs) + multi_fields = sub_element_mark_map.filter_map { |key, val| key.name if val == true } + logger.warn("Multi-fields found in ES|QL result and they will not be available in the event. Please use `RENAME` command if you want to include them.", { :detected_multi_fields => multi_fields }) if multi_fields.any? 
+ + values.each do |row| + event = column_specs.zip(row).each_with_object(LogStash::Event.new) do |(column, value), event| + # `unless value.nil?` is a part of `drop_null_columns` that if some of columns' values are not `nil`, `nil` values appear + # we should continuously filter out them to achieve full `drop_null_columns` on each individual row (ideal `LIMIT 1` result) + # we also exclude sub-elements of main field + if value && sub_element_mark_map[column] == false + field_reference = apply_target(column.field_reference) + event.set(field_reference, ESQL_PARSERS_BY_TYPE[column.type].call(value)) + end + end + @event_decorator.call(event) + output_queue << event + rescue => e + # if event creation fails with whatever reason, inform user and tag with failure and return entry as it is + logger.warn("Event creation error, ", message: e.message, exception: e.class, data: { "columns" => columns, "values" => [row] }) + failed_event = LogStash::Event.new("columns" => columns, "values" => [row], "tags" => ['_elasticsearch_input_failure']) + output_queue << failed_event + end + end + + # Determines whether each column in a collection is a nested sub-element (example "user.age") + # of another column in the same collection (example "user"). + # + # @param columns [Array] An array of objects with a `name` attribute representing field paths. + # @return [Hash] A hash mapping each column to `true` if it is a sub-element of another field, `false` otherwise. + # Time complexity: (O(NlogN+N*K)) where K is the number of conflict depth + # without (`prefix_set`) memoization, it would be O(N^2) + def mark_sub_elements(columns) + # Sort columns by name length (ascending) + sorted_columns = columns.sort_by { |c| c.name.length } + prefix_set = Set.new # memoization set + + sorted_columns.each_with_object({}) do |column, memo| + # Split the column name into parts (e.g., "user.profile.age" → ["user", "profile", "age"]) + parts = column.name.split('.') + + # Generate all possible parent prefixes (e.g., "user", "user.profile") + # and check if any parent prefix exists in the set + parent_prefixes = (0...parts.size - 1).map { |i| parts[0..i].join('.') } + memo[column] = parent_prefixes.any? { |prefix| prefix_set.include?(prefix) } + prefix_set.add(column.name) + end + end + end + + # Class representing a column specification in the ESQL response['columns'] + # The class's main purpose is to provide a structure for the event key + # columns is an array with `name` and `type` pair (example: `{"name"=>"@timestamp", "type"=>"date"}`) + # @attr_reader :name [String] The name of the column + # @attr_reader :type [String] The type of the column + class ColumnSpec + attr_reader :name, :type + + def initialize(spec) + @name = isolate(spec.fetch('https://github.com/name')) + @type = isolate(spec.fetch('https://github.com/type')) + end + + def field_reference + @_field_reference ||= '[' + name.gsub('.', '][') + ']' + end + + private + def isolate(value) + value.frozen? ? 
value : value.clone.freeze + end + end + end + end +end \ No newline at end of file diff --git a/lib/logstash/inputs/elasticsearch/paginated_search.rb b/lib/logstash/inputs/elasticsearch/paginated_search.rb new file mode 100644 index 00000000..dd66b2c0 --- /dev/null +++ b/lib/logstash/inputs/elasticsearch/paginated_search.rb @@ -0,0 +1,241 @@ +require 'logstash/helpers/loggable_try' + +module LogStash + module Inputs + class Elasticsearch + class PaginatedSearch + include LogStash::Util::Loggable + + def initialize(client, plugin) + @client = client + @plugin_params = plugin.params + + @index = @plugin_params["index"] + @query = LogStash::Json.load(@plugin_params["query"]) + @scroll = @plugin_params["scroll"] + @size = @plugin_params["size"] + @slices = @plugin_params["slices"] + @retries = @plugin_params["retries"] + + @plugin = plugin + @pipeline_id = plugin.pipeline_id + end + + def do_run(output_queue, query) + @query = query + + return retryable_search(output_queue) if @slices.nil? || @slices <= 1 + retryable_slice_search(output_queue) + end + + def retryable(job_name, &block) + stud_try = ::LogStash::Helpers::LoggableTry.new(logger, job_name) + stud_try.try((@retries + 1).times) { yield } + rescue => e + error_details = {:message => e.message, :cause => e.cause} + error_details[:backtrace] = e.backtrace if logger.debug? + logger.error("Tried #{job_name} unsuccessfully", error_details) + end + + def retryable_search(output_queue) + raise NotImplementedError + end + + def retryable_slice_search(output_queue) + raise NotImplementedError + end + end + + class Scroll < PaginatedSearch + SCROLL_JOB = "scroll paginated search" + + def search_options(slice_id) + query = @query + query = @query.merge('slice' => { 'id' => slice_id, 'max' => @slices}) unless slice_id.nil? + { + :index => @index, + :scroll => @scroll, + :size => @size, + :body => LogStash::Json.dump(query) + } + end + + def initial_search(slice_id) + options = search_options(slice_id) + @client.search(options) + end + + def next_page(scroll_id) + @client.scroll(:body => { :scroll_id => scroll_id }, :scroll => @scroll) + end + + def process_page(output_queue) + r = yield + r['hits']['hits'].each { |hit| @plugin.push_hit(hit, output_queue) } + [r['hits']['hits'].any?, r['_scroll_id']] + end + + def search(output_queue, slice_id=nil) + log_details = {} + log_details = log_details.merge({ slice_id: slice_id, slices: @slices }) unless slice_id.nil? + + logger.info("Query start", log_details) + has_hits, scroll_id = process_page(output_queue) { initial_search(slice_id) } + + while has_hits && scroll_id && !@plugin.stop? 
+ logger.debug("Query progress", log_details) + has_hits, scroll_id = process_page(output_queue) { next_page(scroll_id) } + end + + logger.info("Query completed", log_details) + ensure + clear(scroll_id) + end + + def retryable_search(output_queue, slice_id=nil) + retryable(SCROLL_JOB) do + search(output_queue, slice_id) + end + end + + def retryable_slice_search(output_queue) + logger.warn("managed slices for query is very large (#{@slices}); consider reducing") if @slices > 8 + + @slices.times.map do |slice_id| + Thread.new do + LogStash::Util::set_thread_name("[#{@pipeline_id}]|input|elasticsearch|slice_#{slice_id}") + retryable_search(output_queue, slice_id) + end + end.map(&:join) + + logger.trace("#{@slices} slices completed") + end + + def clear(scroll_id) + @client.clear_scroll(:body => { :scroll_id => scroll_id }) if scroll_id + rescue => e + # ignore & log any clear_scroll errors + logger.debug("Ignoring clear_scroll exception", message: e.message, exception: e.class) + end + end + + class SearchAfter < PaginatedSearch + PIT_JOB = "create point in time (PIT)" + SEARCH_AFTER_JOB = "search_after paginated search" + + attr_accessor :cursor_tracker + + def do_run(output_queue, query) + super(output_queue, query) + @cursor_tracker.checkpoint_cursor(intermediate: false) if @cursor_tracker + end + + def pit?(id) + !!id&.is_a?(String) + end + + def create_pit + logger.info("Create point in time (PIT)") + r = @client.open_point_in_time(index: @index, keep_alive: @scroll) + r['id'] + end + + def search_options(pit_id: , search_after: nil, slice_id: nil) + body = @query.merge({ + :pit => { + :id => pit_id, + :keep_alive => @scroll + } + }) + + # search_after requires at least a sort field explicitly + # we add default sort "_shard_doc": "asc" if the query doesn't have any sort field + # by default, ES adds the same implicitly on top of the provided "sort" + # https://www.elastic.co/guide/en/elasticsearch/reference/8.10/paginate-search-results.html#CO201-2 + body = body.merge(:sort => {"_shard_doc": "asc"}) if @query&.dig("sort").nil? + + body = body.merge(:search_after => search_after) unless search_after.nil? + body = body.merge(:slice => {:id => slice_id, :max => @slices}) unless slice_id.nil? + { + :size => @size, + :body => body + } + end + + def next_page(pit_id: , search_after: nil, slice_id: nil) + options = search_options(pit_id: pit_id, search_after: search_after, slice_id: slice_id) + logger.trace("search options", options) + @client.search(options) + end + + def process_page(output_queue) + r = yield + r['hits']['hits'].each { |hit| @plugin.push_hit(hit, output_queue) } + + has_hits = r['hits']['hits'].any? + search_after = r['hits']['hits'][-1]['sort'] rescue nil + logger.warn("Query got data but the sort value is empty") if has_hits && search_after.nil? + [ has_hits, search_after ] + end + + def with_pit + pit_id = retryable(PIT_JOB) { create_pit } + yield pit_id if pit?(pit_id) + ensure + clear(pit_id) + end + + def search(output_queue:, slice_id: nil, pit_id:) + log_details = {} + log_details = log_details.merge({ slice_id: slice_id, slices: @slices }) unless slice_id.nil? + logger.info("Query start", log_details) + + has_hits = true + search_after = nil + + while has_hits && !@plugin.stop? 
+ logger.debug("Query progress", log_details) + has_hits, search_after = process_page(output_queue) do + next_page(pit_id: pit_id, search_after: search_after, slice_id: slice_id) + end + end + + @cursor_tracker.checkpoint_cursor(intermediate: true) if @cursor_tracker + + logger.info("Query completed", log_details) + end + + def retryable_search(output_queue) + with_pit do |pit_id| + retryable(SEARCH_AFTER_JOB) do + search(output_queue: output_queue, pit_id: pit_id) + end + end + end + + def retryable_slice_search(output_queue) + with_pit do |pit_id| + @slices.times.map do |slice_id| + Thread.new do + LogStash::Util::set_thread_name("[#{@pipeline_id}]|input|elasticsearch|slice_#{slice_id}") + retryable(SEARCH_AFTER_JOB) do + search(output_queue: output_queue, slice_id: slice_id, pit_id: pit_id) + end + end + end.map(&:join) + end + + logger.trace("#{@slices} slices completed") + end + + def clear(pit_id) + logger.info("Closing point in time (PIT)") + @client.close_point_in_time(:body => {:id => pit_id} ) if pit?(pit_id) + rescue => e + logger.debug("Ignoring close_point_in_time exception", message: e.message, exception: e.class) + end + end + + end + end +end diff --git a/lib/logstash/inputs/elasticsearch/patches/_elasticsearch_transport_connections_selector.rb b/lib/logstash/inputs/elasticsearch/patches/_elasticsearch_transport_connections_selector.rb new file mode 100644 index 00000000..b0d87869 --- /dev/null +++ b/lib/logstash/inputs/elasticsearch/patches/_elasticsearch_transport_connections_selector.rb @@ -0,0 +1,51 @@ +require 'elasticsearch' +require 'elasticsearch/transport/transport/connections/selector' + +if Gem.loaded_specs['elasticsearch-transport'].version < Gem::Version.new("7.2.0") + # elasticsearch-transport versions prior to 7.2.0 suffered of a race condition on accessing + # the connection pool. This issue was fixed (in 7.2.0) with + # https://github.com/elastic/elasticsearch-ruby/commit/15f9d78591a6e8823948494d94b15b0ca38819d1 + # + # This plugin, at the moment, is using elasticsearch >= 5.0.5 + # When this requirement ceases, this patch could be removed. + module Elasticsearch + module Transport + module Transport + module Connections + module Selector + + # "Round-robin" selector strategy (default). + # + class RoundRobin + include Base + + # @option arguments [Connections::Collection] :connections Collection with connections. + # + def initialize(arguments = {}) + super + @mutex = Mutex.new + @current = nil + end + + # Returns the next connection from the collection, rotating them in round-robin fashion. 
+ # + # @return [Connections::Connection] + # + def select(options={}) + @mutex.synchronize do + conns = connections + if @current && (@current < conns.size-1) + @current += 1 + else + @current = 0 + end + conns[@current] + end + end + end + end + end + end + end + end +end \ No newline at end of file diff --git a/lib/logstash/inputs/elasticsearch/patches/_elasticsearch_transport_http_manticore.rb b/lib/logstash/inputs/elasticsearch/patches/_elasticsearch_transport_http_manticore.rb new file mode 100644 index 00000000..e39bd9c2 --- /dev/null +++ b/lib/logstash/inputs/elasticsearch/patches/_elasticsearch_transport_http_manticore.rb @@ -0,0 +1,43 @@ +# encoding: utf-8 +require "elasticsearch" +require "elasticsearch/transport/transport/http/manticore" + +es_client_version = Gem.loaded_specs['elasticsearch-transport'].version +if es_client_version >= Gem::Version.new('7.2') && es_client_version < Gem::Version.new('7.16') + # elasticsearch-transport 7.2.0 - 7.14.0 had a bug where setting http headers + # ES::Client.new ..., transport_options: { headers: { 'Authorization' => ... } } + # would be lost https://github.com/elastic/elasticsearch-ruby/issues/1428 + # + # NOTE: needs to be idempotent as filter ES plugin might apply the same patch! + # + # @private + module Elasticsearch + module Transport + module Transport + module HTTP + class Manticore + + def apply_headers(request_options, options) + headers = (options && options[:headers]) || {} + headers[CONTENT_TYPE_STR] = find_value(headers, CONTENT_TYPE_REGEX) || DEFAULT_CONTENT_TYPE + + # this code is necessary to grab the correct user-agent header + # when this method is invoked with apply_headers(@request_options, options) + # from https://github.com/elastic/elasticsearch-ruby/blob/v7.14.0/elasticsearch-transport/lib/elasticsearch/transport/transport/http/manticore.rb#L113-L114 + transport_user_agent = nil + if (options && options[:transport_options] && options[:transport_options][:headers]) + transport_headers = options[:transport_options][:headers] + transport_user_agent = find_value(transport_headers, USER_AGENT_REGEX) + end + + headers[USER_AGENT_STR] = transport_user_agent || find_value(headers, USER_AGENT_REGEX) || user_agent_header + headers[ACCEPT_ENCODING] = GZIP if use_compression? + (request_options[:headers] ||= {}).merge!(headers) # this line was changed + end + + end + end + end + end + end +end \ No newline at end of file diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index e22bafc8..cf72d539 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,29 +1,45 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '0.1.0' + s.version = '5.2.1' s.licenses = ['Apache License (2.0)'] - s.summary = "Read from an Elasticsearch cluster, based on search query results" - s.description = "Read from an Elasticsearch cluster, based on search query results" - s.authors = ["Elasticsearch"] - s.email = 'richard.pijnenburg@elasticsearch.com' - s.homepage = "http://logstash.net/" + s.summary = "Reads query results from an Elasticsearch cluster" + s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. 
This gem is not a stand-alone program" + s.authors = ["Elastic"] + s.email = 'info@elastic.co' + s.homepage = "https://elastic.co/logstash" s.require_paths = ["lib"] # Files - s.files = `git ls-files`.split($\)+::Dir.glob('vendor/*') + s.files = Dir["lib/**/*","spec/**/*","*.gemspec","*.md","CONTRIBUTORS","Gemfile","LICENSE","NOTICE.TXT", "vendor/jar-dependencies/**/*.jar", "vendor/jar-dependencies/**/*.rb", "VERSION", "docs/**/*"] # Tests s.test_files = s.files.grep(%r{^(test|spec|features)/}) # Special flag to let us know this is actually a logstash plugin - s.metadata = { "logstash_plugin" => "true", "group" => "input" } + s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" } # Gem dependencies - s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0' - - s.add_runtime_dependency 'ftw', ['~> 0.0.39'] - s.add_runtime_dependency 'logstash-codec-json' - + s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99" + s.add_runtime_dependency 'logstash-mixin-ecs_compatibility_support', '~> 1.3' + s.add_runtime_dependency 'logstash-mixin-event_support', '~> 1.0' + s.add_runtime_dependency "logstash-mixin-validator_support", '~> 1.0' + s.add_runtime_dependency "logstash-mixin-scheduler", '~> 1.0' + + s.add_runtime_dependency 'elasticsearch', '>= 7.17.9', '< 9' + s.add_runtime_dependency 'logstash-mixin-ca_trusted_fingerprint_support', '~> 1.0' + s.add_runtime_dependency 'logstash-mixin-normalize_config_support', '~>1.0' + + s.add_runtime_dependency 'tzinfo' + s.add_runtime_dependency 'tzinfo-data' + s.add_runtime_dependency 'manticore', ">= 0.7.1" + + s.add_development_dependency 'logstash-codec-plain' + s.add_development_dependency 'logstash-devutils' + s.add_development_dependency 'timecop' + s.add_development_dependency 'cabin', ['~> 0.6'] + s.add_development_dependency 'webrick' + + # 3.8.0 has breaking changes WRT to joining, which break our specs + s.add_development_dependency 'rufus-scheduler', '~> 3.0.9' end - diff --git a/rakelib/publish.rake b/rakelib/publish.rake deleted file mode 100644 index 0ef58c08..00000000 --- a/rakelib/publish.rake +++ /dev/null @@ -1,9 +0,0 @@ -require "gem_publisher" - -desc "Publish gem to RubyGems.org" -task :publish_gem do |t| - gem_file = Dir.glob(File.expand_path('../*.gemspec',File.dirname(__FILE__))).first - gem = GemPublisher.publish_if_updated(gem_file, :rubygems) - puts "Published #{gem}" if gem -end - diff --git a/rakelib/vendor.rake b/rakelib/vendor.rake deleted file mode 100644 index 2135119c..00000000 --- a/rakelib/vendor.rake +++ /dev/null @@ -1,169 +0,0 @@ -require "net/http" -require "uri" -require "digest/sha1" - -def vendor(*args) - return File.join("vendor", *args) -end - -directory "vendor/" => ["vendor"] do |task, args| - mkdir task.name -end - -def fetch(url, sha1, output) - - puts "Downloading #{url}" - actual_sha1 = download(url, output) - - if actual_sha1 != sha1 - fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')" - end -end # def fetch - -def file_fetch(url, sha1) - filename = File.basename( URI(url).path ) - output = "vendor/#{filename}" - task output => [ "vendor/" ] do - begin - actual_sha1 = file_sha1(output) - if actual_sha1 != sha1 - fetch(url, sha1, output) - end - rescue Errno::ENOENT - fetch(url, sha1, output) - end - end.invoke - - return output -end - -def file_sha1(path) - digest = Digest::SHA1.new - fd = File.new(path, "r") - while true - begin - digest << fd.sysread(16384) - rescue EOFError - break - end - end - return digest.hexdigest -ensure - 
fd.close if fd -end - -def download(url, output) - uri = URI(url) - digest = Digest::SHA1.new - tmp = "#{output}.tmp" - Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http| - request = Net::HTTP::Get.new(uri.path) - http.request(request) do |response| - fail "HTTP fetch failed for #{url}. #{response}" if [200, 301].include?(response.code) - size = (response["content-length"].to_i || -1).to_f - count = 0 - File.open(tmp, "w") do |fd| - response.read_body do |chunk| - fd.write(chunk) - digest << chunk - if size > 0 && $stdout.tty? - count += chunk.bytesize - $stdout.write(sprintf("\r%0.2f%%", count/size * 100)) - end - end - end - $stdout.write("\r \r") if $stdout.tty? - end - end - - File.rename(tmp, output) - - return digest.hexdigest -rescue SocketError => e - puts "Failure while downloading #{url}: #{e}" - raise -ensure - File.unlink(tmp) if File.exist?(tmp) -end # def download - -def untar(tarball, &block) - require "archive/tar/minitar" - tgz = Zlib::GzipReader.new(File.open(tarball)) - # Pull out typesdb - tar = Archive::Tar::Minitar::Input.open(tgz) - tar.each do |entry| - path = block.call(entry) - next if path.nil? - parent = File.dirname(path) - - mkdir_p parent unless File.directory?(parent) - - # Skip this file if the output file is the same size - if entry.directory? - mkdir path unless File.directory?(path) - else - entry_mode = entry.instance_eval { @mode } & 0777 - if File.exists?(path) - stat = File.stat(path) - # TODO(sissel): Submit a patch to archive-tar-minitar upstream to - # expose headers in the entry. - entry_size = entry.instance_eval { @size } - # If file sizes are same, skip writing. - next if stat.size == entry_size && (stat.mode & 0777) == entry_mode - end - puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}" - File.open(path, "w") do |fd| - # eof? check lets us skip empty files. Necessary because the API provided by - # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an - # IO object. Something about empty files in this EntryStream causes - # IO.copy_stream to throw "can't convert nil into String" on JRuby - # TODO(sissel): File a bug about this. - while !entry.eof? - chunk = entry.read(16384) - fd.write(chunk) - end - #IO.copy_stream(entry, fd) - end - File.chmod(entry_mode, path) - end - end - tar.close - File.unlink(tarball) if File.file?(tarball) -end # def untar - -def ungz(file) - - outpath = file.gsub('.gz', '') - tgz = Zlib::GzipReader.new(File.open(file)) - begin - File.open(outpath, "w") do |out| - IO::copy_stream(tgz, out) - end - File.unlink(file) - rescue - File.unlink(outpath) if File.file?(outpath) - raise - end - tgz.close -end - -desc "Process any vendor files required for this plugin" -task "vendor" do |task, args| - - @files.each do |file| - download = file_fetch(file['url'], file['sha1']) - if download =~ /.tar.gz/ - prefix = download.gsub('.tar.gz', '').gsub('vendor/', '') - untar(download) do |entry| - if !file['files'].nil? 
- next unless file['files'].include?(entry.full_name.gsub(prefix, '')) - out = entry.full_name.split("/").last - end - File.join('vendor', out) - end - elsif download =~ /.gz/ - ungz(download) - end - end - -end diff --git a/spec/es_helper.rb b/spec/es_helper.rb new file mode 100644 index 00000000..cab670e6 --- /dev/null +++ b/spec/es_helper.rb @@ -0,0 +1,61 @@ +module ESHelper + def self.get_host_port + if ENV["INTEGRATION"] == "true" || ENV["SECURE_INTEGRATION"] == "true" + "elasticsearch:9200" + else + "localhost:9200" # for local running integration specs outside docker + end + end + + def self.curl_and_get_json_response(url, method: :get, args: nil); require 'open3' + cmd = "curl -s -v --show-error #{args} -X #{method.to_s.upcase} -k #{url}" + begin + out, err, status = Open3.capture3(cmd) + rescue Errno::ENOENT + fail "curl not available, make sure curl binary is installed and available on $PATH" + end + + if status.success? + http_status = err.match(/< HTTP\/1.1 (.*?)/)[1] || '0' # < HTTP/1.1 200 OK\r\n + if http_status.strip[0].to_i > 2 + warn out + fail "#{cmd.inspect} unexpected response: #{http_status}\n\n#{err}" + end + + LogStash::Json.load(out) + else + warn out + fail "#{cmd.inspect} process failed: #{status}\n\n#{err}" + end + end + + def self.doc_type + if ESHelper.es_version_satisfies?(">=8") + nil + elsif ESHelper.es_version_satisfies?(">=7") + "_doc" + else + "doc" + end + end + + def self.index_doc(es, params) + type = doc_type + params[:type] = doc_type unless type.nil? + es.index(params) + end + + def self.es_version + ENV['ES_VERSION'] || ENV['ELASTIC_STACK_VERSION'] + end + + def self.es_version_satisfies?(*requirement) + es_version = RSpec.configuration.filter[:es_version] || ENV['ES_VERSION'] || ENV['ELASTIC_STACK_VERSION'] + if es_version.nil? + puts "Info: ES_VERSION, ELASTIC_STACK_VERSION or 'es_version' tag wasn't set. Returning false to all `es_version_satisfies?` call." 
+ return false + end + es_release_version = Gem::Version.new(es_version).release + Gem::Requirement.new(requirement).satisfied_by?(es_release_version) + end +end \ No newline at end of file diff --git a/spec/fixtures/test_certs/GENERATED_AT b/spec/fixtures/test_certs/GENERATED_AT new file mode 100644 index 00000000..8bfc7041 --- /dev/null +++ b/spec/fixtures/test_certs/GENERATED_AT @@ -0,0 +1 @@ +2024-12-26T22:27:15+00:00 diff --git a/spec/fixtures/test_certs/ca.crt b/spec/fixtures/test_certs/ca.crt new file mode 100644 index 00000000..87e96b75 --- /dev/null +++ b/spec/fixtures/test_certs/ca.crt @@ -0,0 +1,19 @@ +-----BEGIN CERTIFICATE----- +MIIDFTCCAf2gAwIBAgIBATANBgkqhkiG9w0BAQsFADA0MTIwMAYDVQQDEylFbGFz +dGljIENlcnRpZmljYXRlIFRvb2wgQXV0b2dlbmVyYXRlZCBDQTAeFw0yNDEyMjYy +MjI3MTVaFw0yNTEyMjYyMjI3MTVaMDQxMjAwBgNVBAMTKUVsYXN0aWMgQ2VydGlm +aWNhdGUgVG9vbCBBdXRvZ2VuZXJhdGVkIENBMIIBIjANBgkqhkiG9w0BAQEFAAOC +AQ8AMIIBCgKCAQEArUe66xG4Y2zO13gRC+rBwyvxe+c01pqV6ukw6isIbJIQWs1/ +QfEMhUwYwKs6/UXxK+VwardcA2zYwngXbGGEtms+mpUfH5CdJnrqW7lHz1BVK4yH +90IzGE0GU4D90OW/L4QkGX0fv3VQbL8KGFKBoF04pXIaSGMStFN4wirutHtQboYv +99X4kbLjVSIuubUpA/v9dUP1TNl8ar+HKUWRM96ijHkFTF3FR0NnZyt44gP5qC0h +i4lUiR6Uo9D6WMFjeRYFF7GolCy/I1SzWBmmOnNhQLO5VxcNG4ldhBcapZeGwE98 +m/5lxLIwgFR9ZP8bXdxZTWLC58/LQ2NqOjA9mwIDAQABozIwMDAPBgNVHRMBAf8E +BTADAQH/MB0GA1UdDgQWBBTIJMnuftpfkxNCOkbF0R4xgcKQRjANBgkqhkiG9w0B +AQsFAAOCAQEAhfg/cmXc4Uh90yiXU8jOW8saQjTsq4ZMDQiLfJsNmNNYmHFN0vhv +lJRI1STdy7+GpjS5QbrMjQIxWSS8X8xysE4Rt81IrWmLuao35TRFyoiE1seBQ5sz +p/BxZUe57JvWi9dyzv2df4UfWFdGBhzdr80odZmz4i5VIv6qCKJKsGikcuLpepmp +E/UKnKHeR/dFWsxzA9P2OzHTUNBMOOA2PyAUL49pwoChwJeOWN/zAgwMWLbuHFG0 +IN0u8swAmeH98QdvzbhiOatGNpqfTNvQEDc19yVjfXKpBVZQ79WtronYSqrbrUa1 +T2zD8bIVP7CdddD/UmpT1SSKh4PJxudy5Q== +-----END CERTIFICATE----- diff --git a/spec/fixtures/test_certs/ca.der.sha256 b/spec/fixtures/test_certs/ca.der.sha256 new file mode 100644 index 00000000..a2d2cd59 --- /dev/null +++ b/spec/fixtures/test_certs/ca.der.sha256 @@ -0,0 +1 @@ +b1e955819b0d14f64f863adb103c248ddacf2e17bea48d04ee4b57c64814ccc4 diff --git a/spec/fixtures/test_certs/ca.key b/spec/fixtures/test_certs/ca.key new file mode 100644 index 00000000..ace213f3 --- /dev/null +++ b/spec/fixtures/test_certs/ca.key @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIEowIBAAKCAQEArUe66xG4Y2zO13gRC+rBwyvxe+c01pqV6ukw6isIbJIQWs1/ +QfEMhUwYwKs6/UXxK+VwardcA2zYwngXbGGEtms+mpUfH5CdJnrqW7lHz1BVK4yH +90IzGE0GU4D90OW/L4QkGX0fv3VQbL8KGFKBoF04pXIaSGMStFN4wirutHtQboYv +99X4kbLjVSIuubUpA/v9dUP1TNl8ar+HKUWRM96ijHkFTF3FR0NnZyt44gP5qC0h +i4lUiR6Uo9D6WMFjeRYFF7GolCy/I1SzWBmmOnNhQLO5VxcNG4ldhBcapZeGwE98 +m/5lxLIwgFR9ZP8bXdxZTWLC58/LQ2NqOjA9mwIDAQABAoIBABmBC0P6Ebegljkk +lO26GdbOKvbfqulDS3mN5QMyXkUMopea03YzMnKUJriE+2O33a1mUcuDPWnLpYPK +BTiQieYHlulNtY0Bzf+R69igRq9+1WpZftGnzrlu7NVxkOokRqWJv3546ilV7QZ0 +f9ngmu+tiN7hEnlBC8m613VMuGGb3czwbCizEVZxlZX0Dk2GExbH7Yf3NNs/aOP/ +8x6CqgL+rhrtOQ80xwRrOlEF8oSSjXCzypa3nFv21YO3J2lVo4BoIwnHgOzyz46A +b37gekqXXajIYQ0HAB+NDgVoCRFFJ7Xe16mgB3DpyUpUJzwiMedJkeQ0TprIownQ ++1mPe9ECgYEA/K4jc0trr3sk8KtcZjOYdpvwrhEqSSGEPeGfFujZaKOb8PZ8PX6j +MbCTV12nEgm8FEhZQ3azxLnO17gbJ2A+Ksm/IIwnTWlqvvMZD5qTQ7L3qZuCtbWQ ++EGC/H1SDjhiwvjHcXP61/tYL/peApBSoj0L4kC+U/VaNyvicudKk08CgYEAr46J +4VJBJfZ4ZaUBRy53+fy+mknOfaj2wo8MnD3u+/x4YWTapqvDOPN2nJVtKlIsxbS4 +qCO+fzUV17YHlsQmGULNbtFuXWJkP/RcLVbe8VYg/6tmk0dJwNAe90flagX2KJov +8eDX129nNpuUqrNNWsfeLmPmH6vUzpKlga+1zfUCgYBrbUHHJ96dmbZn2AMNtIvy +iXP3HXcj5msJwB3aKJ8eHMkU1kaWAnwxiQfrkfaQ9bCP0v6YbyQY1IJ7NlvdDs7/ +dAydMtkW0WW/zyztdGN92d3vrx0QUiRTV87vt/wl7ZUXnZt1wcB5CPRCWaiUYHWx 
+YlDmHW6N1XdIk5DQF0OegwKBgEt7S8k3Zo9+A5IgegYy8p7njsQjy8a3qTFJ9DAR +aPmrOc8WX/SdkVihRXRZwxAZOOrgoyyYAcYL+xI+T9EBESh3UoC9R2ibb2MYG7Ha +0gyN7a4/8eCNHCbs1QOZRAhr+8TFVqv28pbMbWJLToZ+hVns6Zikl0MyzFLtNoAm +HlMpAoGBAIOkqnwwuRKhWprL59sdcJfWY26os9nvuDV4LoKFNEFLJhj2AA2/3UlV +v85gqNSxnMNlHLZC9l2HZ3mKv/mfx1aikmFvyhJAnk5u0f9KkexmCPLjQzS5q3ba +yFuxK2DXwN4x46RgQPFlLjOTCX0BG6rkEu4JdonF8ETSjoCtGEU8 +-----END RSA PRIVATE KEY----- diff --git a/spec/fixtures/test_certs/es.chain.crt b/spec/fixtures/test_certs/es.chain.crt new file mode 100644 index 00000000..334de63d --- /dev/null +++ b/spec/fixtures/test_certs/es.chain.crt @@ -0,0 +1,38 @@ +-----BEGIN CERTIFICATE----- +MIIDIzCCAgugAwIBAgIBATANBgkqhkiG9w0BAQsFADA0MTIwMAYDVQQDEylFbGFz +dGljIENlcnRpZmljYXRlIFRvb2wgQXV0b2dlbmVyYXRlZCBDQTAeFw0yNDEyMjYy +MjI3MTVaFw0yNTEyMjYyMjI3MTVaMA0xCzAJBgNVBAMTAmVzMIIBIjANBgkqhkiG +9w0BAQEFAAOCAQ8AMIIBCgKCAQEArZLZvLSWDK7Ul+AaBnjU81dsfaow8zOjCC5V +V21nXpYzQJoQbuWcvGYxwL7ZDs2ca4Wc8BVCj1NDduHuP7U+QIlUdQpl8kh5a0Zz +36pcFw7UyF51/AzWixJrht/Azzkb5cpZtE22ZK0KhS4oCsjJmTN0EABAsGhDI9/c +MjNrUC7iP0dvfOuzAPp7ufY83h98jKKXUYV24snbbvmqoWI6GQQNSG/sEo1+1UGH +/z07/mVKoBAa5DVoNGvxN0fCE7vW7hkhT8+frJcsYFatAbnf6ql0KzEa8lN9u0gR +hQNM3zcKKsjEMomBzVBc4SV3KXO0d/jGdDtlqsm2oXqlTMdtGwIDAQABo2cwZTAY +BgNVHREEETAPgg1lbGFzdGljc2VhcmNoMAkGA1UdEwQCMAAwHQYDVR0OBBYEFFQU +K+6Cg2kExRj1xSDzEi4kkgKXMB8GA1UdIwQYMBaAFMgkye5+2l+TE0I6RsXRHjGB +wpBGMA0GCSqGSIb3DQEBCwUAA4IBAQB6cZ7IrDzcAoOZgAt9RlOe2yzQeH+alttp +CSQVINjJotS1WvmtqjBB6ArqLpXIGU89TZsktNe/NQJzgYSaMnlIuHVLFdxJYmwU +T1cP6VC/brmqP/dd5y7VWE7Lp+Wd5CxKl/WY+9chmgc+a1fW/lnPEJJ6pca1Bo8b +byIL0yY2IUv4R2eh1IyQl9oGH1GOPLgO7cY04eajxYcOVA2eDSItoyDtrJfkFP/P +UXtC1JAkvWKuujFEiBj0AannhroWlp3gvChhBwCuCAU0KXD6g8BE8tn6oT1+FW7J +avSfHxAe+VHtYhF8sJ8jrdm0d7E4GKS9UR/pkLAL1JuRdJ1VkPx3 +-----END CERTIFICATE----- +-----BEGIN CERTIFICATE----- +MIIDFTCCAf2gAwIBAgIBATANBgkqhkiG9w0BAQsFADA0MTIwMAYDVQQDEylFbGFz +dGljIENlcnRpZmljYXRlIFRvb2wgQXV0b2dlbmVyYXRlZCBDQTAeFw0yNDEyMjYy +MjI3MTVaFw0yNTEyMjYyMjI3MTVaMDQxMjAwBgNVBAMTKUVsYXN0aWMgQ2VydGlm +aWNhdGUgVG9vbCBBdXRvZ2VuZXJhdGVkIENBMIIBIjANBgkqhkiG9w0BAQEFAAOC +AQ8AMIIBCgKCAQEArUe66xG4Y2zO13gRC+rBwyvxe+c01pqV6ukw6isIbJIQWs1/ +QfEMhUwYwKs6/UXxK+VwardcA2zYwngXbGGEtms+mpUfH5CdJnrqW7lHz1BVK4yH +90IzGE0GU4D90OW/L4QkGX0fv3VQbL8KGFKBoF04pXIaSGMStFN4wirutHtQboYv +99X4kbLjVSIuubUpA/v9dUP1TNl8ar+HKUWRM96ijHkFTF3FR0NnZyt44gP5qC0h +i4lUiR6Uo9D6WMFjeRYFF7GolCy/I1SzWBmmOnNhQLO5VxcNG4ldhBcapZeGwE98 +m/5lxLIwgFR9ZP8bXdxZTWLC58/LQ2NqOjA9mwIDAQABozIwMDAPBgNVHRMBAf8E +BTADAQH/MB0GA1UdDgQWBBTIJMnuftpfkxNCOkbF0R4xgcKQRjANBgkqhkiG9w0B +AQsFAAOCAQEAhfg/cmXc4Uh90yiXU8jOW8saQjTsq4ZMDQiLfJsNmNNYmHFN0vhv +lJRI1STdy7+GpjS5QbrMjQIxWSS8X8xysE4Rt81IrWmLuao35TRFyoiE1seBQ5sz +p/BxZUe57JvWi9dyzv2df4UfWFdGBhzdr80odZmz4i5VIv6qCKJKsGikcuLpepmp +E/UKnKHeR/dFWsxzA9P2OzHTUNBMOOA2PyAUL49pwoChwJeOWN/zAgwMWLbuHFG0 +IN0u8swAmeH98QdvzbhiOatGNpqfTNvQEDc19yVjfXKpBVZQ79WtronYSqrbrUa1 +T2zD8bIVP7CdddD/UmpT1SSKh4PJxudy5Q== +-----END CERTIFICATE----- diff --git a/spec/fixtures/test_certs/es.crt b/spec/fixtures/test_certs/es.crt new file mode 100644 index 00000000..7676eace --- /dev/null +++ b/spec/fixtures/test_certs/es.crt @@ -0,0 +1,19 @@ +-----BEGIN CERTIFICATE----- +MIIDIzCCAgugAwIBAgIBATANBgkqhkiG9w0BAQsFADA0MTIwMAYDVQQDEylFbGFz +dGljIENlcnRpZmljYXRlIFRvb2wgQXV0b2dlbmVyYXRlZCBDQTAeFw0yNDEyMjYy +MjI3MTVaFw0yNTEyMjYyMjI3MTVaMA0xCzAJBgNVBAMTAmVzMIIBIjANBgkqhkiG +9w0BAQEFAAOCAQ8AMIIBCgKCAQEArZLZvLSWDK7Ul+AaBnjU81dsfaow8zOjCC5V +V21nXpYzQJoQbuWcvGYxwL7ZDs2ca4Wc8BVCj1NDduHuP7U+QIlUdQpl8kh5a0Zz 
+36pcFw7UyF51/AzWixJrht/Azzkb5cpZtE22ZK0KhS4oCsjJmTN0EABAsGhDI9/c +MjNrUC7iP0dvfOuzAPp7ufY83h98jKKXUYV24snbbvmqoWI6GQQNSG/sEo1+1UGH +/z07/mVKoBAa5DVoNGvxN0fCE7vW7hkhT8+frJcsYFatAbnf6ql0KzEa8lN9u0gR +hQNM3zcKKsjEMomBzVBc4SV3KXO0d/jGdDtlqsm2oXqlTMdtGwIDAQABo2cwZTAY +BgNVHREEETAPgg1lbGFzdGljc2VhcmNoMAkGA1UdEwQCMAAwHQYDVR0OBBYEFFQU +K+6Cg2kExRj1xSDzEi4kkgKXMB8GA1UdIwQYMBaAFMgkye5+2l+TE0I6RsXRHjGB +wpBGMA0GCSqGSIb3DQEBCwUAA4IBAQB6cZ7IrDzcAoOZgAt9RlOe2yzQeH+alttp +CSQVINjJotS1WvmtqjBB6ArqLpXIGU89TZsktNe/NQJzgYSaMnlIuHVLFdxJYmwU +T1cP6VC/brmqP/dd5y7VWE7Lp+Wd5CxKl/WY+9chmgc+a1fW/lnPEJJ6pca1Bo8b +byIL0yY2IUv4R2eh1IyQl9oGH1GOPLgO7cY04eajxYcOVA2eDSItoyDtrJfkFP/P +UXtC1JAkvWKuujFEiBj0AannhroWlp3gvChhBwCuCAU0KXD6g8BE8tn6oT1+FW7J +avSfHxAe+VHtYhF8sJ8jrdm0d7E4GKS9UR/pkLAL1JuRdJ1VkPx3 +-----END CERTIFICATE----- diff --git a/spec/fixtures/test_certs/es.key b/spec/fixtures/test_certs/es.key new file mode 100644 index 00000000..d283ded1 --- /dev/null +++ b/spec/fixtures/test_certs/es.key @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIEowIBAAKCAQEArZLZvLSWDK7Ul+AaBnjU81dsfaow8zOjCC5VV21nXpYzQJoQ +buWcvGYxwL7ZDs2ca4Wc8BVCj1NDduHuP7U+QIlUdQpl8kh5a0Zz36pcFw7UyF51 +/AzWixJrht/Azzkb5cpZtE22ZK0KhS4oCsjJmTN0EABAsGhDI9/cMjNrUC7iP0dv +fOuzAPp7ufY83h98jKKXUYV24snbbvmqoWI6GQQNSG/sEo1+1UGH/z07/mVKoBAa +5DVoNGvxN0fCE7vW7hkhT8+frJcsYFatAbnf6ql0KzEa8lN9u0gRhQNM3zcKKsjE +MomBzVBc4SV3KXO0d/jGdDtlqsm2oXqlTMdtGwIDAQABAoIBAQCm/VBDz41ImG7p +yu3e6iMeFi7HW5SKdlRUS5dJbHT1uBWJAm/q8TbwvnUBVdsn9cKWY06QYDPQBjAy +0LxRSIKivjyl+aIJDZbbEUXrmk/M0zT9rHtgSc2isM8ITH6IHw5q7lmNMPLYOu6T +IMvfTDtADBOOTV/vF+/4NKf5GCUXVt1XTzLBFMK0p/ZoI7Fsw7fhH6FR12vk0xA4 +BEC4pwRbGfHo7P31ii0by8epkve93tF4IZuFmN92A84bN1z7Kc4TYaSbua2rgguz +FzMyWpsTxr363HzCK1xOJb6JyJOiXbq4+j2oqtne3GIvyozJeiyKRgjLIMoe/LV7 +fPPc5wlhAoGBAOD3z0JH2eyR/1RHILFsWInH2nDbKHHuCjhFIL2XloeXsJkiJZ95 +BpdjExMZCqD44tPNRW/GgWKwoVwltm6zB0aq0aW/OfOzw6fhKt1W+go47L7Tpwap +VQgy6BFXSueUKfQDlZEWV4E2gakf8vOl0/VRQExae/CeKf1suEedQaErAoGBAMWE +LOmNDEU2NFqghfNBAFYyFJst3YnBmSmlL7W22+OsfSK/PhxnJbuNHxMgxpg9rieW +tVyjuZRo/i7WLVm3uG+dK1RJ9t8Y6kpYkCRKpi9G8DBOj3PSulOybBr+fdRfW9mf +8UmqOjOkrhxXPkchc9TY4EM7/1XeKvEidlIp0gvRAoGAAurz4zYvW2QhXaR2hhaT +p2XSLXiKM8AUndo3rH3U0/lhrvrEZicZsMj2LF88xg20U27sIaD/eJo13Y4XqaPk +ykPY6D9srv574SeIeMpx/8PxPiBcoDd+BNc0L1VkgVBoouORAwq5I9HjKKBjdEmI +UDw3i0X5KYvDm6fXVAZ0HXUCgYBWc4To8KiXPqNpq2sVzrSkBaWJSmj2G7u7Q6b/ +RTs3is72v3gjHG6iiaE5URY7mnu4rjlRhAP9Vnsy6uHMrCJZEBTf/sPEYHZj9iGZ +EOduOAF3U1tsmaaebbDtm8hdhSOBvITy9kQlSIZAt1r17Ulytz5pj0AySFzJUIkz +a0SZkQKBgCWixtUxiK8PAdWhyS++90WJeJn8eqjuSAz+VMtFQFRRWDUbkiHvGMRu +o/Hhk6zS46gSF2Evb1d26uUEenXnJlIp6YWzb0DLPrfy5P53kPA6YEvYq5MSAg3l +DZOJUF+ko7cWXSZkeTIBH/jrGOdP4tTALZt6DNt+Gz7xwPO5tGgV +-----END RSA PRIVATE KEY----- diff --git a/spec/fixtures/test_certs/renew.sh b/spec/fixtures/test_certs/renew.sh new file mode 100755 index 00000000..8ef56cc9 --- /dev/null +++ b/spec/fixtures/test_certs/renew.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +set -e +cd "$(dirname "$0")" + +openssl x509 -x509toreq -in ca.crt -copy_extensions copyall -signkey ca.key -out ca.csr +openssl x509 -req -copy_extensions copyall -days 365 -in ca.csr -set_serial 0x01 -signkey ca.key -out ca.crt && rm ca.csr +openssl x509 -in ca.crt -outform der | sha256sum | awk '{print $1}' > ca.der.sha256 + +openssl x509 -x509toreq -in es.crt -copy_extensions copyall -signkey es.key -out es.csr +openssl x509 -req -copy_extensions copyall -days 365 -in es.csr -set_serial 0x01 -CA ca.crt -CAkey ca.key -out es.crt && rm es.csr +cat es.crt ca.crt > es.chain.crt + +# output ISO8601 timestamp to file 
+date -Iseconds > GENERATED_AT \ No newline at end of file diff --git a/spec/inputs/cursor_tracker_spec.rb b/spec/inputs/cursor_tracker_spec.rb new file mode 100644 index 00000000..291d6c61 --- /dev/null +++ b/spec/inputs/cursor_tracker_spec.rb @@ -0,0 +1,72 @@ +# encoding: utf-8 +require "logstash/devutils/rspec/spec_helper" +require "logstash/devutils/rspec/shared_examples" +require "logstash/inputs/elasticsearch" +require "logstash/inputs/elasticsearch/cursor_tracker" + +describe LogStash::Inputs::Elasticsearch::CursorTracker do + + let(:last_run_metadata_path) { Tempfile.new('cursor_tracker_testing').path } + let(:tracking_field_seed) { "1980-01-01T23:59:59.999999999Z" } + let(:options) do + { + :last_run_metadata_path => last_run_metadata_path, + :tracking_field => "my_field", + :tracking_field_seed => tracking_field_seed + } + end + + subject { described_class.new(**options) } + + it "creating a class works" do + expect(subject).to be_a described_class + end + + describe "checkpoint_cursor" do + before(:each) do + subject.checkpoint_cursor(intermediate: false) # store seed value + [ + Thread.new(subject) {|subject| subject.record_last_value(LogStash::Event.new("my_field" => "2025-01-03T23:59:59.999999999Z")) }, + Thread.new(subject) {|subject| subject.record_last_value(LogStash::Event.new("my_field" => "2025-01-01T23:59:59.999999999Z")) }, + Thread.new(subject) {|subject| subject.record_last_value(LogStash::Event.new("my_field" => "2025-01-02T23:59:59.999999999Z")) }, + ].each(&:join) + end + context "when doing intermediate checkpoint" do + it "persists the smallest value" do + subject.checkpoint_cursor(intermediate: true) + expect(IO.read(last_run_metadata_path)).to eq("2025-01-01T23:59:59.999999999Z") + end + end + context "when doing non-intermediate checkpoint" do + it "persists the largest value" do + subject.checkpoint_cursor(intermediate: false) + expect(IO.read(last_run_metadata_path)).to eq("2025-01-03T23:59:59.999999999Z") + end + end + end + + describe "inject_cursor" do + let(:new_value) { "2025-01-03T23:59:59.999999999Z" } + let(:fake_now) { "2026-09-19T23:59:59.999999999Z" } + + let(:query) do + %q[ + { "query": { "range": { "event.ingested": { "gt": :last_value, "lt": :present}}}, "sort": [ { "event.ingested": {"order": "asc", "format": "strict_date_optional_time_nanos", "numeric_type" : "date_nanos" } } ] } + ] + end + + before(:each) do + subject.record_last_value(LogStash::Event.new("my_field" => new_value)) + subject.checkpoint_cursor(intermediate: false) + allow(subject).to receive(:now_minus_30s).and_return(fake_now) + end + + it "injects the value of the cursor into json query if it contains :last_value" do + expect(subject.inject_cursor(query)).to match(/#{new_value}/) + end + + it "injects current time into json query if it contains :present" do + expect(subject.inject_cursor(query)).to match(/#{fake_now}/) + end + end +end diff --git a/spec/inputs/elasticsearch_esql_spec.rb b/spec/inputs/elasticsearch_esql_spec.rb new file mode 100644 index 00000000..f958dea3 --- /dev/null +++ b/spec/inputs/elasticsearch_esql_spec.rb @@ -0,0 +1,180 @@ +# encoding: utf-8 +require "logstash/devutils/rspec/spec_helper" +require "logstash/inputs/elasticsearch" +require "elasticsearch" + +describe LogStash::Inputs::Elasticsearch::Esql do + let(:client) { instance_double(Elasticsearch::Client) } + let(:esql_client) { double("esql-client") } + + let(:plugin) { instance_double(LogStash::Inputs::Elasticsearch, params: plugin_config, decorate_event: nil) } + let(:plugin_config) do + { 
+ "query" => "FROM test-index | STATS count() BY field", + "retries" => 3 + } + end + let(:esql_executor) { described_class.new(client, plugin) } + + describe "#initialization" do + it "sets up the ESQL client with correct parameters" do + expect(esql_executor.instance_variable_get(:@query)).to eq(plugin_config["query"]) + expect(esql_executor.instance_variable_get(:@retries)).to eq(plugin_config["retries"]) + expect(esql_executor.instance_variable_get(:@target_field)).to eq(nil) + end + end + + describe "#execution" do + let(:output_queue) { Queue.new } + + context "when faces error while retrying" do + it "retries the given block the specified number of times" do + attempts = 0 + result = esql_executor.retryable("Test Job") do + attempts += 1 + raise StandardError if attempts < 3 + "success" + end + expect(attempts).to eq(3) + expect(result).to eq("success") + end + + it "returns false if the block fails all attempts" do + result = esql_executor.retryable("Test Job") do + raise StandardError + end + expect(result).to eq(false) + end + end + + context "when executing chain of processes" do + let(:response) { { 'values' => [%w[foo bar]], 'columns' => [{ 'name' => 'a.b.1.d', 'type' => 'keyword' }, + { 'name' => 'h_g.k$l.m.0', 'type' => 'keyword' }] } } + + before do + allow(esql_executor).to receive(:retryable).and_yield + allow(client).to receive_message_chain(:esql, :query).and_return(response) + end + + it "executes the ESQL query and processes the results" do + allow(response).to receive(:headers).and_return({}) + esql_executor.do_run(output_queue, plugin_config["query"]) + expect(output_queue.size).to eq(1) + + event = output_queue.pop + expect(event.get('[a][b][1][d]')).to eq('foo') + expect(event.get('[h_g][k$l][m][0]')).to eq('bar') + end + + it "logs a warning if the response contains a warning header" do + allow(response).to receive(:headers).and_return({ "warning" => "some warning" }) + expect(esql_executor.logger).to receive(:warn).with("ES|QL executor received warning", { :warning_message => "some warning" }) + esql_executor.do_run(output_queue, plugin_config["query"]) + end + + it "does not log a warning if the response does not contain a warning header" do + allow(response).to receive(:headers).and_return({}) + expect(esql_executor.logger).not_to receive(:warn) + esql_executor.do_run(output_queue, plugin_config["query"]) + end + end + + describe "multiple rows in the result" do + let(:response) { { 'values' => rows, 'columns' => [{ 'name' => 'key.1', 'type' => 'keyword' }, + { 'name' => 'key.2', 'type' => 'keyword' }] } } + + before do + allow(esql_executor).to receive(:retryable).and_yield + allow(client).to receive_message_chain(:esql, :query).and_return(response) + allow(response).to receive(:headers).and_return({}) + end + + context "when mapping" do + let(:rows) { [%w[foo bar], %w[hello world]] } + + it "1:1 maps rows to events" do + esql_executor.do_run(output_queue, plugin_config["query"]) + expect(output_queue.size).to eq(2) + + event_1 = output_queue.pop + expect(event_1.get('[key][1]')).to eq('foo') + expect(event_1.get('[key][2]')).to eq('bar') + + event_2 = output_queue.pop + expect(event_2.get('[key][1]')).to eq('hello') + expect(event_2.get('[key][2]')).to eq('world') + end + end + + context "when partial nil values appear" do + let(:rows) { [[nil, "bar"], ["hello", nil]] } + + it "ignores the nil values" do + esql_executor.do_run(output_queue, plugin_config["query"]) + expect(output_queue.size).to eq(2) + + event_1 = output_queue.pop + 
expect(event_1.get('[key][1]')).to eq(nil) + expect(event_1.get('[key][2]')).to eq('bar') + + event_2 = output_queue.pop + expect(event_2.get('[key][1]')).to eq('hello') + expect(event_2.get('[key][2]')).to eq(nil) + end + end + end + + context "when sub-elements occur in the result" do + let(:response) { { + 'values' => [[50, 1, 100], [50, 0, 1000], [50, 9, 99999]], + 'columns' => + [ + { 'name' => 'time', 'type' => 'long' }, + { 'name' => 'time.min', 'type' => 'long' }, + { 'name' => 'time.max', 'type' => 'long' }, + ] + } } + + before do + allow(esql_executor).to receive(:retryable).and_yield + allow(client).to receive_message_chain(:esql, :query).and_return(response) + allow(response).to receive(:headers).and_return({}) + end + + it "includes 1st depth elements into event" do + esql_executor.do_run(output_queue, plugin_config["query"]) + + expect(output_queue.size).to eq(3) + 3.times do + event = output_queue.pop + expect(event.get('time')).to eq(50) + expect(event.get('[time][min]')).to eq(nil) + expect(event.get('[time][max]')).to eq(nil) + end + end + end + end + + describe "#column spec" do + let(:valid_spec) { { 'name' => 'field.name', 'type' => 'keyword' } } + let(:column_spec) { LogStash::Inputs::Elasticsearch::ColumnSpec.new(valid_spec) } + + context "when initializes" do + it "sets the name and type attributes" do + expect(column_spec.name).to eq("field.name") + expect(column_spec.type).to eq("keyword") + end + + it "freezes the name and type attributes" do + expect(column_spec.name).to be_frozen + expect(column_spec.type).to be_frozen + end + end + + context "when calls the field reference" do + it "returns the correct field reference format" do + expect(column_spec.field_reference).to eq("[field][name]") + end + end + end +end if LOGSTASH_VERSION >= LogStash::Inputs::Elasticsearch::LS_ESQL_SUPPORT_VERSION \ No newline at end of file diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index 4eb0f456..18bb4097 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -1,80 +1,1512 @@ -require "spec_helper" +# encoding: utf-8 +require "logstash/devutils/rspec/spec_helper" +require "logstash/devutils/rspec/shared_examples" require "logstash/inputs/elasticsearch" +require "elasticsearch" +require "timecop" +require "stud/temporary" +require "time" +require "date" +require "cabin" +require "webrick" +require "uri" -describe "inputs/elasticsearch" do - +require 'logstash/plugin_mixins/ecs_compatibility_support/spec_helper' - search_response = <<-RESPONSE +describe LogStash::Inputs::Elasticsearch, :ecs_compatibility_support do + + let(:plugin) { described_class.new(config) } + let(:queue) { Queue.new } + let(:build_flavor) { "default" } + let(:es_version) { "7.5.0" } + let(:cluster_info) { {"version" => {"number" => es_version, "build_flavor" => build_flavor}, "tagline" => "You Know, for Search"} } + + def elastic_ruby_v8_client_available? 
+ Elasticsearch::Transport + false + rescue NameError # NameError: uninitialized constant Elasticsearch::Transport if Elastic Ruby client is not available + true + end + + before(:each) do + Elasticsearch::Client.send(:define_method, :ping) { } # define no-action ping method + allow_any_instance_of(Elasticsearch::Client).to receive(:info).and_return(cluster_info) + end + + let(:base_config) do { - "_scroll_id":"xxx", - "took":5, - "timed_out":false, - "_shards":{"total":15,"successful":15,"failed":0}, - "hits":{ - "total":1000050, - "max_score":1.0, - "hits":[ + 'hosts' => ["localhost"], + 'query' => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' + } + end + + context "register" do + let(:config) do + { + "schedule" => "* * * * * UTC" + } + end + + context "against authentic Elasticsearch" do + it "should not raise an exception" do + expect { plugin.register }.to_not raise_error + end + + it "does not set header Elastic-Api-Version" do + plugin.register + client = plugin.send(:client) + expect( extract_transport(client).options[:transport_options][:headers] ).not_to match hash_including("Elastic-Api-Version" => "2023-10-31") + end + + it "sets an x-elastic-product-origin header identifying this as an internal plugin request" do + plugin.register + client = plugin.send(:client) + expect( extract_transport(client).options[:transport_options][:headers] ).to match hash_including("x-elastic-product-origin"=>"logstash-input-elasticsearch") + end + end + + describe 'handling obsolete settings' do + [{:name => 'ssl', :replacement => 'ssl_enabled', :sample_value => true}, + {:name => 'ca_file', :replacement => 'ssl_certificate_authorities', :sample_value => 'spec/fixtures/test_certs/ca.crt'}, + {:name => 'ssl_certificate_verification', :replacement => 'ssl_verification_mode', :sample_value => false }].each do | obsolete_setting| + context "with obsolete #{obsolete_setting[:name]}" do + let (:config) { {obsolete_setting[:name] => obsolete_setting[:sample_value]} } + it "should raise a config error with the appropriate message" do + expect { plugin.register }.to raise_error LogStash::ConfigurationError, /The setting `#{obsolete_setting[:name]}` in plugin `elasticsearch` is obsolete and is no longer available. Set '#{obsolete_setting[:replacement]}' instead/i + end + end + end + end + + context "against not authentic Elasticsearch" do + before(:each) do + Elasticsearch::Client.send(:define_method, :ping) { raise Elasticsearch::UnsupportedProductError.new("Fake error") } # define error ping method + end + + it "should raise ConfigurationError" do + expect { plugin.register }.to raise_error(LogStash::ConfigurationError) + end + end + + context "against serverless Elasticsearch" do + before do + allow(plugin).to receive(:test_connection!) + allow(plugin).to receive(:serverless?).and_return(true) + end + + context "with unsupported header" do + let(:es_client) { double("es_client") } + + before do + allow(Elasticsearch::Client).to receive(:new).and_return(es_client) + if elastic_ruby_v8_client_available? 
+ allow(es_client).to receive(:info).and_raise(Elastic::Transport::Transport::Errors::BadRequest.new) + else + allow(es_client).to receive(:info).and_raise(Elasticsearch::Transport::Transport::Errors::BadRequest.new) + end + end + + it "raises an exception" do + expect {plugin.register}.to raise_error(LogStash::ConfigurationError) + end + end + + context "with supported header" do + it "set default header to rest client" do + expect_any_instance_of(Elasticsearch::Client).to receive(:info).and_return(true) + plugin.register + client = plugin.send(:client) + expect( extract_transport(client).options[:transport_options][:headers] ).to match hash_including("Elastic-Api-Version" => "2023-10-31") + end + + it "sets an x-elastic-product-origin header identifying this as an internal plugin request" do + plugin.register + client = plugin.send(:client) + expect( extract_transport(client).options[:transport_options][:headers] ).to match hash_including("x-elastic-product-origin"=>"logstash-input-elasticsearch") + end + end + + context "with custom headers" do + let(:config) do { - "_index":"logstash2", - "_type":"logs", - "_id":"AmaqL7VuSWKF-F6N_Gz72g", - "_score":1.0, - "_source" : { - "message":"foobar", - "@version":"1", - "@timestamp":"2014-05-19T21:08:39.000Z", - "host":"colin-mbp13r" + "schedule" => "* * * * * UTC", + "custom_headers" => { "Custom-Header-1" => "Custom Value 1", "Custom-Header-2" => "Custom Value 2" } + } + end + + + it "sets custom headers" do + plugin.register + client = plugin.send(:client) + expect( extract_transport(client).options[:transport_options][:headers] ).to match hash_including(config["custom_headers"]) + end + end + end + + context "retry" do + let(:config) do + { + "retries" => -1 + } + end + it "should raise an exception with negative number" do + expect { plugin.register }.to raise_error(LogStash::ConfigurationError) + end + end + + context "search_api" do + before(:each) do + plugin.register + end + + context "ES 8" do + let(:es_version) { "8.10.0" } + it "resolves `auto` to `search_after`" do + expect(plugin.instance_variable_get(:@query_executor)).to be_a LogStash::Inputs::Elasticsearch::SearchAfter + end + end + + context "ES 7" do + let(:es_version) { "7.17.0" } + it "resolves `auto` to `scroll`" do + expect(plugin.instance_variable_get(:@query_executor)).to be_a LogStash::Inputs::Elasticsearch::Scroll + end + end + end + end + + it_behaves_like "an interruptible input plugin" do + let(:config) do + { + "schedule" => "* * * * * UTC" + } + end + + before :each do + @esclient = double("elasticsearch-client") + allow(Elasticsearch::Client).to receive(:new).and_return(@esclient) + hit = { + "_index" => "logstash-2014.10.12", + "_type" => "logs", + "_id" => "C5b2xLQwTZa76jBmHIbwHQ", + "_score" => 1.0, + "_source" => { "message" => ["ohayo"] } + } + allow(@esclient).to receive(:search) { { "hits" => { "hits" => [hit] } } } + allow(@esclient).to receive(:scroll) { { "hits" => { "hits" => [hit] } } } + allow(@esclient).to receive(:clear_scroll).and_return(nil) + allow(@esclient).to receive(:ping) + allow(@esclient).to receive(:info).and_return(cluster_info) + end + end + + + ecs_compatibility_matrix(:disabled, :v1, :v8) do |ecs_select| + + before(:each) do + allow_any_instance_of(described_class).to receive(:ecs_compatibility).and_return(ecs_compatibility) + end + + let(:config) do + { + 'hosts' => ["localhost"], + 'query' => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' + } + end + + let(:mock_response) do + { + "_scroll_id" => 
"cXVlcnlUaGVuRmV0Y2g", + "took" => 27, + "timed_out" => false, + "_shards" => { + "total" => 169, + "successful" => 169, + "failed" => 0 + }, + "hits" => { + "total" => 1, + "max_score" => 1.0, + "hits" => [ { + "_index" => "logstash-2014.10.12", + "_type" => "logs", + "_id" => "C5b2xLQwTZa76jBmHIbwHQ", + "_score" => 1.0, + "_source" => { "message" => ["ohayo"] } + } ] + } + } + end + + let(:mock_scroll_response) do + { + "_scroll_id" => "r453Wc1jh0caLJhSDg", + "hits" => { "hits" => [] } + } + end + + before(:each) do + client = Elasticsearch::Client.new + expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client) + expect(client).to receive(:search).with(any_args).and_return(mock_response) + expect(client).to receive(:scroll).with({ :body => { :scroll_id => "cXVlcnlUaGVuRmV0Y2g" }, :scroll=> "1m" }).and_return(mock_scroll_response) + expect(client).to receive(:clear_scroll).and_return(nil) + expect(client).to receive(:ping) + end + + before { plugin.register } + + it 'creates the events from the hits' do + plugin.run queue + event = queue.pop + + expect(event).to be_a(LogStash::Event) + expect(event.get("message")).to eql [ "ohayo" ] + end + + context 'when a target is set' do + let(:config) do + { + 'hosts' => ["localhost"], + 'query' => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }', + 'target' => "[@metadata][_source]" + } + end + + it 'creates the event using the target' do + plugin.run queue + event = queue.pop + + expect(event).to be_a(LogStash::Event) + expect(event.get("[@metadata][_source][message]")).to eql [ "ohayo" ] + end + end + + end + + # This spec is an adapter-spec, ensuring that we send the right sequence of messages to our Elasticsearch Client + # to support sliced scrolling. The underlying implementation will spawn its own threads to consume, so we must be + # careful to use thread-safe constructs. 
+ context "with managed sliced scrolling" do + let(:config) do + { + 'query' => "#{LogStash::Json.dump(query)}", + 'slices' => slices, + 'docinfo' => true, # include ids + 'docinfo_target' => '[@metadata]' + } + end + let(:query) do + { + "query" => { + "match" => { "city_name" => "Okinawa" } + }, + "fields" => ["message"] + } + end + let(:slices) { 2 } + + context 'with `slices => 0`' do + let(:slices) { 0 } + it 'fails to register' do + expect { plugin.register }.to raise_error(LogStash::ConfigurationError) + end + end + + context 'with `slices => 1`' do + let(:slices) { 1 } + before { plugin.register } + + it 'runs just one slice' do + expect(plugin.instance_variable_get(:@query_executor)).to receive(:search).with(duck_type(:<<), nil) + expect(Thread).to_not receive(:new) + + plugin.run([]) + end + end + + context 'without slices directive' do + let(:config) { super().tap { |h| h.delete('slices') } } + before { plugin.register } + + it 'runs just one slice' do + expect(plugin.instance_variable_get(:@query_executor)).to receive(:search).with(duck_type(:<<), nil) + expect(Thread).to_not receive(:new) + + plugin.run([]) + end + end + + 2.upto(8) do |slice_count| + context "with `slices => #{slice_count}`" do + let(:slices) { slice_count } + before { plugin.register } + + it "runs #{slice_count} independent slices" do + expect(Thread).to receive(:new).and_call_original.exactly(slice_count).times + slice_count.times do |slice_id| + expect(plugin.instance_variable_get(:@query_executor)).to receive(:search).with(duck_type(:<<), slice_id) + end + + plugin.run([]) + end + end + end + + # This section of specs heavily mocks the Elasticsearch::Client, and ensures that the Elasticsearch Input Plugin + # behaves as expected when handling a series of sliced, scrolled requests/responses. + context 'adapter/integration' do + let(:response_template) do + { + "took" => 12, + "timed_out" => false, + "shards" => { + "total" => 6, + "successful" => 6, + "failed" => 0 + } + } + end + + let(:hits_template) do + { + "total" => 4, + "max_score" => 1.0, + "hits" => [] + } + end + + let(:hit_template) do + { + "_index" => "logstash-2018.08.23", + "_type" => "logs", + "_score" => 1.0, + "_source" => { "message" => ["hello, world"] } + } + end + + # BEGIN SLICE 0: a sequence of THREE scrolled responses containing 2, 1, and 0 items + # end-of-slice is reached when slice0_response2 is empty. + begin + let(:slice0_response0) do + response_template.merge({ + "_scroll_id" => slice0_scroll1, + "hits" => hits_template.merge("hits" => [ + hit_template.merge('_id' => "slice0-response0-item0"), + hit_template.merge('_id' => "slice0-response0-item1") + ]) + }) + end + let(:slice0_scroll1) { 'slice:0,scroll:1' } + let(:slice0_response1) do + response_template.merge({ + "_scroll_id" => slice0_scroll2, + "hits" => hits_template.merge("hits" => [ + hit_template.merge('_id' => "slice0-response1-item0") + ]) + }) + end + let(:slice0_scroll2) { 'slice:0,scroll:2' } + let(:slice0_response2) do + response_template.merge( + "_scroll_id" => slice0_scroll3, + "hits" => hits_template.merge({"hits" => []}) + ) + end + let(:slice0_scroll3) { 'slice:0,scroll:3' } + end + # END SLICE 0 + + # BEGIN SLICE 1: a sequence of TWO scrolled responses containing 2 and 2 items. 
+ # end-of-slice is reached when slice1_response1 does not contain a next scroll id + begin + let(:slice1_response0) do + response_template.merge({ + "_scroll_id" => slice1_scroll1, + "hits" => hits_template.merge("hits" => [ + hit_template.merge('_id' => "slice1-response0-item0"), + hit_template.merge('_id' => "slice1-response0-item1") + ]) + }) + end + let(:slice1_scroll1) { 'slice:1,scroll:1' } + let(:slice1_response1) do + response_template.merge({ + "hits" => hits_template.merge("hits" => [ + hit_template.merge('_id' => "slice1-response1-item0"), + hit_template.merge('_id' => "slice1-response1-item1") + ]) + }) + end + end + # END SLICE 1 + + + # RSpec mocks validations are not threadsafe. + # Allow caller to synchronize. + def synchronize_method!(object, method_name) + original_method = object.method(method_name) + mutex = Mutex.new + allow(object).to receive(method_name).with(any_args) do |*method_args, &method_block| + mutex.synchronize do + original_method.call(*method_args,&method_block) + end + end + end + + describe "with normal response" do + before(:each) do + expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client) + plugin.register + + expect(client).to receive(:clear_scroll).and_return(nil) + + # SLICE0 is a three-page scroll in which the last page is empty + slice0_query = LogStash::Json.dump(query.merge('slice' => { 'id' => 0, 'max' => 2})) + expect(client).to receive(:search).with(hash_including(:body => slice0_query)).and_return(slice0_response0) + expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice0_scroll1 })).and_return(slice0_response1) + expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice0_scroll2 })).and_return(slice0_response2) + allow(client).to receive(:ping) + + # SLICE1 is a two-page scroll in which the last page has no next scroll id + slice1_query = LogStash::Json.dump(query.merge('slice' => { 'id' => 1, 'max' => 2})) + expect(client).to receive(:search).with(hash_including(:body => slice1_query)).and_return(slice1_response0) + expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice1_scroll1 })).and_return(slice1_response1) + + synchronize_method!(plugin.instance_variable_get(:@query_executor), :next_page) + synchronize_method!(plugin.instance_variable_get(:@query_executor), :initial_search) + end + + let(:client) { Elasticsearch::Client.new } + + let(:emitted_events) do + queue = Queue.new # since we are running slices in threads, we need a thread-safe queue. + plugin.run(queue) + events = [] + events << queue.pop until queue.empty? 
+ events + end + + let(:emitted_event_ids) do + emitted_events.map { |event| event.get('[@metadata][_id]') } + end + + it 'emits the hits on the first page of the first slice' do + expect(emitted_event_ids).to include('slice0-response0-item0') + expect(emitted_event_ids).to include('slice0-response0-item1') + end + it 'emits the hits on the second page of the first slice' do + expect(emitted_event_ids).to include('slice0-response1-item0') + end + + it 'emits the hits on the first page of the second slice' do + expect(emitted_event_ids).to include('slice1-response0-item0') + expect(emitted_event_ids).to include('slice1-response0-item1') + end + + it 'emits the hits on the second page of the second slice' do + expect(emitted_event_ids).to include('slice1-response1-item0') + expect(emitted_event_ids).to include('slice1-response1-item1') + end + + it 'does not double-emit' do + expect(emitted_event_ids.uniq).to eq(emitted_event_ids) + end + + it 'emits events with appropriate fields' do + emitted_events.each do |event| + expect(event).to be_a(LogStash::Event) + expect(event.get('message')).to eq(['hello, world']) + expect(event.get('[@metadata][_id]')).to_not be_nil + expect(event.get('[@metadata][_id]')).to_not be_empty + expect(event.get('[@metadata][_index]')).to start_with('logstash-') + end + end + end + + describe "with scroll request fail" do + before(:each) do + expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client) + plugin.register + + expect(client).to receive(:clear_scroll).twice.and_return(nil) + + # SLICE0 is a three-page scroll + slice0_query = LogStash::Json.dump(query.merge('slice' => { 'id' => 0, 'max' => 2})) + expect(client).to receive(:search).with(hash_including(:body => slice0_query)).and_return(slice0_response0) + expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice0_scroll1 })).and_return(slice0_response1) + expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice0_scroll2 })).and_return(slice0_response2) + allow(client).to receive(:ping) + + # SLICE1 is a two-page scroll in which the last page throws exception + slice1_query = LogStash::Json.dump(query.merge('slice' => { 'id' => 1, 'max' => 2})) + expect(client).to receive(:search).with(hash_including(:body => slice1_query)).and_return(slice1_response0) + expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice1_scroll1 })).and_raise("boom") + + synchronize_method!(plugin.instance_variable_get(:@query_executor), :next_page) + synchronize_method!(plugin.instance_variable_get(:@query_executor), :initial_search) + end + + let(:client) { Elasticsearch::Client.new } + + it 'insert event to queue without waiting other slices' do + expect(plugin.instance_variable_get(:@query_executor)).to receive(:search).twice.and_wrap_original do |m, *args| + q = args[0] + slice_id = args[1] + if slice_id == 0 + m.call(*args) + expect(q.size).to eq(3) + else + sleep(1) + m.call(*args) + end + end + + queue = Queue.new + plugin.run(queue) + expect(queue.size).to eq(5) + end + end + end + end + + context "with Elasticsearch document information" do + let!(:response) do + { + "_scroll_id" => "cXVlcnlUaGVuRmV0Y2g", + "took" => 27, + "timed_out" => false, + "_shards" => { + "total" => 169, + "successful" => 169, + "failed" => 0 + }, + "hits" => { + "total" => 1, + "max_score" => 1.0, + "hits" => [ { + "_index" => "logstash-2014.10.12", + "_type" => "logs", + "_id" => "C5b2xLQwTZa76jBmHIbwHQ", + "_score" => 1.0, + "_source" => { + 
"message" => ["ohayo"], + "metadata_with_hash" => { "awesome" => "logstash" }, + "metadata_with_string" => "a string" } + } ] + } + } + end + + let(:scroll_reponse) do + { + "_scroll_id" => "r453Wc1jh0caLJhSDg", + "hits" => { "hits" => [] } + } + end + + let(:client) { Elasticsearch::Client.new } + + before do + expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client) + expect(client).to receive(:search).with(any_args).and_return(response) + allow(client).to receive(:scroll).with({ :body => {:scroll_id => "cXVlcnlUaGVuRmV0Y2g"}, :scroll => "1m" }).and_return(scroll_reponse) + allow(client).to receive(:clear_scroll).and_return(nil) + allow(client).to receive(:ping).and_return(nil) + end + + ecs_compatibility_matrix(:disabled, :v1, :v8) do |ecs_select| + + before(:each) do + allow_any_instance_of(described_class).to receive(:ecs_compatibility).and_return(ecs_compatibility) + end + + before do + if do_register + plugin.register + plugin.run queue + end + end + + let(:do_register) { true } + + let(:event) { queue.pop } + + context 'with docinfo enabled' do + let(:config) { base_config.merge 'docinfo' => true } + + it "provides document info under metadata" do + if ecs_select.active_mode == :disabled + expect(event.get("[@metadata][_index]")).to eq('logstash-2014.10.12') + expect(event.get("[@metadata][_type]")).to eq('logs') + expect(event.get("[@metadata][_id]")).to eq('C5b2xLQwTZa76jBmHIbwHQ') + else + expect(event.get("[@metadata][input][elasticsearch][_index]")).to eq('logstash-2014.10.12') + expect(event.get("[@metadata][input][elasticsearch][_type]")).to eq('logs') + expect(event.get("[@metadata][input][elasticsearch][_id]")).to eq('C5b2xLQwTZa76jBmHIbwHQ') + end + end + + context 'with docinfo_target' do + let(:config) { base_config.merge 'docinfo' => true, 'docinfo_target' => docinfo_target } + let(:docinfo_target) { 'metadata_with_hash' } + + it 'merges values if the `docinfo_target` already exist in the `_source` document' do + expect(event.get("[metadata_with_hash][_index]")).to eq('logstash-2014.10.12') + expect(event.get("[metadata_with_hash][_type]")).to eq('logs') + expect(event.get("[metadata_with_hash][_id]")).to eq('C5b2xLQwTZa76jBmHIbwHQ') + expect(event.get("[metadata_with_hash][awesome]")).to eq("logstash") + end + + context 'non-existent' do + let(:docinfo_target) { 'meta' } + + it 'should move the document information to the specified field' do + expect(event.get("[meta][_index]")).to eq('logstash-2014.10.12') + expect(event.get("[meta][_type]")).to eq('logs') + expect(event.get("[meta][_id]")).to eq('C5b2xLQwTZa76jBmHIbwHQ') + end + + end + + end + + context 'if the `docinfo_target` exist but is not of type hash' do + let(:config) { base_config.merge 'docinfo' => true, "docinfo_target" => 'metadata_with_string' } + let(:do_register) { false } + let(:mock_queue) { double('Queue', :<< => nil) } + let(:hit) { response.dig('hits', 'hits').first } + + it 'emits a tagged event with JSON-serialized event in [event][original]' do + allow(plugin).to receive(:logger).and_return(double('Logger').as_null_object) + + plugin.register + plugin.run(mock_queue) + + expect(mock_queue).to have_received(:<<) do |event| + expect(event).to be_a_kind_of LogStash::Event + + expect(event.get('tags')).to include("_elasticsearch_input_failure") + expect(event.get('[event][original]')).to be_a_kind_of String + expect(JSON.load(event.get('[event][original]'))).to eq hit + end + + expect(plugin.logger) + .to have_received(:warn).with( + a_string_including("Event creation 
error, original data now in [event][original] field"), + a_hash_including(:message => a_string_including('unable to merge docinfo fields into docinfo_target=`metadata_with_string`'), + :data => a_string_including('"_id":"C5b2xLQwTZa76jBmHIbwHQ"'))) + end + + end + + context 'with docinfo_fields' do + let(:config) { base_config.merge 'docinfo' => true, "docinfo_fields" => ["_index"] } + + it "allows to specify which fields from the document info to save to metadata" do + meta_base = event.get(ecs_select.active_mode == :disabled ? "@metadata" : "[@metadata][input][elasticsearch]") + expect(meta_base.keys).to eql ["_index"] + end + + end + + context 'add_field' do + let(:config) { base_config.merge 'docinfo' => true, + 'add_field' => { 'identifier' => "foo:%{[@metadata][_type]}:%{[@metadata][_id]}" } } + + it 'should be able to reference metadata fields in `add_field` decorations' do + expect(event.get('identifier')).to eq('foo:logs:C5b2xLQwTZa76jBmHIbwHQ') + end if ecs_select.active_mode == :disabled + + end + + end + + context "when not defining the docinfo" do + let(:config) { base_config } + + it 'should keep the document information in the root of the event' do + expect(event.get("[@metadata]")).to be_empty + end + end + + end + end + + describe "client" do + let(:config) do + { + + } + end + let(:plugin) { described_class.new(config) } + let(:event) { LogStash::Event.new({}) } + + describe "cloud.id" do + let(:valid_cloud_id) do + 'sample:dXMtY2VudHJhbDEuZ2NwLmNsb3VkLmVzLmlvJGFjMzFlYmI5MDI0MTc3MzE1NzA0M2MzNGZkMjZmZDQ2OjkyNDMkYTRjMDYyMzBlNDhjOGZjZTdiZTg4YTA3NGEzYmIzZTA6OTI0NA==' + end + + let(:config) { super().merge({ 'cloud_id' => valid_cloud_id }) } + + it "should set host(s)" do + plugin.register + client = plugin.send(:client) + target_field = :@seeds + begin + Elasticsearch::Transport::Client + rescue + target_field = :@hosts + end + expect( client.transport.instance_variable_get(target_field) ).to eql [{ + :scheme => "https", + :host => "ac31ebb90241773157043c34fd26fd46.us-central1.gcp.cloud.es.io", + :port => 9243, + :path => "", + :protocol => "https" + }] + end + + context 'invalid' do + let(:config) { super().merge({ 'cloud_id' => 'invalid:dXMtY2VudHJhbDEuZ2NwLmNsb3VkLmVzLmlv' }) } + + it "should fail" do + expect { plugin.register }.to raise_error LogStash::ConfigurationError, /cloud_id.*? is invalid/ + end + end + + context 'hosts also set' do + let(:config) { super().merge({ 'cloud_id' => valid_cloud_id, 'hosts' => [ 'localhost:9200' ] }) } + + it "should fail" do + expect { plugin.register }.to raise_error LogStash::ConfigurationError, /cloud_id and hosts/ + end + end + end if LOGSTASH_VERSION > '6.0' + + describe "cloud.auth" do + let(:config) { super().merge({ 'cloud_auth' => LogStash::Util::Password.new('elastic:my-passwd-00') }) } + + it "should set authorization" do + plugin.register + client = plugin.send(:client) + auth_header = extract_transport(client).options[:transport_options][:headers]['Authorization'] + + expect( auth_header ).to eql "Basic #{Base64.encode64('elastic:my-passwd-00').rstrip}" + end + + context 'invalid' do + let(:config) { super().merge({ 'cloud_auth' => 'invalid-format' }) } + + it "should fail" do + expect { plugin.register }.to raise_error LogStash::ConfigurationError, /cloud_auth.*? 
format/ + end + end + + context 'user also set' do + let(:config) { super().merge({ 'cloud_auth' => 'elastic:my-passwd-00', 'user' => 'another' }) } + + it "should fail" do + expect { plugin.register }.to raise_error LogStash::ConfigurationError, /Multiple authentication options are specified/ + end + end + end if LOGSTASH_VERSION > '6.0' + + describe "api_key" do + context "without ssl" do + let(:config) { super().merge({ 'api_key' => LogStash::Util::Password.new('foo:bar') }) } + + it "should fail" do + expect { plugin.register }.to raise_error LogStash::ConfigurationError, /api_key authentication requires SSL\/TLS/ + end + end + + context "with ssl" do + let(:api_key_value) { nil } + let(:config) { super().merge("ssl_enabled" => true, 'api_key' => LogStash::Util::Password.new(api_key_value)) } + let(:encoded_api_key) { Base64.strict_encode64('foo:bar') } + + shared_examples "a plugin that sets the ApiKey authorization header" do + it "correctly sets the Authorization header" do + plugin.register + client = plugin.send(:client) + auth_header = extract_transport(client).options[:transport_options][:headers]['Authorization'] + + expect(auth_header).to eql("ApiKey #{encoded_api_key}") + end + end + + context "with a non-encoded API key" do + let(:api_key_value) { "foo:bar" } + it_behaves_like "a plugin that sets the ApiKey authorization header" + end + + context "with an encoded API key" do + let(:api_key_value) { encoded_api_key } + it_behaves_like "a plugin that sets the ApiKey authorization header" + end + + context 'user also set' do + let(:config) { super().merge({ 'api_key' => 'foo:bar', 'user' => 'another' }) } + + it "should fail" do + expect { plugin.register }.to raise_error LogStash::ConfigurationError, /Multiple authentication options are specified/ + end + end + + context 'ssl verification disabled' do + let(:config) { super().merge({ 'ssl_verification_mode' => 'none' }) } + it 'should warn data security risk' do + expect(plugin.logger).to receive(:warn).once.with("You have enabled encryption but DISABLED certificate verification, to make sure your data is secure set `ssl_verification_mode => full`") + plugin.register + end + end + end + end if LOGSTASH_VERSION > '6.0' + + describe "proxy" do + let(:config) { super().merge({ 'proxy' => 'http://localhost:1234/' }) } + + it "should set proxy" do + plugin.register + client = plugin.send(:client) + proxy = extract_transport(client).options[:transport_options][:proxy] + + expect( proxy ).to eql "http://localhost:1234/" + end + + context 'invalid' do + let(:config) { super().merge({ 'proxy' => '${A_MISSING_ENV_VAR:}' }) } + + it "should not set proxy" do + plugin.register + client = plugin.send(:client) + + expect( extract_transport(client).options[:transport_options] ).to_not include(:proxy) + end + end + end + + class StoppableServer + + attr_reader :port + + def initialize() + queue = Queue.new + @first_req_waiter = java.util.concurrent.CountDownLatch.new(1) + @first_request = nil + + @t = java.lang.Thread.new( + proc do + begin + @server = WEBrick::HTTPServer.new :Port => 0, :DocumentRoot => ".", + :Logger => Cabin::Channel.get, # silence WEBrick logging + :StartCallback => Proc.new { queue.push("started") } + @port = @server.config[:Port] + @server.mount_proc '/' do |req, res| + res.body = ''' + { + "name": "ce7ccfb438e8", + "cluster_name": "docker-cluster", + "cluster_uuid": "DyR1hN03QvuCWXRy3jtb0g", + "version": { + "number": "7.13.1", + "build_flavor": "default", + "build_type": "docker", + "build_hash": 
"9a7758028e4ea59bcab41c12004603c5a7dd84a9", + "build_date": "2021-05-28T17:40:59.346932922Z", + "build_snapshot": false, + "lucene_version": "8.8.2", + "minimum_wire_compatibility_version": "6.8.0", + "minimum_index_compatibility_version": "6.0.0-beta1" + }, + "tagline": "You Know, for Search" + } + ''' + res.status = 200 + res['Content-Type'] = 'application/json' + @first_request = req + @first_req_waiter.countDown() + end + + @server.mount_proc '/logstash_unit_test/_search' do |req, res| + res.body = ''' + { + "took" : 1, + "timed_out" : false, + "_shards" : { + "total" : 1, + "successful" : 1, + "skipped" : 0, + "failed" : 0 + }, + "hits" : { + "total" : { + "value" : 10000, + "relation" : "gte" + }, + "max_score" : 1.0, + "hits" : [ + { + "_index" : "test_bulk_index_2", + "_type" : "_doc", + "_id" : "sHe6A3wBesqF7ydicQvG", + "_score" : 1.0, + "_source" : { + "@timestamp" : "2021-09-20T15:02:02.557Z", + "message" : "{\"name\": \"Andrea\"}", + "@version" : "1", + "host" : "kalispera", + "sequence" : 5 + } + } + ] + } + } + ''' + res.status = 200 + res['Content-Type'] = 'application/json' + @first_request = req + @first_req_waiter.countDown() + end + + @server.start + rescue => e + warn "ERROR in webserver thread #{e.inspect}\n #{e.backtrace.join("\n ")}" + # ignore + end + end + ) + @t.daemon = true + @t.start + queue.pop # blocks until the server is up + end + + def stop + @server.shutdown + end + + def wait_receive_request + @first_req_waiter.await(2, java.util.concurrent.TimeUnit::SECONDS) + @first_request + end + end + + describe "'user-agent' header" do + let!(:webserver) { StoppableServer.new } # webserver must be started before the call, so no lazy "let" + + after :each do + webserver.stop + end + + it "server should be started" do + require 'net/http' + response = nil + Net::HTTP.start('localhost', webserver.port) {|http| + response = http.request_get('/') + } + expect(response.code.to_i).to eq(200) + end + + context "used by plugin" do + let(:config) do + { + "hosts" => ["localhost:#{webserver.port}"], + "query" => '{ "query": { "match": { "statuscode": 200 } }, "sort": [ "_doc" ] }', + "index" => "logstash_unit_test" } - ] + end + let(:plugin) { described_class.new(config) } + let(:event) { LogStash::Event.new({}) } + + # elasticsearch-ruby 7.17.9 initialize two user agent headers, `user-agent` and `User-Agent` + # hence, fail this header size test case + xit "client should sent the expect user-agent" do + plugin.register + + queue = [] + plugin.run(queue) + + request = webserver.wait_receive_request + + expect(request.header['user-agent'].size).to eq(1) + expect(request.header['user-agent'][0]).to match(/logstash\/\d*\.\d*\.\d* \(OS=.*; JVM=.*\) logstash-input-elasticsearch\/\d*\.\d*\.\d*/) + end + end + end + + shared_examples 'configurable timeout' do |config_name, manticore_transport_option| + let(:config_value) { fail NotImplementedError } + let(:config) { super().merge(config_name => config_value) } + { + :string => 'banana', + :negative => -123, + :zero => 0, + }.each do |value_desc, value| + let(:config_value) { value } + context "with an invalid #{value_desc} value" do + it 'prevents instantiation with a helpful message' do + expect(described_class.logger).to receive(:error).with(/Expected positive whole number/) + expect { described_class.new(config) }.to raise_error(LogStash::ConfigurationError) + end + end + end + + context 'with a valid value' do + let(:config_value) { 17 } + + it "instantiates the elasticsearch client with the timeout value set via 
#{manticore_transport_option} in the transport options" do + expect(Elasticsearch::Client).to receive(:new) do |new_elasticsearch_client_params| + # We rely on Manticore-specific transport options, fail early if we are using a different + # transport or are allowing the client to determine its own transport class. + expect(new_elasticsearch_client_params).to include(:transport_class) + expect(new_elasticsearch_client_params[:transport_class].name).to match(/\bManticore\b/) + + expect(new_elasticsearch_client_params).to include(:transport_options) + transport_options = new_elasticsearch_client_params[:transport_options] + expect(transport_options).to include(manticore_transport_option) + expect(transport_options[manticore_transport_option]).to eq(config_value.to_i) + mock_client = double("fake_client") + allow(mock_client).to receive(:ping) + allow(mock_client).to receive(:info).and_return(cluster_info) + mock_client + end + + plugin.register + end + + after { plugin.do_stop } + end + end + + context 'connect_timeout_seconds' do + include_examples('configurable timeout', 'connect_timeout_seconds', :connect_timeout) + end + context 'request_timeout_seconds' do + include_examples('configurable timeout', 'request_timeout_seconds', :request_timeout) + end + context 'socket_timeout_seconds' do + include_examples('configurable timeout', 'socket_timeout_seconds', :socket_timeout) + end + end + + context "when scheduling" do + let(:config) do + { + "hosts" => ["localhost"], + "query" => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }', + "schedule" => "* * * * * * UTC" # every second } - } - RESPONSE + end - scroll_response = <<-RESPONSE - { - "hits":{ - "hits":[] + before do + plugin.register + end + + it "should properly schedule" do + begin + expect(plugin.instance_variable_get(:@query_executor)).to receive(:do_run) { + queue << LogStash::Event.new({}) + }.at_least(:twice) + runner = Thread.start { plugin.run(queue) } + expect(queue.pop).not_to be_nil + cron_jobs = plugin.instance_variable_get(:@_scheduler).instance_variable_get(:@impl).jobs + expect(cron_jobs[0].next_time - cron_jobs[0].last_time).to be <= 5.0 + expect(queue.pop).not_to be_nil + ensure + plugin.do_stop + runner.join if runner + end + end + end + + context "aggregations" do + let(:index_name) { "rainbow" } + let(:config) do + { + 'hosts' => ["localhost"], + 'query' => '{ "query": {}, "size": 0, "aggs":{"total_count": { "value_count": { "field": "type" }}, "empty_count": { "sum": { "field": "_meta.empty_event" }}}}', + 'response_type' => 'aggregations', + 'size' => 0, + 'index' => index_name } - } - RESPONSE + end - config <<-CONFIG - input { - elasticsearch { - host => "localhost" - scan => false + let(:mock_response) do + { + "took" => 27, + "timed_out" => false, + "_shards" => { + "total" => 169, + "successful" => 169, + "skipped" => 0, + "failed" => 0 + }, + "hits" => { + "total" => 10, + "max_score" => 1.0, + "hits" => [] + }, + "aggregations" => { + "total_counter" => { + "value" => 10 + }, + "empty_counter" => { + "value" => 5 + }, + } } - } - CONFIG - - it "should retrieve json event from elasticseach" do - # I somewhat duplicated our "input" rspec extension because I needed to add mocks for the the actual ES calls - # and rspec expectations need to be in "it" statement but the "input" extension defines the "it" - # TODO(colin) see how we can improve our rspec extension to better integrate in these scenarios - - expect_any_instance_of(LogStash::Inputs::Elasticsearch).to 
receive(:execute_search_request).and_return(search_response) - expect_any_instance_of(LogStash::Inputs::Elasticsearch).to receive(:execute_scroll_request).with(any_args).and_return(scroll_response) - - pipeline = LogStash::Pipeline.new(config) - queue = Queue.new - pipeline.instance_eval do - @output_func = lambda { |event| queue << event } - end - pipeline_thread = Thread.new { pipeline.run } - event = queue.pop - - insist { event["message"] } == "foobar" - - # do not call pipeline.shutdown here, as it will stop the plugin execution randomly - # and maybe kill input before calling execute_scroll_request. - # TODO(colin) we should rework the pipeliene shutdown to allow a soft/clean shutdown mecanism, - # using a shutdown event which can be fed into each plugin queue and when the plugin sees it - # exits after completing its processing. - # - # pipeline.shutdown - # - # instead, since our scroll_response will terminate the plugin, we can just join the pipeline thread - pipeline_thread.join + end + + let(:client) { Elasticsearch::Client.new } + before(:each) do + expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client) + expect(client).to receive(:ping) + end + + before { plugin.register } + + it 'creates the events from the aggregations' do + expect(client).to receive(:search).with(hash_including(:body => anything, :size => 0, :index => index_name)).and_return(mock_response) + plugin.run queue + event = queue.pop + + expect(event).to be_a(LogStash::Event) + expect(event.get("[total_counter][value]")).to eql 10 + expect(event.get("[empty_counter][value]")).to eql 5 + end + + context "when there's an exception" do + before(:each) do + allow(client).to receive(:search).and_raise RuntimeError.new("test exception") + end + it 'produces no events' do + plugin.run queue + expect(queue).to be_empty + end + end + end + + context "retries" do + let(:client) { Elasticsearch::Client.new } + let(:config) do + { + "hosts" => ["localhost"], + "query" => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }', + "retries" => 1 + } + end + + shared_examples "a retryable plugin" do + it "retry and log error when all search request fail" do + expect_any_instance_of(LogStash::Helpers::LoggableTry).to receive(:log_failure).with(instance_of(Manticore::UnknownException), instance_of(Integer), instance_of(String)).twice + expect(client).to receive(:search).with(instance_of(Hash)).and_raise(Manticore::UnknownException).at_least(:twice) + + plugin.register + + expect{ plugin.run(queue) }.not_to raise_error + end + + it "retry successfully when search request fail for one time" do + expect_any_instance_of(LogStash::Helpers::LoggableTry).to receive(:log_failure).with(instance_of(Manticore::UnknownException), 1, instance_of(String)) + expect(client).to receive(:search).with(instance_of(Hash)).once.and_raise(Manticore::UnknownException) + expect(client).to receive(:search).with(instance_of(Hash)).once.and_return(search_response) + + plugin.register + + expect{ plugin.run(queue) }.not_to raise_error + end + end + + describe "scroll" do + let(:search_response) do + { + "_scroll_id" => "cXVlcnlUaGVuRmV0Y2g", + "took" => 27, + "timed_out" => false, + "_shards" => { + "total" => 169, + "successful" => 169, + "failed" => 0 + }, + "hits" => { + "total" => 1, + "max_score" => 1.0, + "hits" => [ { + "_index" => "logstash-2014.10.12", + "_type" => "logs", + "_id" => "C5b2xLQwTZa76jBmHIbwHQ", + "_score" => 1.0, + "_source" => { "message" => ["ohayo"] } + } ] + } + } + end + + 
let(:empty_scroll_response) do + { + "_scroll_id" => "r453Wc1jh0caLJhSDg", + "hits" => { "hits" => [] } + } + end + + before(:each) do + allow(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client) + allow(client).to receive(:scroll).with({ :body => { :scroll_id => "cXVlcnlUaGVuRmV0Y2g" }, :scroll=> "1m" }).and_return(empty_scroll_response) + allow(client).to receive(:clear_scroll).and_return(nil) + allow(client).to receive(:ping) + end + + it_behaves_like "a retryable plugin" + end + + describe "search_after" do + let(:es_version) { "8.10.0" } + let(:config) { super().merge({ "search_api" => "search_after" }) } + + let(:search_response) do + { + "took" => 27, + "timed_out" => false, + "_shards" => { + "total" => 169, + "successful" => 169, + "failed" => 0 + }, + "hits" => { + "total" => 1, + "max_score" => 1.0, + "hits" => [ ] # empty hits to break the loop + } + } + end + + before(:each) do + expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client) + expect(client).to receive(:open_point_in_time).once.and_return({ "id" => "cXVlcnlUaGVuRmV0Y2g"}) + expect(client).to receive(:close_point_in_time).once.and_return(nil) + expect(client).to receive(:ping) + end + + it_behaves_like "a retryable plugin" + end + end + + context '#push_hit' do + let(:config) do + { + 'docinfo' => true, # include ids + 'docinfo_target' => '[@metadata][docinfo]' + } + end + + let(:hit) do + JSON.load(<<~EOJSON) + { + "_index" : "test_bulk_index_2", + "_type" : "_doc", + "_id" : "sHe6A3wBesqF7ydicQvG", + "_score" : 1.0, + "_source" : { + "@timestamp" : "2021-09-20T15:02:02.557Z", + "message" : "ping", + "@version" : "17", + "sequence" : 7, + "host" : { + "name" : "maybe.local", + "ip" : "127.0.0.1" + } + } + } + EOJSON + end + + let(:mock_queue) { double('queue', :<< => nil) } + + before(:each) do + plugin.send(:setup_cursor_tracker) + end + + it 'pushes a generated event to the queue' do + plugin.send(:push_hit, hit, mock_queue) + expect(mock_queue).to have_received(:<<) do |event| + expect(event).to be_a_kind_of LogStash::Event + + # fields overriding defaults + expect(event.timestamp.to_s).to eq("2021-09-20T15:02:02.557Z") + expect(event.get('@version')).to eq("17") + + # structure from hit's _source + expect(event.get('message')).to eq("ping") + expect(event.get('sequence')).to eq(7) + expect(event.get('[host][name]')).to eq("maybe.local") + expect(event.get('[host][ip]')).to eq("127.0.0.1") + + # docinfo fields + expect(event.get('[@metadata][docinfo][_index]')).to eq("test_bulk_index_2") + expect(event.get('[@metadata][docinfo][_type]')).to eq("_doc") + expect(event.get('[@metadata][docinfo][_id]')).to eq("sHe6A3wBesqF7ydicQvG") + end + end + + context 'when event creation fails' do + before(:each) do + allow(plugin).to receive(:logger).and_return(double('Logger').as_null_object) + + allow(plugin.event_factory).to receive(:new_event).and_call_original + allow(plugin.event_factory).to receive(:new_event).with(a_hash_including hit['_source']).and_raise(RuntimeError, 'intentional') + end + + it 'pushes a tagged event containing a JSON-encoded hit in [event][original]' do + plugin.send(:push_hit, hit, mock_queue) + + expect(mock_queue).to have_received(:<<) do |event| + expect(event).to be_a_kind_of LogStash::Event + + expect(event.get('tags')).to include("_elasticsearch_input_failure") + expect(event.get('[event][original]')).to be_a_kind_of String + expect(JSON.load(event.get('[event][original]'))).to eq hit + end + + expect(plugin.logger) + .to 
have_received(:warn).with( + a_string_including("Event creation error, original data now in [event][original] field"), + a_hash_including(:message => a_string_including('intentional'), + :data => a_string_including('"_id":"sHe6A3wBesqF7ydicQvG"'))) + + end + end + end + + # @note can be removed once we depends on elasticsearch gem >= 6.x + def extract_transport(client) # on 7.x client.transport is a ES::Transport::Client + client.transport.respond_to?(:transport) ? client.transport.transport : client.transport + end + + describe "#ESQL" do + let(:config) do + { + "query" => "FROM test-index | STATS count() BY field", + "query_type" => "esql", + "retries" => 3 + } + end + let(:es_version) { LogStash::Inputs::Elasticsearch::ES_ESQL_SUPPORT_VERSION } + let(:ls_version) { LogStash::Inputs::Elasticsearch::LS_ESQL_SUPPORT_VERSION } + + before(:each) do + stub_const("LOGSTASH_VERSION", ls_version) + end + + describe "#initialize" do + it "sets up the ESQL client with correct parameters" do + expect(plugin.instance_variable_get(:@query_type)).to eq(config["query_type"]) + expect(plugin.instance_variable_get(:@query)).to eq(config["query"]) + expect(plugin.instance_variable_get(:@retries)).to eq(config["retries"]) + end + end + + describe "#register" do + before(:each) do + Elasticsearch::Client.send(:define_method, :ping) { } + allow_any_instance_of(Elasticsearch::Client).to receive(:info).and_return(cluster_info) + end + it "creates ES|QL executor" do + plugin.register + expect(plugin.instance_variable_get(:@query_executor)).to be_an_instance_of(LogStash::Inputs::Elasticsearch::Esql) + end + end + + describe "#validation" do + + describe "LS version" do + context "when compatible" do + + it "does not raise an error" do + expect { plugin.send(:validate_ls_version_for_esql_support!) }.not_to raise_error + end + end + + context "when incompatible" do + before(:each) do + stub_const("LOGSTASH_VERSION", "8.10.0") + end + + it "raises a runtime error" do + expect { plugin.send(:validate_ls_version_for_esql_support!) } + .to raise_error(RuntimeError, /Current version of Logstash does not include Elasticsearch client which supports ES|QL. Please upgrade Logstash to at least #{ls_version}/) + end + end + end + + describe "ES version" do + before(:each) do + allow(plugin).to receive(:es_version).and_return("8.10.5") + end + + context "when incompatible" do + it "raises a runtime error" do + expect { plugin.send(:validate_es_for_esql_support!) } + .to raise_error(RuntimeError, /Connected Elasticsearch 8.10.5 version does not supports ES|QL. ES|QL feature requires at least Elasticsearch #{es_version} version./) + end + end + end + + context "ES|QL query and DSL params used together" do + let(:config) { + super().merge({ + "index" => "my-index", + "size" => 1, + "slices" => 1, + "search_api" => "auto", + "docinfo" => true, + "docinfo_target" => "[@metadata][docinfo]", + "docinfo_fields" => ["_index"], + "response_type" => "hits", + "tracking_field" => "[@metadata][tracking]" + })} + + it "raises a config error" do + mixed_fields = %w[index size slices docinfo_fields response_type tracking_field] + expect { plugin.register }.to raise_error(LogStash::ConfigurationError, /Configured #{mixed_fields} params are not allowed while using ES|QL query/) + end + end + + describe "ES|QL query" do + context "when query is valid" do + it "does not raise an error" do + expect { plugin.send(:validate_esql_query!) 
}.not_to raise_error + end + end + + context "when query is empty" do + let(:config) do + { + "query" => " " + } + end + + it "raises a configuration error" do + expect { plugin.send(:validate_esql_query!) } + .to raise_error(LogStash::ConfigurationError, /`query` cannot be empty/) + end + end + + context "when query doesn't align with ES syntax" do + let(:config) do + { + "query" => "RANDOM query" + } + end + + it "raises a configuration error" do + source_commands = %w[FROM ROW SHOW] + expect { plugin.send(:validate_esql_query!) } + .to raise_error(LogStash::ConfigurationError, "`query` needs to start with any of #{source_commands}") + end + end + end + end end end diff --git a/spec/inputs/elasticsearch_ssl_spec.rb b/spec/inputs/elasticsearch_ssl_spec.rb new file mode 100644 index 00000000..f0a3a671 --- /dev/null +++ b/spec/inputs/elasticsearch_ssl_spec.rb @@ -0,0 +1,267 @@ +require 'stud/temporary' +require "elasticsearch" + +describe "SSL options" do + let(:es_client_double) { double("Elasticsearch::Client #{self.inspect}") } + let(:hosts) {["localhost"]} + let(:settings) { { "ssl_enabled" => true, "hosts" => hosts } } + + subject do + require "logstash/inputs/elasticsearch" + LogStash::Inputs::Elasticsearch.new(settings) + end + + before do + allow(es_client_double).to receive(:close) + allow(es_client_double).to receive(:ping).with(any_args).and_return(double("pong").as_null_object) + allow(es_client_double).to receive(:info).and_return({"version" => {"number" => "7.5.0", "build_flavor" => "default"}, + "tagline" => "You Know, for Search"}) + allow(Elasticsearch::Client).to receive(:new).and_return(es_client_double) + end + + after do + subject.close + end + + context "when ssl_enabled is" do + context "true and there is no https hosts" do + let(:hosts) { %w[http://es01 http://es01] } + + it "should not infer the ssl_enabled value" do + subject.register + expect(subject.instance_variable_get(:@ssl_enabled)).to eql(true) + expect(subject.params).to match hash_including("ssl_enabled" => true) + end + end + + context "false and cloud_id resolved host is https" do + let(:settings) {{ + "ssl_enabled" => false, + "hosts" => [], + "cloud_id" => "sample:dXMtY2VudHJhbDEuZ2NwLmNsb3VkLmVzLmlvJGFjMzFlYmI5MDI0MTc3MzE1NzA0M2MzNGZkMjZmZDQ2OjkyNDMkYTRjMDYyMzBlNDhjOGZjZTdiZTg4YTA3NGEzYmIzZTA6OTI0NA==" + }} + + it "should not infer the ssl_enabled value" do + subject.register + expect(subject.instance_variable_get(:@ssl_enabled)).to eql(false) + expect(subject.params).to match hash_including("ssl_enabled" => false) + end + end + end + + context "when neither ssl nor ssl_enabled is set" do + let(:settings) { super().reject { |k| %w[ssl ssl_enabled].include?(k) } } + + context "and there is no https hosts" do + let(:hosts) { %w[http://es01 http://es01] } + + it "should infer the ssl_enabled value to false" do + subject.register + expect(subject.instance_variable_get(:@ssl_enabled)).to eql(false) + expect(subject.params).to match hash_including("ssl_enabled" => false) + end + end + + context "and there is https hosts" do + let(:hosts) { %w[https://sec-es01 https://sec-es01] } + + it "should infer the ssl_enabled value to true" do + subject.register + expect(subject.instance_variable_get(:@ssl_enabled)).to eql(true) + expect(subject.params).to match hash_including("ssl_enabled" => true) + end + end + + context "and hosts have no scheme defined" do + let(:hosts) { %w[es01 es01] } + + it "should infer the ssl_enabled value to false" do + subject.register + 
expect(subject.instance_variable_get(:@ssl_enabled)).to eql(false) + expect(subject.params).to match hash_including("ssl_enabled" => false) + end + end + + context "and cloud_id resolved host is https" do + let(:settings) {{ + "hosts" => [], + "cloud_id" => "sample:dXMtY2VudHJhbDEuZ2NwLmNsb3VkLmVzLmlvJGFjMzFlYmI5MDI0MTc3MzE1NzA0M2MzNGZkMjZmZDQ2OjkyNDMkYTRjMDYyMzBlNDhjOGZjZTdiZTg4YTA3NGEzYmIzZTA6OTI0NA==" + }} + + it "should infer the ssl_enabled value to false" do + subject.register + expect(subject.instance_variable_get(:@ssl_enabled)).to eql(true) + expect(subject.params).to match hash_including("ssl_enabled" => true) + end + end + end + + context "when ssl_verification_mode" do + context "is set to none" do + let(:settings) { super().merge( + "ssl_verification_mode" => "none", + ) } + + it "should print a warning" do + expect(subject.logger).to receive(:warn).with(/You have enabled encryption but DISABLED certificate verification/).at_least(:once) + allow(subject.logger).to receive(:warn).with(any_args) + + subject.register + end + + it "should pass the flag to the ES client" do + expect(::Elasticsearch::Client).to receive(:new) do |args| + expect(args[:ssl]).to match hash_including(:ssl => true, :verify => :disable) + end.and_return(es_client_double) + + subject.register + end + end + + context "is set to full" do + let(:settings) { super().merge( + "ssl_verification_mode" => 'full', + ) } + + it "should pass the flag to the ES client" do + expect(::Elasticsearch::Client).to receive(:new) do |args| + expect(args[:ssl]).to match hash_including(:ssl => true, :verify => :default) + end.and_return(es_client_double) + + subject.register + end + end + end + + context "with the conflicting configs" do + context "ssl_certificate_authorities and ssl_truststore_path set" do + let(:ssl_truststore_path) { Stud::Temporary.file.path } + let(:ssl_certificate_authorities_path) { Stud::Temporary.file.path } + let(:settings) { super().merge( + "ssl_truststore_path" => ssl_truststore_path, + "ssl_certificate_authorities" => ssl_certificate_authorities_path + ) } + + after :each do + File.delete(ssl_truststore_path) + File.delete(ssl_certificate_authorities_path) + end + + it "should raise a configuration error" do + expect { subject.register }.to raise_error(LogStash::ConfigurationError, /Use either "ssl_certificate_authorities\/ca_file" or "ssl_truststore_path"/) + end + end + + context "ssl_certificate and ssl_keystore_path set" do + let(:ssl_keystore_path) { Stud::Temporary.file.path } + let(:ssl_certificate_path) { Stud::Temporary.file.path } + let(:settings) { super().merge( + "ssl_certificate" => ssl_certificate_path, + "ssl_keystore_path" => ssl_keystore_path + ) } + + after :each do + File.delete(ssl_keystore_path) + File.delete(ssl_certificate_path) + end + + it "should raise a configuration error" do + expect { subject.register }.to raise_error(LogStash::ConfigurationError, /Use either "ssl_certificate" or "ssl_keystore_path\/keystore"/) + end + end + end + + context "when configured with Java store files" do + let(:ssl_truststore_path) { Stud::Temporary.file.path } + let(:ssl_keystore_path) { Stud::Temporary.file.path } + + after :each do + File.delete(ssl_truststore_path) + File.delete(ssl_keystore_path) + end + + let(:settings) { super().merge( + "ssl_truststore_path" => ssl_truststore_path, + "ssl_truststore_type" => "jks", + "ssl_truststore_password" => "foo", + "ssl_keystore_path" => ssl_keystore_path, + "ssl_keystore_type" => "jks", + "ssl_keystore_password" => "bar", + 
"ssl_verification_mode" => "full", + "ssl_cipher_suites" => ["TLS_DHE_RSA_WITH_AES_256_CBC_SHA256"], + "ssl_supported_protocols" => ["TLSv1.3"] + ) } + + it "should pass the parameters to the ES client" do + expect(::Elasticsearch::Client).to receive(:new) do |args| + expect(args[:ssl]).to match hash_including( + :ssl => true, + :keystore => ssl_keystore_path, + :keystore_type => "jks", + :keystore_password => "bar", + :truststore => ssl_truststore_path, + :truststore_type => "jks", + :truststore_password => "foo", + :verify => :default, + :cipher_suites => ["TLS_DHE_RSA_WITH_AES_256_CBC_SHA256"], + :protocols => ["TLSv1.3"], + ) + end.and_return(es_client_double) + + subject.register + end + end + + context "when configured with certificate files" do + let(:ssl_certificate_authorities_path) { Stud::Temporary.file.path } + let(:ssl_certificate_path) { Stud::Temporary.file.path } + let(:ssl_key_path) { Stud::Temporary.file.path } + let(:settings) { super().merge( + "ssl_certificate_authorities" => [ssl_certificate_authorities_path], + "ssl_certificate" => ssl_certificate_path, + "ssl_key" => ssl_key_path, + "ssl_verification_mode" => "full", + "ssl_cipher_suites" => ["TLS_DHE_RSA_WITH_AES_256_CBC_SHA256"], + "ssl_supported_protocols" => ["TLSv1.3"] + ) } + + after :each do + File.delete(ssl_certificate_authorities_path) + File.delete(ssl_certificate_path) + File.delete(ssl_key_path) + end + + it "should pass the parameters to the ES client" do + expect(::Elasticsearch::Client).to receive(:new) do |args| + expect(args[:ssl]).to match hash_including( + :ssl => true, + :ca_file => ssl_certificate_authorities_path, + :client_cert => ssl_certificate_path, + :client_key => ssl_key_path, + :verify => :default, + :cipher_suites => ["TLS_DHE_RSA_WITH_AES_256_CBC_SHA256"], + :protocols => ["TLSv1.3"], + ) + end.and_return(es_client_double) + + subject.register + end + + context "and only the ssl_certificate is set" do + let(:settings) { super().reject { |k| "ssl_key".eql?(k) } } + + it "should raise a configuration error" do + expect { subject.register }.to raise_error(LogStash::ConfigurationError, /Using an "ssl_certificate" requires an "ssl_key"/) + end + end + + context "and only the ssl_key is set" do + let(:settings) { super().reject { |k| "ssl_certificate".eql?(k) } } + + it "should raise a configuration error" do + expect { subject.register }.to raise_error(LogStash::ConfigurationError, /An "ssl_certificate" is required when using an "ssl_key"/) + end + end + end +end + diff --git a/spec/inputs/integration/elasticsearch_esql_spec.rb b/spec/inputs/integration/elasticsearch_esql_spec.rb new file mode 100644 index 00000000..b25f65a5 --- /dev/null +++ b/spec/inputs/integration/elasticsearch_esql_spec.rb @@ -0,0 +1,150 @@ +# encoding: utf-8 +require "logstash/devutils/rspec/spec_helper" +require "logstash/inputs/elasticsearch" +require "elasticsearch" +require_relative "../../../spec/es_helper" + +describe LogStash::Inputs::Elasticsearch, integration: true do + + SECURE_INTEGRATION = ENV['SECURE_INTEGRATION'].eql? 'true' + ES_HOSTS = ["http#{SECURE_INTEGRATION ? 
's' : nil}://#{ESHelper.get_host_port}"] + + let(:plugin) { described_class.new(config) } + let(:es_index) { "logstash-esql-integration-#{rand(1000)}" } + let(:test_documents) do + [ + { "message" => "test message 1", "type" => "a", "count" => 1 }, + { "message" => "test message 2", "type" => "a", "count" => 2 }, + { "message" => "test message 3", "type" => "b", "count" => 3 }, + { "message" => "test message 4", "type" => "b", "count" => 4 }, + { "message" => "test message 5", "type" => "c", "count" => 5 } + ] + end + let(:config) do + { + "hosts" => ES_HOSTS, + "query_type" => "esql" + } + end + let(:es_client) do + Elasticsearch::Client.new(hosts: ES_HOSTS) + end + + before(:all) do + is_ls_with_esql_supported_client = Gem::Version.create(LOGSTASH_VERSION) >= Gem::Version.create(LogStash::Inputs::Elasticsearch::LS_ESQL_SUPPORT_VERSION) + skip "LS version does not have ES client which supports ES|QL" unless is_ls_with_esql_supported_client + + # Skip tests if ES version doesn't support ES||QL + es_client = Elasticsearch::Client.new(hosts: ES_HOSTS) # need to separately create since let isn't allowed in before(:context) + es_version_info = es_client.info["version"] + es_gem_version = Gem::Version.create(es_version_info["number"]) + skip "ES version does not support ES|QL" if es_gem_version.nil? || es_gem_version < Gem::Version.create(LogStash::Inputs::Elasticsearch::ES_ESQL_SUPPORT_VERSION) + end + + before(:each) do + # Create index with test documents + es_client.indices.create(index: es_index, body: {}) unless es_client.indices.exists?(index: es_index) + + test_documents.each do |doc| + es_client.index(index: es_index, body: doc, refresh: true) + end + end + + after(:each) do + es_client.indices.delete(index: es_index) if es_client.indices.exists?(index: es_index) + end + + context "#run ES|QL queries" do + + before do + stub_const("LOGSTASH_VERSION", LogStash::Inputs::Elasticsearch::LS_ESQL_SUPPORT_VERSION) + allow_any_instance_of(LogStash::Inputs::Elasticsearch).to receive(:exit_plugin?).and_return false, true + end + + before(:each) do + plugin.register + end + + shared_examples "ESQL query execution" do |expected_count| + it "correctly retrieves documents" do + queue = Queue.new + plugin.run(queue) + + event_count = 0 + expected_count.times do |i| + event = queue.pop + expect(event).to be_a(LogStash::Event) + event_count += 1 + end + expect(event_count).to eq(expected_count) + end + end + + context "#FROM query" do + let(:config) do + super().merge("query" => "FROM #{es_index} | SORT count") + end + + include_examples "ESQL query execution", 5 + end + + context "#FROM query and WHERE clause" do + let(:config) do + super().merge("query" => "FROM #{es_index} | WHERE type == \"a\" | SORT count") + end + + include_examples "ESQL query execution", 2 + end + + context "#STATS aggregation" do + let(:config) do + super().merge("query" => "FROM #{es_index} | STATS avg(count) BY type") + end + + it "retrieves aggregated stats" do + queue = Queue.new + plugin.run(queue) + results = [] + 3.times do + event = queue.pop + expect(event).to be_a(LogStash::Event) + results << event.get("avg(count)") + end + + expected_averages = [1.5, 3.5, 5.0] + expect(results.sort).to eq(expected_averages) + end + end + + context "#METADATA" do + let(:config) do + super().merge("query" => "FROM #{es_index} METADATA _index, _id, _version | DROP message.keyword, type.keyword | SORT count") + end + + it "includes document metadata" do + queue = Queue.new + plugin.run(queue) + + 5.times do + event = queue.pop + 
expect(event).to be_a(LogStash::Event) + expect(event.get("_index")).not_to be_nil + expect(event.get("_id")).not_to be_nil + expect(event.get("_version")).not_to be_nil + end + end + end + + context "#invalid ES|QL query" do + let(:config) do + super().merge("query" => "FROM undefined index | LIMIT 1") + end + + it "doesn't produce events" do + queue = Queue.new + plugin.run(queue) + expect(queue.empty?).to eq(true) + end + end + end +end \ No newline at end of file diff --git a/spec/inputs/integration/elasticsearch_spec.rb b/spec/inputs/integration/elasticsearch_spec.rb new file mode 100644 index 00000000..a7c45bd5 --- /dev/null +++ b/spec/inputs/integration/elasticsearch_spec.rb @@ -0,0 +1,149 @@ +# encoding: utf-8 +require "logstash/devutils/rspec/spec_helper" +require "logstash/plugin" +require "logstash/inputs/elasticsearch" +require_relative "../../../spec/es_helper" + +describe LogStash::Inputs::Elasticsearch do + + SECURE_INTEGRATION = ENV['SECURE_INTEGRATION'].eql? 'true' + + let(:config) { { 'hosts' => ["http#{SECURE_INTEGRATION ? 's' : nil}://#{ESHelper.get_host_port}"], + 'index' => 'logs', + 'query' => '{ "query": { "match": { "message": "Not found"} }}' } } + + let(:plugin) { described_class.new(config) } + let(:event) { LogStash::Event.new({}) } + let(:client_options) { Hash.new } + + let(:user) { ENV['ELASTIC_USER'] || 'simpleuser' } + let(:password) { ENV['ELASTIC_PASSWORD'] || 'abc123' } + let(:ca_file) { "spec/fixtures/test_certs/ca.crt" } + + let(:es_url) { "http#{SECURE_INTEGRATION ? 's' : nil}://#{ESHelper.get_host_port}" } + + let(:curl_args) do + config['user'] ? "-u #{config['user']}:#{config['password']}" : '' + end + + before(:each) do + # Delete all templates first. + # Clean ES of data before we start. + ESHelper.curl_and_get_json_response "#{es_url}/_index_template/*", method: 'DELETE', args: curl_args + # This can fail if there are no indexes, ignore failure. + ESHelper.curl_and_get_json_response( "#{es_url}/_index/*", method: 'DELETE', args: curl_args) rescue nil + doc_args = "#{curl_args} -H 'Content-Type: application/json' -d '{\"response\": 404, \"message\":\"Not Found\"}'" + 10.times do + ESHelper.curl_and_get_json_response "#{es_url}/logs/_doc", method: 'POST', args: doc_args + end + ESHelper.curl_and_get_json_response "#{es_url}/_refresh", method: 'POST', args: curl_args + end + + after(:each) do + ESHelper.curl_and_get_json_response "#{es_url}/_index_template/*", method: 'DELETE', args: curl_args + # This can fail if there are no indexes, ignore failure. 
+ ESHelper.curl_and_get_json_response( "#{es_url}/_index/*", method: 'DELETE', args: curl_args) rescue nil + ESHelper.curl_and_get_json_response( "#{es_url}/logs", method: 'DELETE', args: curl_args) rescue nil + ESHelper.curl_and_get_json_response "#{es_url}/_refresh", method: 'POST', args: curl_args + end + + shared_examples 'an elasticsearch index plugin' do + before(:each) do + plugin.register + end + + it 'should retrieve json event from elasticsearch' do + queue = [] + plugin.run(queue) + expect(queue.size).to eq(10) + event = queue.pop + expect(event).to be_a(LogStash::Event) + expect(event.get("response")).to eql(404) + end + end + + describe 'against an unsecured elasticsearch', integration: true do + it_behaves_like 'an elasticsearch index plugin' + end + + describe 'against a secured elasticsearch', secure_integration: true do + + # client_options is for an out-of-band helper + let(:client_options) { { :ca_file => ca_file, :user => user, :password => password } } + + let(:config) { super().merge('user' => user, 'password' => password) } + + shared_examples 'secured_elasticsearch' do + it_behaves_like 'an elasticsearch index plugin' + + let(:unauth_exception_class) do + begin + Elasticsearch::Transport::Transport::Errors::Unauthorized + rescue + Elastic::Transport::Transport::Errors::Unauthorized + end + end + + context "incorrect auth credentials" do + + let(:config) do + super().merge('user' => 'archer', 'password' => 'b0gus!') + end + + let(:queue) { [] } + + it "fails to run the plugin" do + expect { plugin.register }.to raise_error unauth_exception_class + end + end + end + + context 'with ca_file' do + let(:config) { super().merge('ssl_enabled' => true, 'ssl_certificate_authorities' => ca_file) } + it_behaves_like 'secured_elasticsearch' + end + + context 'with `ca_trusted_fingerprint`' do + let(:ca_trusted_fingerprint) { File.read("spec/fixtures/test_certs/ca.der.sha256").chomp } + let(:config) { super().merge('ssl_enabled' => true, 'ca_trusted_fingerprint' => ca_trusted_fingerprint) } + + if Gem::Version.create(LOGSTASH_VERSION) >= Gem::Version.create("8.3.0") + it_behaves_like 'secured_elasticsearch' + else + it 'raises a configuration error' do + expect { plugin }.to raise_exception(LogStash::ConfigurationError, a_string_including("ca_trusted_fingerprint")) + end + end + end + end + + context 'setting host:port', integration: true do + + let(:config) do + super().merge "hosts" => [ESHelper.get_host_port] + end + + it_behaves_like 'an elasticsearch index plugin' + + end + + context 'setting host:port (and ssl)', secure_integration: true do + + let(:client_options) { { :ssl_certificate_authorities => ca_file, :user => user, :password => password } } + + let(:config) do + config = super().merge "hosts" => [ESHelper.get_host_port] + config.merge('user' => user, 'password' => password, 'ssl_enabled' => true, 'ssl_certificate_authorities' => ca_file) + end + + it_behaves_like 'an elasticsearch index plugin' + + end + + describe 'slice', integration: true do + let(:config) { super().merge('slices' => 2, 'size' => 2) } + let(:plugin) { described_class.new(config) } + + it_behaves_like 'an elasticsearch index plugin' + end +end diff --git a/spec/inputs/paginated_search_spec.rb b/spec/inputs/paginated_search_spec.rb new file mode 100644 index 00000000..50d062f0 --- /dev/null +++ b/spec/inputs/paginated_search_spec.rb @@ -0,0 +1,129 @@ +require "logstash/devutils/rspec/spec_helper" +require "logstash/inputs/elasticsearch/paginated_search" + +describe "Paginated search" do + 
let(:es_client) { double("Elasticsearch::Client") } + let(:settings) { { "index" => "logs", "query" => "{ \"sort\": [ \"_doc\" ] }", "scroll" => "1m", "retries" => 0, "size" => 1000 } } + let(:plugin) { double("LogStash::Inputs::Elasticsearch", params: settings, pipeline_id: "main", stop?: false) } + let(:pit_id) { "08fsAwILcmVzaGFyZC0yZmIWdzFnbl" } + + describe "search after" do + subject do + LogStash::Inputs::Elasticsearch::SearchAfter.new(es_client, plugin) + end + + describe "search options" do + context "query without sort" do + let(:settings) { super().merge({"query" => "{\"match_all\": {} }"}) } + + it "adds default sort" do + options = subject.search_options(pit_id: pit_id) + expect(options[:body][:sort]).to match({"_shard_doc": "asc"}) + end + end + + context "customize settings" do + let(:size) { 2 } + let(:slices) { 4 } + let(:settings) { super().merge({"slices" => slices, "size" => size}) } + + it "gives updated options" do + slice_id = 1 + search_after = [0, 0] + options = subject.search_options(pit_id: pit_id, slice_id: slice_id, search_after: search_after) + expect(options[:size]).to match(size) + expect(options[:body][:slice]).to match({:id => slice_id, :max => slices}) + expect(options[:body][:search_after]).to match(search_after) + end + end + end + + describe "search" do + let(:queue) { double("queue") } + let(:doc1) do + { + "_index" => "logstash", + "_type" => "logs", + "_id" => "C5b2xLQwTZa76jBmHIbwHQ", + "_score" => 1.0, + "_source" => { "message" => ["Halloween"] }, + "sort" => [0, 0] + } + end + let(:first_resp) do + { + "pit_id" => pit_id, + "took" => 27, + "timed_out" => false, + "_shards" => { + "total" => 2, + "successful" => 2, + "skipped" => 0, + "failed" => 0 + }, + "hits" => { + "total" => { + "value" => 500, + "relation" => "eq" + }, + "hits" => [ doc1 ] + } + } + end + let(:last_resp) do + { + "pit_id" => pit_id, + "took" => 27, + "timed_out" => false, + "_shards" => { + "total" => 2, + "successful" => 2, + "skipped" => 0, + "failed" => 0 + }, + "hits" => { + "total" => { + "value" => 500, + "relation" => "eq" + }, + "hits" => [ ] # empty hits to break the loop + } + } + end + + context "happy case" do + it "runs" do + expect(es_client).to receive(:search).with(instance_of(Hash)).and_return(first_resp, last_resp) + expect(plugin).to receive(:push_hit).with(doc1, queue).once + subject.search(output_queue: queue, pit_id: pit_id) + end + end + + context "with exception" do + it "closes pit" do + expect(es_client).to receive(:open_point_in_time).once.and_return({ "id" => pit_id}) + expect(plugin).to receive(:push_hit).with(doc1, queue).once + expect(es_client).to receive(:search).with(instance_of(Hash)).once.and_return(first_resp) + expect(es_client).to receive(:search).with(instance_of(Hash)).once.and_raise(Manticore::UnknownException) + expect(es_client).to receive(:close_point_in_time).with(any_args).once.and_return(nil) + subject.retryable_search(queue) + end + end + + context "with slices" do + let(:slices) { 2 } + let(:settings) { super().merge({"slices" => slices}) } + + it "runs two slices" do + expect(es_client).to receive(:open_point_in_time).once.and_return({ "id" => pit_id}) + expect(plugin).to receive(:push_hit).with(any_args).twice + expect(Thread).to receive(:new).and_call_original.exactly(slices).times + expect(es_client).to receive(:search).with(instance_of(Hash)).and_return(first_resp, last_resp, first_resp, last_resp) + expect(es_client).to receive(:close_point_in_time).with(any_args).once.and_return(nil) + 
subject.retryable_slice_search(queue) + end + end + end + end + +end \ No newline at end of file